├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── args-wrap.cpp ├── args-wrap.hpp ├── args.hxx ├── bench.cpp ├── config.mk ├── cpuid.cpp ├── cpuid.hpp ├── cycle-timer.c ├── cycle-timer.h ├── cyclic-barrier.hpp ├── dummy.rebuild ├── fairlocks.cpp ├── fairlocks.hpp ├── fmt ├── include │ └── fmt │ │ ├── chrono.h │ │ ├── color.h │ │ ├── compile.h │ │ ├── core.h │ │ ├── format-inl.h │ │ ├── format.h │ │ ├── locale.h │ │ ├── os.h │ │ ├── ostream.h │ │ ├── posix.h │ │ ├── printf.h │ │ └── ranges.h └── src │ ├── format.cc │ └── os.cc ├── hedley.h ├── results ├── g1-16 │ ├── combined.csv │ ├── data_fast.csv │ └── data_slow.csv ├── g2-16 │ ├── combined.csv │ ├── data_fast.csv │ └── data_slow.csv ├── icl │ ├── combined.csv │ ├── data_fast.csv │ └── data_slow.csv └── skl │ ├── combined.csv │ ├── data_fast.csv │ └── data_slow.csv ├── scripts ├── all-plots.sh ├── data.sh ├── details.sh └── plot-bar.py ├── stats.hpp ├── table.cpp ├── table.hpp ├── tsc-support.cpp ├── tsc-support.hpp └── util.hpp /.gitignore: -------------------------------------------------------------------------------- 1 | # by default exclude anythign in jevents without an extension since it's 2 | # probably a binary file 3 | /jevents/** 4 | !/jevents/**/ 5 | !/jevents/**/*.* 6 | !/jevents/**/Makefile 7 | 8 | /.* 9 | !.gitignore 10 | *.o 11 | *.a 12 | *.log 13 | __pycache__ 14 | /tmp/* 15 | *.d 16 | /out* 17 | 18 | # exes 19 | /test 20 | /bench 21 | /voltmon 22 | 23 | perf.data 24 | perf.data.* 25 | 26 | /temp 27 | /local.mk 28 | 29 | /compile_commands.json 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 
2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include config.mk 2 | 3 | # rebuild when makefile changes 4 | -include dummy.rebuild 5 | 6 | EXE := bench 7 | 8 | .PHONY: all clean 9 | 10 | CXX ?= g++ 11 | CC ?= gcc 12 | 13 | # make submakes use the specified compiler also 14 | export CXX 15 | export CC 16 | 17 | # any file that is only conditionally compiled goes here, 18 | # we filter it out from the wildcard below and then add 19 | # it back in using CONDSRC, which gets built up based 20 | # on various conditions 21 | CONDSRC_MASTER := tsc-support.cpp cpuid.cpp 22 | CONDSRC := 23 | 24 | ifneq ($(USE_RDTSC),0) 25 | CONDSRC += tsc-support.cpp cpuid.cpp 26 | endif 27 | 28 | DEFINES = -DUSE_RDTSC=$(USE_RDTSC) 29 | 30 | INCLUDES += -Ifmt/include 31 | 32 | ARCH_FLAGS := $(MARCH_ARG)=$(CPU_ARCH) 33 | # -MMD writes a .d dependency file next to each object; -MP adds a phony target per header so that deleting or renaming a header does not break the build 34 | COMMON_FLAGS := -MMD -MP -Wall $(ARCH_FLAGS) -g $(O_LEVEL) $(INCLUDES) $(NDEBUG) 35 | 36 | CFLAGS += $(DEFINES) $(COMMON_FLAGS) 37 | CXXFLAGS += $(DEFINES) $(COMMON_FLAGS) -Wno-unused-variable 38 | 39 | SRC_FILES := $(wildcard *.cpp) $(wildcard *.c) fmt/src/format.cc 40 | SRC_FILES := $(filter-out $(CONDSRC_MASTER), $(SRC_FILES)) $(CONDSRC) 41 | 42 | # on most compilers we should use no-pie since the nasm stuff isn't position independent 43 | # but since old compilers don't support it, you can override it with PIE= on the command line 44 | PIE ?= -no-pie 45 | LDFLAGS += $(PIE) -lpthread 46 | 47 | EXTRA_DEPS := 48 | 49 | OBJECTS := $(SRC_FILES:.cpp=.o) $(ASM_FILES:.asm=.o) 50 | OBJECTS := $(OBJECTS:.cc=.o) 51 | OBJECTS := $(OBJECTS:.c=.o) 52 | DEPFILES = $(OBJECTS:.o=.d) 53 | # $(info OBJECTS=$(OBJECTS)) 54 | 55 | ########### 56 | # Targets # 57 | ########### 58 | # default goal: build the benchmark executable named by EXE 59 | all: $(EXE) 60 | 61 | -include $(DEPFILES) 62 | # remove every object and dependency file under the current directory, plus the executable 63 | clean: 64 | find . -name '*.o' -delete 65 | find . -name '*.d' -delete 66 | rm
-f $(EXE) 67 | 68 | $(EXE): $(OBJECTS) $(EXTRA_DEPS) 69 | $(CXX) $(OBJECTS) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) $(LDLIBS) -o $@ 70 | 71 | %.o : %.c 72 | $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< 73 | 74 | %.o : %.cpp 75 | $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< 76 | 77 | 78 | %.o: %.asm 79 | $(ASM) $(ASM_FLAGS) -f elf64 $< 80 | 81 | LOCAL_MK = $(wildcard local.mk) 82 | 83 | ifndef MAKE_CLEAN_RECURSION 84 | # https://stackoverflow.com/a/3892826/149138 85 | dummy.rebuild: Makefile config.mk $(LOCAL_MK) 86 | touch $@ 87 | $(MAKE) -s clean MAKE_CLEAN_RECURSION=1 88 | endif 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This benchmark helps illustrate a hierarchy of concurrency costs, as described in [this blog post](https://travisdowns.github.io/blog/2020/07/06/concurrency-costs.html). 2 | 3 | It requires a Linux system to build, and has been tested on Ubuntu 19.04 and 20.04. It should work on Windows using Windows Subsystem for Linux (WSL), although I haven't tested it. 4 | 5 | ## Building 6 | 7 | make 8 | 9 | ## Running 10 | 11 | ./bench [options] 12 | 13 | The list of available options can be obtained by running `./bench --help`. 
Currently, they are: 14 | 15 | ~~~ 16 | ./bench {OPTIONS} 17 | 18 | conc-bench: Demonstrate concurrency perforamnce levels 19 | 20 | OPTIONS: 21 | 22 | --help Display this help menu 23 | --force-tsc-calibrate Force manual TSC calibration loop, even 24 | if cpuid TSC Hz is available 25 | --no-pin Don't try to pin threads to CPU - gives 26 | worse results but works around affinity 27 | issues on TravisCI 28 | --verbose Output more info 29 | --list List the available tests and their 30 | descriptions 31 | --csv Output a csv table instead of the 32 | default 33 | --progress Display progress to stdout 34 | --algos=[TEST-ID] Run only the algorithms in the comma 35 | separated list 36 | --batch=[BATCH-SIZE] Make BATCH-SIZE calls to the function 37 | under test in between checks for test 38 | termination 39 | --trial-time=[TIME-MS] The time for each trial in ms 40 | --min-threads=[MIN] The minimum number of threads to use 41 | --max-threads=[MAX] The maximum number of threads to use 42 | --warmup-ms=[MILLISECONDS] Warmup milliseconds for each thread 43 | after pinning (default 100) 44 | ~~~ 45 | 46 | ## Data Collection and Plotting 47 | 48 | You can examine the `scripts/data.sh` script to see how data was collected, and `scripts/all-plots.sh` (all the heavy lifting happens in `plot-bar.py`) to see how the data reshaping and plotting works. 
49 | -------------------------------------------------------------------------------- /args-wrap.cpp: -------------------------------------------------------------------------------- 1 | #include "args-wrap.hpp" 2 | #include "args.hxx" 3 | 4 | namespace argsw { 5 | 6 | std::array empty_short; 7 | 8 | ArgumentParser::ArgumentParser(const std::string &description_, const std::string &epilog_) { 9 | delegate = new args::ArgumentParser(description_, epilog_); 10 | } 11 | 12 | ArgumentParser::~ArgumentParser() { 13 | delete delegate; 14 | } 15 | 16 | bool ArgumentParser::ParseCLI(const int argc, const char * const * argv, string_consumer* help, string_consumer* parse_error) { 17 | try { 18 | return delegate->ParseCLI(argc, argv); 19 | } catch (const args::Help& e) { 20 | if (help) { 21 | help(delegate->Help()); 22 | } else { 23 | throw; 24 | } 25 | } catch (const args::ParseError& e) { 26 | if (parse_error) { 27 | parse_error(e.what()); 28 | } else { 29 | throw; 30 | } 31 | } 32 | return false; 33 | } 34 | 35 | std::string ArgumentParser::Help() const { 36 | return delegate->Help(); 37 | } 38 | 39 | HelpFlag::HelpFlag(ArgumentParser& group, const std::string &name_, const std::string &help_, std::initializer_list matcher_) { 40 | delegate = new args::HelpFlag(*group.delegate, name_, help_, {empty_short, matcher_}); 41 | } 42 | 43 | HelpFlag::~HelpFlag() { 44 | delete delegate; 45 | } 46 | 47 | Flag::Flag(ArgumentParser& group, const std::string &name_, const std::string &help_, std::initializer_list matcher_) { 48 | base = delegate = new args::Flag(*group.delegate, name_, help_, {empty_short, matcher_}); 49 | } 50 | 51 | Flag::~Flag() { 52 | delete delegate; 53 | } 54 | 55 | template 56 | ValueFlag::ValueFlag(ArgumentParser& group, const std::string &name_, const std::string &help_, std::initializer_list matcher_, 57 | const T &defaultValue_) { 58 | base = delegate = new args::ValueFlag(*group.delegate, name_, help_, {empty_short, matcher_}, defaultValue_); 59 | } 60 | 
61 | template 62 | ValueFlag::~ValueFlag() { 63 | delete delegate; 64 | } 65 | 66 | template 67 | T& ValueFlag::Get() noexcept { 68 | return delegate->Get(); 69 | } 70 | 71 | Base::operator bool() const noexcept 72 | { 73 | return base->Matched(); 74 | } 75 | 76 | #define VF_TYPES_X(fn) \ 77 | fn(int) \ 78 | fn(unsigned) \ 79 | fn(size_t) \ 80 | fn(double) \ 81 | fn(std::string) 82 | 83 | #define EXPLICIT_VF(type) template class ValueFlag; 84 | 85 | // explicitly instantiate the ValueFlag specializations 86 | VF_TYPES_X(EXPLICIT_VF); 87 | 88 | }; 89 | -------------------------------------------------------------------------------- /args-wrap.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace args { 4 | class ArgumentParser; 5 | class Group; 6 | class Matcher; 7 | class HelpFlag; 8 | class Flag; 9 | template 10 | class ValueFlag; 11 | struct ValueReader; 12 | class Base; 13 | } 14 | 15 | namespace argsw { 16 | 17 | class ArgumentParser; 18 | 19 | class Base { 20 | protected: 21 | args::Base* base; 22 | public: 23 | operator bool() const noexcept; 24 | }; 25 | 26 | class HelpFlag { 27 | args::HelpFlag* delegate; 28 | 29 | public: 30 | HelpFlag(ArgumentParser& group, const std::string &name_, const std::string &help_, std::initializer_list matcher_); 31 | ~HelpFlag(); 32 | }; 33 | 34 | class Flag : public Base { 35 | args::Flag* delegate; 36 | 37 | public: 38 | Flag(ArgumentParser& group, const std::string &name_, const std::string &help_, std::initializer_list matcher_); 39 | ~Flag(); 40 | }; 41 | 42 | template 43 | class ValueFlag : public Base { 44 | args::ValueFlag* delegate; 45 | 46 | public: 47 | ValueFlag(ArgumentParser& group, const std::string &name_, const std::string &help_, std::initializer_list matcher_, 48 | const T &defaultValue_ = T()); 49 | ~ValueFlag(); 50 | 51 | T &Get() noexcept; 52 | }; 53 | 54 | using string_consumer = void(const std::string&); 55 | 56 | class ArgumentParser { 57 | 
args::ArgumentParser* delegate; 58 | 59 | friend HelpFlag; 60 | friend Flag; 61 | template 62 | friend class ValueFlag; 63 | public: 64 | ArgumentParser(const std::string& description_, const std::string& epilog_ = std::string()); 65 | ~ArgumentParser(); 66 | 67 | bool ParseCLI(const int argc, const char * const * argv, string_consumer* help = nullptr, string_consumer* parse_error = nullptr); 68 | std::string Help() const; 69 | 70 | }; 71 | 72 | }; // namespace argsw -------------------------------------------------------------------------------- /config.mk: -------------------------------------------------------------------------------- 1 | -include local.mk 2 | 3 | # set DEBUG to 1 to enable various debugging checks 4 | DEBUG ?= 0 5 | CPP_STD ?= c++11 6 | C_STD ?= c11 7 | CPU_ARCH ?= native 8 | 9 | # $(info DEBUG=$(DEBUG)) 10 | 11 | ifeq ($(DEBUG),1) 12 | O_LEVEL ?= -O0 13 | NASM_DEBUG ?= 1 14 | NDEBUG= 15 | else 16 | O_LEVEL ?= -O2 17 | NASM_DEBUG ?= 0 18 | NDEBUG=-DNDEBUG 19 | endif 20 | 21 | ## detect the platform and use rdtsc only on x86 22 | UNAME_M := $(shell uname -m) 23 | 24 | ifeq ($(UNAME_M),x86_64) 25 | USE_RDTSC ?= 1 26 | MARCH_ARG=-march 27 | else 28 | USE_RDTSC ?= 0 29 | # non-x86 platforms seem to use mcpu rather than march, bleh 30 | MARCH_ARG=-mcpu 31 | endif 32 | 33 | $(info ARCH=$(UNAME_M) USE_RDTSC=$(USE_RDTSC)) 34 | -------------------------------------------------------------------------------- /cpuid.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * cpuid.cpp 3 | */ 4 | 5 | #include "cpuid.hpp" 6 | 7 | #include 8 | 9 | using std::uint8_t; 10 | using std::uint32_t; 11 | 12 | 13 | std::string cpuid_result::to_string() { 14 | std::string s; 15 | s += "eax = " + std::to_string(eax) + ", "; 16 | s += "ebx = " + std::to_string(ebx) + ", "; 17 | s += "ecx = " + std::to_string(ecx) + ", "; 18 | s += "edx = " + std::to_string(edx); 19 | return s; 20 | } 21 | 22 | uint32_t cpuid_highest_leaf_inner() { 
23 | return cpuid(0).eax; 24 | } 25 | 26 | uint32_t cpuid_highest_leaf() { 27 | static uint32_t cached = cpuid_highest_leaf_inner(); 28 | return cached; 29 | } 30 | 31 | cpuid_result cpuid(int leaf, int subleaf) { 32 | cpuid_result ret = {}; 33 | asm ("cpuid" 34 | : 35 | "=a" (ret.eax), 36 | "=b" (ret.ebx), 37 | "=c" (ret.ecx), 38 | "=d" (ret.edx) 39 | : 40 | "a" (leaf), 41 | "c" (subleaf) 42 | ); 43 | return ret; 44 | } 45 | 46 | cpuid_result cpuid(int leaf) { 47 | return cpuid(leaf, 0); 48 | } 49 | /* decode family/model/stepping from CPUID leaf 1 EAX; both extended fields are selected by the BASE family, so the model is extended before the family is modified */ 50 | family_model gfm_inner() { 51 | auto cpuid1 = cpuid(1); 52 | family_model ret; 53 | ret.family = (cpuid1.eax >> 8) & 0xF; 54 | ret.model = (cpuid1.eax >> 4) & 0xF; 55 | ret.stepping = (cpuid1.eax ) & 0xF; 56 | if (ret.family == 15 || ret.family == 6) { // ret.family still holds the base family here 57 | ret.model += ((cpuid1.eax >> 16) & 0xF) << 4; // extended model 58 | } 59 | if (ret.family == 15) { 60 | ret.family += (cpuid1.eax >> 20) & 0xFF; // extended family 61 | } 62 | return ret; 63 | } 64 | 65 | family_model get_family_model() { 66 | static family_model cached_family_model = gfm_inner(); 67 | return cached_family_model; 68 | } 69 | 70 | std::string get_brand_string() { 71 | auto check = cpuid(0x80000000); 72 | if (check.eax < 0x80000004) { 73 | return std::string("unkown (eax =") + std::to_string(check.eax) +")"; 74 | } 75 | std::string ret; 76 | for (uint32_t eax : {0x80000002, 0x80000003, 0x80000004}) { 77 | char buf[17]; 78 | auto fourchars = cpuid(eax); 79 | memcpy(buf + 0, &fourchars.eax, 4); 80 | memcpy(buf + 4, &fourchars.ebx, 4); 81 | memcpy(buf + 8, &fourchars.ecx, 4); 82 | memcpy(buf + 12, &fourchars.edx, 4); 83 | buf[16] = '\0'; 84 | ret += buf; 85 | } 86 | return ret; 87 | } 88 | 89 | /* get bits [start:end] inclusive of the given value */ 90 | uint32_t get_bits(uint32_t value, int start, int end) { 91 | value >>= start; 92 | uint32_t mask = ((uint64_t)-1) << (end - start + 1); 93 | return value & ~mask; 94 | } 95 | 96 | /** 97 | * Get the shift amount for unique physical core IDs 98 | */ 
99 | int get_smt_shift() 100 | { 101 | if (cpuid_highest_leaf() < 0xb) { 102 | return -1; 103 | } 104 | uint32_t smtShift = -1u; 105 | for (uint32_t subleaf = 0; ; subleaf++) { 106 | cpuid_result leafb = cpuid(0xb, subleaf); 107 | uint32_t type = get_bits(leafb.ecx, 8 ,15); 108 | if (!get_bits(leafb.ebx,0,15) || type == 0) { 109 | // done 110 | break; 111 | } 112 | if (type == 1) { 113 | // here's the value we are after: make sure we don't have more than one entry for 114 | // this type though! 115 | if (smtShift != -1u) { 116 | fprintf(stderr, "Warning: more than one level of type 1 in the x2APIC hierarchy"); 117 | } 118 | smtShift = get_bits(leafb.eax, 0, 4); 119 | } 120 | } 121 | return smtShift; 122 | } 123 | 124 | -------------------------------------------------------------------------------- /cpuid.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * cpuid.hpp 3 | */ 4 | 5 | #ifndef CPUID_HPP_ 6 | #define CPUID_HPP_ 7 | 8 | #include 9 | #include 10 | 11 | struct cpuid_result { 12 | std::uint32_t eax, ebx, ecx, edx; 13 | std::string to_string(); 14 | }; 15 | 16 | struct family_model { 17 | uint8_t family; 18 | uint8_t model; 19 | uint8_t stepping; 20 | std::string to_string() { 21 | std::string s; 22 | s += "family = " + std::to_string(family) + ", "; 23 | s += "model = " + std::to_string(model) + ", "; 24 | s += "stepping = " + std::to_string(stepping); 25 | return s; 26 | } 27 | }; 28 | 29 | 30 | /** the highest supported leaf value */ 31 | uint32_t cpuid_highest_leaf(); 32 | 33 | /* return the CPUID result for querying the given leaf (EAX) and no subleaf (ECX=0) */ 34 | cpuid_result cpuid(int leaf); 35 | 36 | /* return the CPUID result for querying the given leaf (EAX) and subleaf (ECX) */ 37 | cpuid_result cpuid(int leaf, int subleaf); 38 | 39 | family_model get_family_model(); 40 | 41 | std::string get_brand_string(); 42 | 43 | int get_smt_shift(); 44 | 45 | /* get bits [start:end] inclusive of the given 
value */ 46 | uint32_t get_bits(uint32_t value, int start, int end); 47 | 48 | #endif /* CPUID_HPP_ */ 49 | -------------------------------------------------------------------------------- /cycle-timer.c: -------------------------------------------------------------------------------- 1 | /* 2 | * cycle-timer.c 3 | * 4 | * Implementation for cycle-timer.h 5 | */ 6 | 7 | #include "cycle-timer.h" 8 | #include "hedley.h" 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | 16 | const size_t ITERS = 10000; 17 | const size_t TRIES = 11; 18 | const size_t WARMUP = 1000; 19 | 20 | volatile size_t sink; 21 | 22 | /** 23 | * Calibration loop that relies on store throughput being exactly 1 per cycle 24 | * on all modern x86 chips, and the loop overhead running totally in parallel. 25 | */ 26 | HEDLEY_NEVER_INLINE 27 | __attribute__((aligned(32))) 28 | void store_calibration(size_t iters) { 29 | do { 30 | sink = iters; 31 | } while (--iters > 0); 32 | } 33 | 34 | int intcompare(const void *l_, const void *r_) { 35 | int64_t l = *(const uint64_t *)l_; 36 | int64_t r = *(const uint64_t *)r_; 37 | return (l > r) - (l < r); 38 | } 39 | 40 | /* 41 | * Calculate the frequency of the CPU based on timing a tight loop that we expect to 42 | * take one iteration per cycle. 43 | * 44 | * ITERS is the base number of iterations to use: the calibration routine is actually 45 | * run twice, once with ITERS iterations and once with 2*ITERS, and a delta is used to 46 | * remove measurement overhead. 
47 | */ 48 | HEDLEY_NEVER_INLINE 49 | static double get_ghz(bool print) { 50 | 51 | const char *force = getenv("CYCLE_TIMER_FORCE_MHZ"); 52 | if (force) { 53 | int mhz = atoi(force); 54 | if (mhz) { 55 | double ghz = mhz / 1000.; 56 | if (print) fprintf(stderr, "Forced CPU speed (CYCLE_TIMER_FORCE_MHZ): %5.2f GHz\n", ghz); 57 | return ghz; 58 | } else { 59 | if (print) fprintf(stderr, "Bad value for CYCLE_TIMER_FORCE_MHZ: '%s' (falling back to cal loop)\n", force); 60 | } 61 | } 62 | 63 | int64_t results[TRIES]; 64 | 65 | for (size_t w = 0; w < WARMUP + 1; w++) { 66 | for (size_t r = 0; r < TRIES; r++) { 67 | cl_timepoint t0 = cl_now(); 68 | store_calibration(ITERS); 69 | cl_timepoint t1 = cl_now(); 70 | store_calibration(ITERS * 2); 71 | cl_timepoint t2 = cl_now(); 72 | results[r] = cl_delta(t1, t2).nanos - cl_delta(t0, t1).nanos; 73 | } 74 | } 75 | 76 | // return the median value 77 | qsort(results, TRIES, sizeof(results[0]), intcompare); 78 | double ghz = ((double)ITERS / results[TRIES/2]); 79 | if (print) fprintf(stderr, "Estimated CPU speed: %5.2f GHz\n", ghz); 80 | return ghz; 81 | } 82 | 83 | static bool is_init = false; 84 | double ghz; 85 | 86 | void cl_init(bool print) { 87 | if (HEDLEY_UNLIKELY(!is_init)) { 88 | ghz = get_ghz(print); 89 | is_init = true; 90 | } 91 | }; 92 | 93 | cl_timepoint cl_now() { 94 | struct timespec spec; 95 | if (clock_gettime(CLOCK_MONOTONIC, &spec)) { 96 | return (cl_timepoint){0}; 97 | } else { 98 | return (cl_timepoint){spec.tv_sec * 1000000000ll + spec.tv_nsec}; 99 | } 100 | } 101 | 102 | /* 103 | * Take an interval value and convert it to cycles based on the 104 | * detected frequency of this host. 105 | */ 106 | double cl_to_cycles(cl_interval interval) { 107 | cl_init(false); 108 | return interval.nanos * ghz; 109 | } 110 | 111 | /* 112 | * Take an interval value and "convert" it to nanos. 
113 | */ 114 | double cl_to_nanos(cl_interval interval) { 115 | return interval.nanos; 116 | } 117 | -------------------------------------------------------------------------------- /cycle-timer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cycle-timer.h 3 | * 4 | * A timer that returns results in CPU cycles in addition to nanoseconds. 5 | * It measures cycles indirectly by measuring the wall-time, and then converting 6 | * that to a cycle count based on a calibration loop performed once at startup. 7 | */ 8 | 9 | #ifndef CYCLE_TIMER_H_ 10 | #define CYCLE_TIMER_H_ 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | #include 17 | #include 18 | 19 | /** 20 | * A point in time, or an interval when subtracted. You should probably 21 | * treat this as an opaque struct, in case I change the implementation 22 | * someday. 23 | */ 24 | struct cl_timepoint_ { 25 | int64_t nanos; 26 | }; 27 | typedef struct cl_timepoint_ cl_timepoint; 28 | 29 | /** 30 | * An interval created by subtracting two points in time, measured 31 | * in nanoseconds. 32 | */ 33 | struct cl_interval_ { 34 | int64_t nanos; 35 | }; 36 | typedef struct cl_interval_ cl_interval; 37 | 38 | /* return the current moment in time as a cl_timepoint */ 39 | cl_timepoint cl_now(); 40 | 41 | /* 42 | * Return the interval between timepoints first and second. 43 | * This value is positive iff second occurs after first. 44 | */ 45 | static inline cl_interval cl_delta(cl_timepoint first, cl_timepoint second) { 46 | return (cl_interval){second.nanos - first.nanos}; 47 | } 48 | 49 | /* 50 | * Take an interval value and convert it to cycles based on the 51 | * detected frequency of this host. 52 | */ 53 | double cl_to_cycles(cl_interval interval); 54 | 55 | double cl_to_nanos(cl_interval interval); 56 | 57 | /* 58 | * Initialize the cycletimer infrastructure. 
Mostly this just means calculating 59 | * the cycle to nanoseconds value (i.e., the CPU frequency). You never *need* to 60 | * call this function: if you haven't called it, initialization happens automatically when 61 | * it is first needed (usually lazily - when calling cl_to_cycles). 62 | * Initialization may be lengthy, however, so this function is offered so that the user can trigger 63 | * it at a time of their choosing (and allowing the user to elect whether to 64 | * print out diagnostic information about the calibration). 65 | * 66 | * If you pass true for print, diagnostic information like the detected CPU 67 | * frequency is printed to stderr. 68 | */ 69 | void cl_init(bool print); 70 | 71 | #ifdef __cplusplus 72 | } 73 | #endif 74 | 75 | #endif /* CYCLE_TIMER_H_ */ 76 | -------------------------------------------------------------------------------- /cyclic-barrier.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CYCLIC_BARRIER_H_ 2 | #define CYCLIC_BARRIER_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | /** 9 | * Auto-reset spin-based barrier. 10 | * 11 | * Waits for N threads to arrive, then releases those N threads and resets itself, again 12 | * waiting for N threads to arrive. 
13 | */ 14 | template 15 | struct cyclic_barrier_template { 16 | using int32_t = std::int32_t; 17 | int32_t break_count; 18 | std::atomic current; 19 | 20 | std::function breaker_function; 21 | T return_value; 22 | std::mutex lock; 23 | 24 | template 25 | cyclic_barrier_template(uint32_t count, F f) : break_count(count), current{0}, breaker_function{f} {} 26 | 27 | /* increment and hot spin on the waiter count until it hits the break point, returns the spin count in case you care */ 28 | T wait() { 29 | for (size_t count = 0; ; count++) { 30 | 31 | auto waiters = current.load(); 32 | 33 | if (waiters < 0) { 34 | // while waiters < 0, there are draining earlier waiters, so we wait 35 | // for them to leave 36 | continue; 37 | } 38 | assert(waiters < break_count); 39 | 40 | // two remaining cases: we are not the breaking waiter, in which case we increment and wait ... 41 | if (waiters < break_count - 1) { 42 | if (current.compare_exchange_strong(waiters, waiters + 1)) { 43 | // printf("> tid %zu is waiting (w: %u)\n", (size_t)gettid(), waiters); 44 | // we successfully started our wait 45 | auto original = waiters; 46 | while ((waiters = current.load()) >= 0) { 47 | count++; 48 | assert(waiters >= original); // waiters can only go up, until it goes negative 49 | } 50 | auto ret = return_value; 51 | current++; 52 | return ret; 53 | } 54 | } else { 55 | // ... 
or else we are (potentially) the breaking waiter, in which case we flip the sign of waiters 56 | // which unblocks the other waiters 57 | // std::lock_guard guard(lock); 58 | waiters = current.load(); 59 | if (waiters == break_count - 1) { 60 | auto ret = return_value = breaker_function(); 61 | current.store(-waiters); 62 | // printf("> tid %zu is breaking (w: %d)\n", (size_t)gettid(), waiters); 63 | return ret; 64 | } 65 | 66 | } 67 | } 68 | } 69 | }; 70 | 71 | struct cyclic_barrier : public cyclic_barrier_template { 72 | cyclic_barrier(uint32_t count) : cyclic_barrier_template{count, [](){ return 0; }} {} 73 | }; 74 | 75 | #endif // guard -------------------------------------------------------------------------------- /dummy.rebuild: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/travisdowns/concurrency-hierarchy-bench/1b3f7f8ceeafd0285ee8c813dd5304f84ad437c8/dummy.rebuild -------------------------------------------------------------------------------- /fairlocks.cpp: -------------------------------------------------------------------------------- 1 | #include "fairlocks.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #ifdef __x86_64__ 8 | #include 9 | inline void cpu_relax() { _mm_pause(); } 10 | #elif defined(__aarch64__) 11 | inline void cpu_relax() { asm volatile ("yield"); } 12 | #else 13 | inline void cpu_relax() {} 14 | #endif 15 | 16 | using namespace locks; 17 | 18 | void spin_base::unlock() { 19 | islocked.store(false, std::memory_order_release); 20 | } 21 | 22 | template 23 | static inline void spin_impl(std::atomic& is_locked, S spinner) { 24 | do { 25 | while (is_locked.load(std::memory_order_relaxed)) { 26 | spinner(); 27 | } 28 | } while (is_locked.exchange(true, std::memory_order_acquire)); 29 | } 30 | 31 | void spinlock_hot::lock() { spin_impl(islocked, []{}); } 32 | 33 | void spinlock_pause::lock() { spin_impl(islocked, cpu_relax); } 34 | 35 | void spinlock_yield::lock() 
{ spin_impl(islocked, []{ sched_yield(); }); } 36 | 37 | void blocking_ticket::lock() { 38 | auto ticket = dispenser.fetch_add(1, std::memory_order_relaxed); 39 | 40 | if (ticket == serving.load(std::memory_order_acquire)) 41 | return; 42 | 43 | std::unique_lock lock(mutex); 44 | while (ticket != serving.load(std::memory_order_acquire)) { 45 | cvar.wait(lock); 46 | } 47 | } 48 | 49 | void blocking_ticket::unlock() { 50 | std::unique_lock lock(mutex); 51 | auto s = serving.load(std::memory_order_relaxed) + 1; 52 | serving.store(s, std::memory_order_release); 53 | auto d = dispenser.load(std::memory_order_relaxed); 54 | assert(s <= d); 55 | if (s < d) { 56 | // wake waiters 57 | cvar.notify_all(); 58 | } 59 | } 60 | 61 | struct fifo_queued::queue_elem { 62 | std::condition_variable cvar; 63 | bool owner = false; 64 | }; 65 | 66 | void fifo_queued::lock() { 67 | std::unique_lock guard(mutex); 68 | if (!locked) { 69 | locked = true; 70 | return; 71 | } 72 | 73 | queue_elem node; 74 | cvar_queue.push_back(&node); 75 | 76 | do { 77 | node.cvar.wait(guard); 78 | } while (!node.owner); 79 | 80 | assert(locked && cvar_queue.front() == &node); 81 | cvar_queue.pop_front(); 82 | } 83 | 84 | void fifo_queued::unlock() { 85 | std::unique_lock guard(mutex); 86 | if (cvar_queue.empty()) { 87 | locked = false; 88 | } else { 89 | auto& next = cvar_queue.front(); 90 | next->owner = true; 91 | next->cvar.notify_one(); 92 | } 93 | } 94 | 95 | int cmpxchg(int& var, int old, int desired) { 96 | return __sync_val_compare_and_swap(&var, old, desired); 97 | } 98 | 99 | int xchg(int& var, int val) { 100 | return __atomic_exchange_n(&var, val, __ATOMIC_ACQUIRE); 101 | } 102 | 103 | int atomic_dec(int& var) { 104 | return __sync_fetch_and_sub(&var, 1); 105 | } 106 | 107 | /** 108 | * The futex related calls are cribbed from: 109 | * // https://github.com/eliben/code-for-blog/blob/master/2018/futex-basics/futex-basic-process.c 110 | */ 111 | int futex(int* uaddr, int futex_op, int val, const 
struct timespec* timeout, 112 | int* uaddr2, int val3) { 113 | return syscall(SYS_futex, uaddr, futex_op, val, timeout, uaddr2, val3); 114 | } 115 | 116 | 117 | void futex_wait(int* futex_addr, int val) { 118 | int ret = futex(futex_addr, FUTEX_WAIT, val, NULL, NULL, 0); 119 | (void)ret; 120 | assert(ret == 0 || (ret == -1 && errno == EAGAIN)); 121 | } 122 | 123 | void futex_wake(int* futex_addr, int nwait) { 124 | int futex_rc = futex(futex_addr, FUTEX_WAKE, nwait, NULL, NULL, 0); 125 | if (futex_rc == -1) { 126 | perror("futex wake"); 127 | exit(1); 128 | } 129 | } 130 | 131 | void mutex3::lock() { 132 | int c; 133 | if ((c = cmpxchg(val, 0, 1)) != 0) { 134 | if (c != 2) { 135 | c = xchg(val, 2); 136 | } 137 | while (c != 0) { 138 | futex_wait(&val, 2); 139 | c = xchg(val, 2); 140 | } 141 | } 142 | } 143 | 144 | void mutex3::unlock() { 145 | if (atomic_dec(val) != 1) { 146 | // printf("%d unlock wake\n", tid); 147 | val = 0; 148 | futex_wake(&val, 1); 149 | } else { 150 | // printf("%d unlock fast\n", tid); 151 | } 152 | } -------------------------------------------------------------------------------- /fairlocks.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAIRLOCKS_HPP_ 2 | #define FAIRLOCKS_HPP_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace locks { 14 | 15 | class spin_base { 16 | protected: 17 | std::atomic islocked{}; 18 | public: 19 | void unlock(); 20 | }; 21 | 22 | struct spinlock_hot : spin_base { 23 | void lock(); 24 | }; 25 | 26 | struct spinlock_pause : spin_base { 27 | void lock(); 28 | }; 29 | 30 | struct spinlock_yield : spin_base { 31 | void lock(); 32 | }; 33 | 34 | using spin_f = int(); 35 | 36 | template 37 | class ticket_template { 38 | std::atomic dispenser{}, serving{}; 39 | 40 | public: 41 | void lock() { 42 | auto ticket = dispenser.fetch_add(1, std::memory_order_relaxed); 43 | 44 | while (ticket != 
serving.load(std::memory_order_acquire)) 45 | SPINF(); 46 | } 47 | 48 | void unlock() { 49 | serving.store(serving.load() + 1, std::memory_order_release); 50 | } 51 | }; 52 | 53 | static int nop() { 54 | return 0; 55 | } 56 | 57 | using ticket_spin = ticket_template; 58 | using ticket_yield = ticket_template; 59 | 60 | class blocking_ticket { 61 | std::atomic dispenser{}, serving{}; 62 | std::mutex mutex; 63 | std::condition_variable cvar; 64 | 65 | public: 66 | void lock(); 67 | 68 | void unlock(); 69 | }; 70 | 71 | class fifo_queued { 72 | struct queue_elem; 73 | 74 | std::mutex mutex; 75 | std::deque cvar_queue; 76 | bool locked = false; 77 | 78 | public: 79 | void lock(); 80 | 81 | void unlock(); 82 | }; 83 | 84 | /** 85 | * mutex3 from "Futexes Are Tricky" 86 | * https://akkadia.org/drepper/futex.pdf 87 | */ 88 | class mutex3 { 89 | public: 90 | mutex3() : val(0) {} 91 | 92 | void lock(); 93 | 94 | void unlock(); 95 | private: 96 | int val; 97 | }; 98 | 99 | } // namespace locks 100 | 101 | #endif -------------------------------------------------------------------------------- /fmt/include/fmt/compile.h: -------------------------------------------------------------------------------- 1 | // Formatting library for C++ - experimental format string compilation 2 | // 3 | // Copyright (c) 2012 - present, Victor Zverovich and fmt contributors 4 | // All rights reserved. 5 | // 6 | // For the license information refer to format.h. 7 | 8 | #ifndef FMT_COMPILE_H_ 9 | #define FMT_COMPILE_H_ 10 | 11 | #include 12 | 13 | #include "format.h" 14 | 15 | FMT_BEGIN_NAMESPACE 16 | namespace internal { 17 | 18 | // Part of a compiled format string. It can be either literal text or a 19 | // replacement field. 
20 | template struct format_part { 21 | enum class kind { arg_index, arg_name, text, replacement }; 22 | 23 | struct replacement { 24 | arg_ref arg_id; 25 | dynamic_format_specs specs; 26 | }; 27 | 28 | kind part_kind; 29 | union value { 30 | int arg_index; 31 | basic_string_view str; 32 | replacement repl; 33 | 34 | FMT_CONSTEXPR value(int index = 0) : arg_index(index) {} 35 | FMT_CONSTEXPR value(basic_string_view s) : str(s) {} 36 | FMT_CONSTEXPR value(replacement r) : repl(r) {} 37 | } val; 38 | // Position past the end of the argument id. 39 | const Char* arg_id_end = nullptr; 40 | 41 | FMT_CONSTEXPR format_part(kind k = kind::arg_index, value v = {}) 42 | : part_kind(k), val(v) {} 43 | 44 | static FMT_CONSTEXPR format_part make_arg_index(int index) { 45 | return format_part(kind::arg_index, index); 46 | } 47 | static FMT_CONSTEXPR format_part make_arg_name(basic_string_view name) { 48 | return format_part(kind::arg_name, name); 49 | } 50 | static FMT_CONSTEXPR format_part make_text(basic_string_view text) { 51 | return format_part(kind::text, text); 52 | } 53 | static FMT_CONSTEXPR format_part make_replacement(replacement repl) { 54 | return format_part(kind::replacement, repl); 55 | } 56 | }; 57 | 58 | template struct part_counter { 59 | unsigned num_parts = 0; 60 | 61 | FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) { 62 | if (begin != end) ++num_parts; 63 | } 64 | 65 | FMT_CONSTEXPR void on_arg_id() { ++num_parts; } 66 | FMT_CONSTEXPR void on_arg_id(int) { ++num_parts; } 67 | FMT_CONSTEXPR void on_arg_id(basic_string_view) { ++num_parts; } 68 | 69 | FMT_CONSTEXPR void on_replacement_field(const Char*) {} 70 | 71 | FMT_CONSTEXPR const Char* on_format_specs(const Char* begin, 72 | const Char* end) { 73 | // Find the matching brace. 
74 | unsigned brace_counter = 0; 75 | for (; begin != end; ++begin) { 76 | if (*begin == '{') { 77 | ++brace_counter; 78 | } else if (*begin == '}') { 79 | if (brace_counter == 0u) break; 80 | --brace_counter; 81 | } 82 | } 83 | return begin; 84 | } 85 | 86 | FMT_CONSTEXPR void on_error(const char*) {} 87 | }; 88 | 89 | // Counts the number of parts in a format string. 90 | template 91 | FMT_CONSTEXPR unsigned count_parts(basic_string_view format_str) { 92 | part_counter counter; 93 | parse_format_string(format_str, counter); 94 | return counter.num_parts; 95 | } 96 | 97 | template 98 | class format_string_compiler : public error_handler { 99 | private: 100 | using part = format_part; 101 | 102 | PartHandler handler_; 103 | part part_; 104 | basic_string_view format_str_; 105 | basic_format_parse_context parse_context_; 106 | 107 | public: 108 | FMT_CONSTEXPR format_string_compiler(basic_string_view format_str, 109 | PartHandler handler) 110 | : handler_(handler), 111 | format_str_(format_str), 112 | parse_context_(format_str) {} 113 | 114 | FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) { 115 | if (begin != end) 116 | handler_(part::make_text({begin, to_unsigned(end - begin)})); 117 | } 118 | 119 | FMT_CONSTEXPR void on_arg_id() { 120 | part_ = part::make_arg_index(parse_context_.next_arg_id()); 121 | } 122 | 123 | FMT_CONSTEXPR void on_arg_id(int id) { 124 | parse_context_.check_arg_id(id); 125 | part_ = part::make_arg_index(id); 126 | } 127 | 128 | FMT_CONSTEXPR void on_arg_id(basic_string_view id) { 129 | part_ = part::make_arg_name(id); 130 | } 131 | 132 | FMT_CONSTEXPR void on_replacement_field(const Char* ptr) { 133 | part_.arg_id_end = ptr; 134 | handler_(part_); 135 | } 136 | 137 | FMT_CONSTEXPR const Char* on_format_specs(const Char* begin, 138 | const Char* end) { 139 | auto repl = typename part::replacement(); 140 | dynamic_specs_handler> handler( 141 | repl.specs, parse_context_); 142 | auto it = parse_format_specs(begin, end, handler); 
143 | if (*it != '}') on_error("missing '}' in format string"); 144 | repl.arg_id = part_.part_kind == part::kind::arg_index 145 | ? arg_ref(part_.val.arg_index) 146 | : arg_ref(part_.val.str); 147 | auto part = part::make_replacement(repl); 148 | part.arg_id_end = begin; 149 | handler_(part); 150 | return it; 151 | } 152 | }; 153 | 154 | // Compiles a format string and invokes handler(part) for each parsed part. 155 | template 156 | FMT_CONSTEXPR void compile_format_string(basic_string_view format_str, 157 | PartHandler handler) { 158 | parse_format_string( 159 | format_str, 160 | format_string_compiler(format_str, handler)); 161 | } 162 | 163 | template 164 | void format_arg( 165 | basic_format_parse_context& parse_ctx, 166 | Context& ctx, Id arg_id) { 167 | ctx.advance_to( 168 | visit_format_arg(arg_formatter(ctx, &parse_ctx), ctx.arg(arg_id))); 169 | } 170 | 171 | // vformat_to is defined in a subnamespace to prevent ADL. 172 | namespace cf { 173 | template 174 | auto vformat_to(Range out, CompiledFormat& cf, basic_format_args args) 175 | -> typename Context::iterator { 176 | using char_type = typename Context::char_type; 177 | basic_format_parse_context parse_ctx( 178 | to_string_view(cf.format_str_)); 179 | Context ctx(out.begin(), args); 180 | 181 | const auto& parts = cf.parts(); 182 | for (auto part_it = std::begin(parts); part_it != std::end(parts); 183 | ++part_it) { 184 | const auto& part = *part_it; 185 | const auto& value = part.val; 186 | 187 | using format_part_t = format_part; 188 | switch (part.part_kind) { 189 | case format_part_t::kind::text: { 190 | const auto text = value.str; 191 | auto output = ctx.out(); 192 | auto&& it = reserve(output, text.size()); 193 | it = std::copy_n(text.begin(), text.size(), it); 194 | ctx.advance_to(output); 195 | break; 196 | } 197 | 198 | case format_part_t::kind::arg_index: 199 | advance_to(parse_ctx, part.arg_id_end); 200 | internal::format_arg(parse_ctx, ctx, value.arg_index); 201 | break; 202 | 203 | case 
format_part_t::kind::arg_name: 204 | advance_to(parse_ctx, part.arg_id_end); 205 | internal::format_arg(parse_ctx, ctx, value.str); 206 | break; 207 | 208 | case format_part_t::kind::replacement: { 209 | const auto& arg_id_value = value.repl.arg_id.val; 210 | const auto arg = value.repl.arg_id.kind == arg_id_kind::index 211 | ? ctx.arg(arg_id_value.index) 212 | : ctx.arg(arg_id_value.name); 213 | 214 | auto specs = value.repl.specs; 215 | 216 | handle_dynamic_spec(specs.width, specs.width_ref, ctx); 217 | handle_dynamic_spec(specs.precision, 218 | specs.precision_ref, ctx); 219 | 220 | error_handler h; 221 | numeric_specs_checker checker(h, arg.type()); 222 | if (specs.align == align::numeric) checker.require_numeric_argument(); 223 | if (specs.sign != sign::none) checker.check_sign(); 224 | if (specs.alt) checker.require_numeric_argument(); 225 | if (specs.precision >= 0) checker.check_precision(); 226 | 227 | advance_to(parse_ctx, part.arg_id_end); 228 | ctx.advance_to( 229 | visit_format_arg(arg_formatter(ctx, nullptr, &specs), arg)); 230 | break; 231 | } 232 | } 233 | } 234 | return ctx.out(); 235 | } 236 | } // namespace cf 237 | 238 | struct basic_compiled_format {}; 239 | 240 | template 241 | struct compiled_format_base : basic_compiled_format { 242 | using char_type = char_t; 243 | using parts_container = std::vector>; 244 | 245 | parts_container compiled_parts; 246 | 247 | explicit compiled_format_base(basic_string_view format_str) { 248 | compile_format_string(format_str, 249 | [this](const format_part& part) { 250 | compiled_parts.push_back(part); 251 | }); 252 | } 253 | 254 | const parts_container& parts() const { return compiled_parts; } 255 | }; 256 | 257 | template struct format_part_array { 258 | format_part data[N] = {}; 259 | FMT_CONSTEXPR format_part_array() = default; 260 | }; 261 | 262 | template 263 | FMT_CONSTEXPR format_part_array compile_to_parts( 264 | basic_string_view format_str) { 265 | format_part_array parts; 266 | unsigned counter = 
0; 267 | // This is not a lambda for compatibility with older compilers. 268 | struct { 269 | format_part* parts; 270 | unsigned* counter; 271 | FMT_CONSTEXPR void operator()(const format_part& part) { 272 | parts[(*counter)++] = part; 273 | } 274 | } collector{parts.data, &counter}; 275 | compile_format_string(format_str, collector); 276 | if (counter < N) { 277 | parts.data[counter] = 278 | format_part::make_text(basic_string_view()); 279 | } 280 | return parts; 281 | } 282 | 283 | template constexpr const T& constexpr_max(const T& a, const T& b) { 284 | return (a < b) ? b : a; 285 | } 286 | 287 | template 288 | struct compiled_format_base::value>> 289 | : basic_compiled_format { 290 | using char_type = char_t; 291 | 292 | FMT_CONSTEXPR explicit compiled_format_base(basic_string_view) {} 293 | 294 | // Workaround for old compilers. Format string compilation will not be 295 | // performed there anyway. 296 | #if FMT_USE_CONSTEXPR 297 | static FMT_CONSTEXPR_DECL const unsigned num_format_parts = 298 | constexpr_max(count_parts(to_string_view(S())), 1u); 299 | #else 300 | static const unsigned num_format_parts = 1; 301 | #endif 302 | 303 | using parts_container = format_part[num_format_parts]; 304 | 305 | const parts_container& parts() const { 306 | static FMT_CONSTEXPR_DECL const auto compiled_parts = 307 | compile_to_parts( 308 | internal::to_string_view(S())); 309 | return compiled_parts.data; 310 | } 311 | }; 312 | 313 | template 314 | class compiled_format : private compiled_format_base { 315 | public: 316 | using typename compiled_format_base::char_type; 317 | 318 | private: 319 | basic_string_view format_str_; 320 | 321 | template 322 | friend auto cf::vformat_to(Range out, CompiledFormat& cf, 323 | basic_format_args args) -> 324 | typename Context::iterator; 325 | 326 | public: 327 | compiled_format() = delete; 328 | explicit constexpr compiled_format(basic_string_view format_str) 329 | : compiled_format_base(format_str), format_str_(format_str) {} 330 | }; 
331 | 332 | #ifdef __cpp_if_constexpr 333 | template struct type_list {}; 334 | 335 | // Returns a reference to the argument at index N from [first, rest...]. 336 | template 337 | constexpr const auto& get(const T& first, const Args&... rest) { 338 | static_assert(N < 1 + sizeof...(Args), "index is out of bounds"); 339 | if constexpr (N == 0) 340 | return first; 341 | else 342 | return get(rest...); 343 | } 344 | 345 | template struct get_type_impl; 346 | 347 | template struct get_type_impl> { 348 | using type = remove_cvref_t(std::declval()...))>; 349 | }; 350 | 351 | template 352 | using get_type = typename get_type_impl::type; 353 | 354 | template struct is_compiled_format : std::false_type {}; 355 | 356 | template struct text { 357 | basic_string_view data; 358 | using char_type = Char; 359 | 360 | template 361 | OutputIt format(OutputIt out, const Args&...) const { 362 | // TODO: reserve 363 | return copy_str(data.begin(), data.end(), out); 364 | } 365 | }; 366 | 367 | template 368 | struct is_compiled_format> : std::true_type {}; 369 | 370 | template 371 | constexpr text make_text(basic_string_view s, size_t pos, 372 | size_t size) { 373 | return {{&s[pos], size}}; 374 | } 375 | 376 | template , int> = 0> 378 | OutputIt format_default(OutputIt out, T value) { 379 | // TODO: reserve 380 | format_int fi(value); 381 | return std::copy(fi.data(), fi.data() + fi.size(), out); 382 | } 383 | 384 | template 385 | OutputIt format_default(OutputIt out, double value) { 386 | writer w(out); 387 | w.write(value); 388 | return w.out(); 389 | } 390 | 391 | template 392 | OutputIt format_default(OutputIt out, Char value) { 393 | *out++ = value; 394 | return out; 395 | } 396 | 397 | template 398 | OutputIt format_default(OutputIt out, const Char* value) { 399 | auto length = std::char_traits::length(value); 400 | return copy_str(value, value + length, out); 401 | } 402 | 403 | // A replacement field that refers to argument N. 
404 | template struct field { 405 | using char_type = Char; 406 | 407 | template 408 | OutputIt format(OutputIt out, const Args&... args) const { 409 | // This ensures that the argument type is convertile to `const T&`. 410 | const T& arg = get(args...); 411 | return format_default(out, arg); 412 | } 413 | }; 414 | 415 | template 416 | struct is_compiled_format> : std::true_type {}; 417 | 418 | template struct concat { 419 | L lhs; 420 | R rhs; 421 | using char_type = typename L::char_type; 422 | 423 | template 424 | OutputIt format(OutputIt out, const Args&... args) const { 425 | out = lhs.format(out, args...); 426 | return rhs.format(out, args...); 427 | } 428 | }; 429 | 430 | template 431 | struct is_compiled_format> : std::true_type {}; 432 | 433 | template 434 | constexpr concat make_concat(L lhs, R rhs) { 435 | return {lhs, rhs}; 436 | } 437 | 438 | struct unknown_format {}; 439 | 440 | template 441 | constexpr size_t parse_text(basic_string_view str, size_t pos) { 442 | for (size_t size = str.size(); pos != size; ++pos) { 443 | if (str[pos] == '{' || str[pos] == '}') break; 444 | } 445 | return pos; 446 | } 447 | 448 | template 449 | constexpr auto compile_format_string(S format_str); 450 | 451 | template 452 | constexpr auto parse_tail(T head, S format_str) { 453 | if constexpr (POS != to_string_view(format_str).size()) { 454 | constexpr auto tail = compile_format_string(format_str); 455 | if constexpr (std::is_same, 456 | unknown_format>()) 457 | return tail; 458 | else 459 | return make_concat(head, tail); 460 | } else { 461 | return head; 462 | } 463 | } 464 | 465 | // Compiles a non-empty format string and returns the compiled representation 466 | // or unknown_format() on unrecognized input. 
467 | template 468 | constexpr auto compile_format_string(S format_str) { 469 | using char_type = typename S::char_type; 470 | constexpr basic_string_view str = format_str; 471 | if constexpr (str[POS] == '{') { 472 | if (POS + 1 == str.size()) 473 | throw format_error("unmatched '{' in format string"); 474 | if constexpr (str[POS + 1] == '{') { 475 | return parse_tail(make_text(str, POS, 1), format_str); 476 | } else if constexpr (str[POS + 1] == '}') { 477 | using type = get_type; 478 | if constexpr (std::is_same::value) { 479 | return parse_tail(field(), 480 | format_str); 481 | } else { 482 | return unknown_format(); 483 | } 484 | } else { 485 | return unknown_format(); 486 | } 487 | } else if constexpr (str[POS] == '}') { 488 | if (POS + 1 == str.size()) 489 | throw format_error("unmatched '}' in format string"); 490 | return parse_tail(make_text(str, POS, 1), format_str); 491 | } else { 492 | constexpr auto end = parse_text(str, POS + 1); 493 | return parse_tail(make_text(str, POS, end - POS), 494 | format_str); 495 | } 496 | } 497 | #endif // __cpp_if_constexpr 498 | } // namespace internal 499 | 500 | #if FMT_USE_CONSTEXPR 501 | # ifdef __cpp_if_constexpr 502 | template ::value)> 504 | constexpr auto compile(S format_str) { 505 | constexpr basic_string_view str = format_str; 506 | if constexpr (str.size() == 0) { 507 | return internal::make_text(str, 0, 0); 508 | } else { 509 | constexpr auto result = 510 | internal::compile_format_string, 0, 0>( 511 | format_str); 512 | if constexpr (std::is_same, 513 | internal::unknown_format>()) { 514 | return internal::compiled_format(to_string_view(format_str)); 515 | } else { 516 | return result; 517 | } 518 | } 519 | } 520 | 521 | template ::value)> 524 | std::basic_string format(const CompiledFormat& cf, const Args&... 
args) { 525 | basic_memory_buffer buffer; 526 | cf.format(std::back_inserter(buffer), args...); 527 | return to_string(buffer); 528 | } 529 | 530 | template ::value)> 532 | OutputIt format_to(OutputIt out, const CompiledFormat& cf, 533 | const Args&... args) { 534 | return cf.format(out, args...); 535 | } 536 | # else 537 | template ::value)> 539 | constexpr auto compile(S format_str) -> internal::compiled_format { 540 | return internal::compiled_format(to_string_view(format_str)); 541 | } 542 | # endif // __cpp_if_constexpr 543 | #endif // FMT_USE_CONSTEXPR 544 | 545 | // Compiles the format string which must be a string literal. 546 | template 547 | auto compile(const Char (&format_str)[N]) 548 | -> internal::compiled_format { 549 | return internal::compiled_format( 550 | basic_string_view(format_str, N - 1)); 551 | } 552 | 553 | template ::value)> 557 | std::basic_string format(const CompiledFormat& cf, const Args&... args) { 558 | basic_memory_buffer buffer; 559 | using range = buffer_range; 560 | using context = buffer_context; 561 | internal::cf::vformat_to(range(buffer), cf, 562 | make_format_args(args...)); 563 | return to_string(buffer); 564 | } 565 | 566 | template ::value)> 569 | OutputIt format_to(OutputIt out, const CompiledFormat& cf, 570 | const Args&... args) { 571 | using char_type = typename CompiledFormat::char_type; 572 | using range = internal::output_range; 573 | using context = format_context_t; 574 | return internal::cf::vformat_to(range(out), cf, 575 | make_format_args(args...)); 576 | } 577 | 578 | template ::value)> 580 | format_to_n_result format_to_n(OutputIt out, size_t n, 581 | const CompiledFormat& cf, 582 | const Args&... args) { 583 | auto it = 584 | format_to(internal::truncating_iterator(out, n), cf, args...); 585 | return {it.base(), it.count()}; 586 | } 587 | 588 | template 589 | std::size_t formatted_size(const CompiledFormat& cf, const Args&... 
args) { 590 | return format_to(internal::counting_iterator(), cf, args...).count(); 591 | } 592 | 593 | FMT_END_NAMESPACE 594 | 595 | #endif // FMT_COMPILE_H_ 596 | -------------------------------------------------------------------------------- /fmt/include/fmt/locale.h: -------------------------------------------------------------------------------- 1 | // Formatting library for C++ - std::locale support 2 | // 3 | // Copyright (c) 2012 - present, Victor Zverovich 4 | // All rights reserved. 5 | // 6 | // For the license information refer to format.h. 7 | 8 | #ifndef FMT_LOCALE_H_ 9 | #define FMT_LOCALE_H_ 10 | 11 | #include 12 | 13 | #include "format.h" 14 | 15 | FMT_BEGIN_NAMESPACE 16 | 17 | namespace internal { 18 | template 19 | typename buffer_context::iterator vformat_to( 20 | const std::locale& loc, buffer& buf, 21 | basic_string_view format_str, 22 | basic_format_args>> args) { 23 | using range = buffer_range; 24 | return vformat_to>(buf, to_string_view(format_str), args, 25 | internal::locale_ref(loc)); 26 | } 27 | 28 | template 29 | std::basic_string vformat( 30 | const std::locale& loc, basic_string_view format_str, 31 | basic_format_args>> args) { 32 | basic_memory_buffer buffer; 33 | internal::vformat_to(loc, buffer, format_str, args); 34 | return fmt::to_string(buffer); 35 | } 36 | } // namespace internal 37 | 38 | template > 39 | inline std::basic_string vformat( 40 | const std::locale& loc, const S& format_str, 41 | basic_format_args>> args) { 42 | return internal::vformat(loc, to_string_view(format_str), args); 43 | } 44 | 45 | template > 46 | inline std::basic_string format(const std::locale& loc, 47 | const S& format_str, Args&&... 
args) { 48 | return internal::vformat( 49 | loc, to_string_view(format_str), 50 | internal::make_args_checked(format_str, args...)); 51 | } 52 | 53 | template ::value, char_t>> 56 | inline OutputIt vformat_to( 57 | OutputIt out, const std::locale& loc, const S& format_str, 58 | format_args_t, Char> args) { 59 | using range = internal::output_range; 60 | return vformat_to>( 61 | range(out), to_string_view(format_str), args, internal::locale_ref(loc)); 62 | } 63 | 64 | template ::value&& 66 | internal::is_string::value)> 67 | inline OutputIt format_to(OutputIt out, const std::locale& loc, 68 | const S& format_str, Args&&... args) { 69 | internal::check_format_string(format_str); 70 | using context = format_context_t>; 71 | format_arg_store as{args...}; 72 | return vformat_to(out, loc, to_string_view(format_str), 73 | basic_format_args(as)); 74 | } 75 | 76 | FMT_END_NAMESPACE 77 | 78 | #endif // FMT_LOCALE_H_ 79 | -------------------------------------------------------------------------------- /fmt/include/fmt/os.h: -------------------------------------------------------------------------------- 1 | // Formatting library for C++ - optional OS-specific functionality 2 | // 3 | // Copyright (c) 2012 - present, Victor Zverovich 4 | // All rights reserved. 5 | // 6 | // For the license information refer to format.h. 7 | 8 | #ifndef FMT_OS_H_ 9 | #define FMT_OS_H_ 10 | 11 | #if defined(__MINGW32__) || defined(__CYGWIN__) 12 | // Workaround MinGW bug https://sourceforge.net/p/mingw/bugs/2024/. 13 | # undef __STRICT_ANSI__ 14 | #endif 15 | 16 | #include 17 | #include // for locale_t 18 | #include 19 | #include // for strtod_l 20 | 21 | #include 22 | 23 | #if defined __APPLE__ || defined(__FreeBSD__) 24 | # include // for LC_NUMERIC_MASK on OS X 25 | #endif 26 | 27 | #include "format.h" 28 | 29 | // UWP doesn't provide _pipe. 
30 | #if FMT_HAS_INCLUDE("winapifamily.h") 31 | # include 32 | #endif 33 | #if FMT_HAS_INCLUDE("fcntl.h") && \ 34 | (!defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP)) 35 | # include // for O_RDONLY 36 | # define FMT_USE_FCNTL 1 37 | #else 38 | # define FMT_USE_FCNTL 0 39 | #endif 40 | 41 | #ifndef FMT_POSIX 42 | # if defined(_WIN32) && !defined(__MINGW32__) 43 | // Fix warnings about deprecated symbols. 44 | # define FMT_POSIX(call) _##call 45 | # else 46 | # define FMT_POSIX(call) call 47 | # endif 48 | #endif 49 | 50 | // Calls to system functions are wrapped in FMT_SYSTEM for testability. 51 | #ifdef FMT_SYSTEM 52 | # define FMT_POSIX_CALL(call) FMT_SYSTEM(call) 53 | #else 54 | # define FMT_SYSTEM(call) call 55 | # ifdef _WIN32 56 | // Fix warnings about deprecated symbols. 57 | # define FMT_POSIX_CALL(call) ::_##call 58 | # else 59 | # define FMT_POSIX_CALL(call) ::call 60 | # endif 61 | #endif 62 | 63 | // Retries the expression while it evaluates to error_result and errno 64 | // equals to EINTR. 65 | #ifndef _WIN32 66 | # define FMT_RETRY_VAL(result, expression, error_result) \ 67 | do { \ 68 | (result) = (expression); \ 69 | } while ((result) == (error_result) && errno == EINTR) 70 | #else 71 | # define FMT_RETRY_VAL(result, expression, error_result) result = (expression) 72 | #endif 73 | 74 | #define FMT_RETRY(result, expression) FMT_RETRY_VAL(result, expression, -1) 75 | 76 | FMT_BEGIN_NAMESPACE 77 | 78 | /** 79 | \rst 80 | A reference to a null-terminated string. It can be constructed from a C 81 | string or ``std::string``. 
82 | 83 | You can use one of the following type aliases for common character types: 84 | 85 | +---------------+-----------------------------+ 86 | | Type | Definition | 87 | +===============+=============================+ 88 | | cstring_view | basic_cstring_view | 89 | +---------------+-----------------------------+ 90 | | wcstring_view | basic_cstring_view | 91 | +---------------+-----------------------------+ 92 | 93 | This class is most useful as a parameter type to allow passing 94 | different types of strings to a function, for example:: 95 | 96 | template 97 | std::string format(cstring_view format_str, const Args & ... args); 98 | 99 | format("{}", 42); 100 | format(std::string("{}"), 42); 101 | \endrst 102 | */ 103 | template class basic_cstring_view { 104 | private: 105 | const Char* data_; 106 | 107 | public: 108 | /** Constructs a string reference object from a C string. */ 109 | basic_cstring_view(const Char* s) : data_(s) {} 110 | 111 | /** 112 | \rst 113 | Constructs a string reference from an ``std::string`` object. 114 | \endrst 115 | */ 116 | basic_cstring_view(const std::basic_string& s) : data_(s.c_str()) {} 117 | 118 | /** Returns the pointer to a C string. */ 119 | const Char* c_str() const { return data_; } 120 | }; 121 | 122 | using cstring_view = basic_cstring_view; 123 | using wcstring_view = basic_cstring_view; 124 | 125 | // An error code. 126 | class error_code { 127 | private: 128 | int value_; 129 | 130 | public: 131 | explicit error_code(int value = 0) FMT_NOEXCEPT : value_(value) {} 132 | 133 | int get() const FMT_NOEXCEPT { return value_; } 134 | }; 135 | 136 | #ifdef _WIN32 137 | namespace internal { 138 | // A converter from UTF-16 to UTF-8. 139 | // It is only provided for Windows since other systems support UTF-8 natively. 
140 | class utf16_to_utf8 { 141 | private: 142 | memory_buffer buffer_; 143 | 144 | public: 145 | utf16_to_utf8() {} 146 | FMT_API explicit utf16_to_utf8(wstring_view s); 147 | operator string_view() const { return string_view(&buffer_[0], size()); } 148 | size_t size() const { return buffer_.size() - 1; } 149 | const char* c_str() const { return &buffer_[0]; } 150 | std::string str() const { return std::string(&buffer_[0], size()); } 151 | 152 | // Performs conversion returning a system error code instead of 153 | // throwing exception on conversion error. This method may still throw 154 | // in case of memory allocation error. 155 | FMT_API int convert(wstring_view s); 156 | }; 157 | 158 | FMT_API void format_windows_error(buffer& out, int error_code, 159 | string_view message) FMT_NOEXCEPT; 160 | } // namespace internal 161 | 162 | /** A Windows error. */ 163 | class windows_error : public system_error { 164 | private: 165 | FMT_API void init(int error_code, string_view format_str, format_args args); 166 | 167 | public: 168 | /** 169 | \rst 170 | Constructs a :class:`fmt::windows_error` object with the description 171 | of the form 172 | 173 | .. parsed-literal:: 174 | **: ** 175 | 176 | where ** is the formatted message and ** is the 177 | system message corresponding to the error code. 178 | *error_code* is a Windows error code as given by ``GetLastError``. 179 | If *error_code* is not a valid error code such as -1, the system message 180 | will look like "error -1". 181 | 182 | **Example**:: 183 | 184 | // This throws a windows_error with the description 185 | // cannot open file 'madeup': The system cannot find the file specified. 186 | // or similar (system message may vary). 
187 | const char *filename = "madeup"; 188 | LPOFSTRUCT of = LPOFSTRUCT(); 189 | HFILE file = OpenFile(filename, &of, OF_READ); 190 | if (file == HFILE_ERROR) { 191 | throw fmt::windows_error(GetLastError(), 192 | "cannot open file '{}'", filename); 193 | } 194 | \endrst 195 | */ 196 | template 197 | windows_error(int error_code, string_view message, const Args&... args) { 198 | init(error_code, message, make_format_args(args...)); 199 | } 200 | }; 201 | 202 | // Reports a Windows error without throwing an exception. 203 | // Can be used to report errors from destructors. 204 | FMT_API void report_windows_error(int error_code, 205 | string_view message) FMT_NOEXCEPT; 206 | #endif // _WIN32 207 | 208 | // A buffered file. 209 | class buffered_file { 210 | private: 211 | FILE* file_; 212 | 213 | friend class file; 214 | 215 | explicit buffered_file(FILE* f) : file_(f) {} 216 | 217 | public: 218 | buffered_file(const buffered_file&) = delete; 219 | void operator=(const buffered_file&) = delete; 220 | 221 | // Constructs a buffered_file object which doesn't represent any file. 222 | buffered_file() FMT_NOEXCEPT : file_(nullptr) {} 223 | 224 | // Destroys the object closing the file it represents if any. 225 | FMT_API ~buffered_file() FMT_NOEXCEPT; 226 | 227 | public: 228 | buffered_file(buffered_file&& other) FMT_NOEXCEPT : file_(other.file_) { 229 | other.file_ = nullptr; 230 | } 231 | 232 | buffered_file& operator=(buffered_file&& other) { 233 | close(); 234 | file_ = other.file_; 235 | other.file_ = nullptr; 236 | return *this; 237 | } 238 | 239 | // Opens a file. 240 | FMT_API buffered_file(cstring_view filename, cstring_view mode); 241 | 242 | // Closes the file. 243 | FMT_API void close(); 244 | 245 | // Returns the pointer to a FILE object representing this file. 246 | FILE* get() const FMT_NOEXCEPT { return file_; } 247 | 248 | // We place parentheses around fileno to workaround a bug in some versions 249 | // of MinGW that define fileno as a macro. 
250 | FMT_API int(fileno)() const; 251 | 252 | void vprint(string_view format_str, format_args args) { 253 | fmt::vprint(file_, format_str, args); 254 | } 255 | 256 | template 257 | inline void print(string_view format_str, const Args&... args) { 258 | vprint(format_str, make_format_args(args...)); 259 | } 260 | }; 261 | 262 | #if FMT_USE_FCNTL 263 | // A file. Closed file is represented by a file object with descriptor -1. 264 | // Methods that are not declared with FMT_NOEXCEPT may throw 265 | // fmt::system_error in case of failure. Note that some errors such as 266 | // closing the file multiple times will cause a crash on Windows rather 267 | // than an exception. You can get standard behavior by overriding the 268 | // invalid parameter handler with _set_invalid_parameter_handler. 269 | class file { 270 | private: 271 | int fd_; // File descriptor. 272 | 273 | // Constructs a file object with a given descriptor. 274 | explicit file(int fd) : fd_(fd) {} 275 | 276 | public: 277 | // Possible values for the oflag argument to the constructor. 278 | enum { 279 | RDONLY = FMT_POSIX(O_RDONLY), // Open for reading only. 280 | WRONLY = FMT_POSIX(O_WRONLY), // Open for writing only. 281 | RDWR = FMT_POSIX(O_RDWR) // Open for reading and writing. 282 | }; 283 | 284 | // Constructs a file object which doesn't represent any file. 285 | file() FMT_NOEXCEPT : fd_(-1) {} 286 | 287 | // Opens a file and constructs a file object representing this file. 288 | FMT_API file(cstring_view path, int oflag); 289 | 290 | public: 291 | file(const file&) = delete; 292 | void operator=(const file&) = delete; 293 | 294 | file(file&& other) FMT_NOEXCEPT : fd_(other.fd_) { other.fd_ = -1; } 295 | 296 | file& operator=(file&& other) FMT_NOEXCEPT { 297 | close(); 298 | fd_ = other.fd_; 299 | other.fd_ = -1; 300 | return *this; 301 | } 302 | 303 | // Destroys the object closing the file it represents if any. 304 | FMT_API ~file() FMT_NOEXCEPT; 305 | 306 | // Returns the file descriptor. 
307 | int descriptor() const FMT_NOEXCEPT { return fd_; } 308 | 309 | // Closes the file. 310 | FMT_API void close(); 311 | 312 | // Returns the file size. The size has signed type for consistency with 313 | // stat::st_size. 314 | FMT_API long long size() const; 315 | 316 | // Attempts to read count bytes from the file into the specified buffer. 317 | FMT_API std::size_t read(void* buffer, std::size_t count); 318 | 319 | // Attempts to write count bytes from the specified buffer to the file. 320 | FMT_API std::size_t write(const void* buffer, std::size_t count); 321 | 322 | // Duplicates a file descriptor with the dup function and returns 323 | // the duplicate as a file object. 324 | FMT_API static file dup(int fd); 325 | 326 | // Makes fd be the copy of this file descriptor, closing fd first if 327 | // necessary. 328 | FMT_API void dup2(int fd); 329 | 330 | // Makes fd be the copy of this file descriptor, closing fd first if 331 | // necessary. 332 | FMT_API void dup2(int fd, error_code& ec) FMT_NOEXCEPT; 333 | 334 | // Creates a pipe setting up read_end and write_end file objects for reading 335 | // and writing respectively. 336 | FMT_API static void pipe(file& read_end, file& write_end); 337 | 338 | // Creates a buffered_file object associated with this file and detaches 339 | // this file object from the file. 340 | FMT_API buffered_file fdopen(const char* mode); 341 | }; 342 | 343 | // Returns the memory page size. 344 | long getpagesize(); 345 | #endif // FMT_USE_FCNTL 346 | 347 | #ifdef FMT_LOCALE 348 | // A "C" numeric locale. 
349 | class Locale { 350 | private: 351 | # ifdef _WIN32 352 | using locale_t = _locale_t; 353 | 354 | enum { LC_NUMERIC_MASK = LC_NUMERIC }; 355 | 356 | static locale_t newlocale(int category_mask, const char* locale, locale_t) { 357 | return _create_locale(category_mask, locale); 358 | } 359 | 360 | static void freelocale(locale_t locale) { _free_locale(locale); } 361 | 362 | static double strtod_l(const char* nptr, char** endptr, _locale_t locale) { 363 | return _strtod_l(nptr, endptr, locale); 364 | } 365 | # endif 366 | 367 | locale_t locale_; 368 | 369 | public: 370 | using type = locale_t; 371 | Locale(const Locale&) = delete; 372 | void operator=(const Locale&) = delete; 373 | 374 | Locale() : locale_(newlocale(LC_NUMERIC_MASK, "C", nullptr)) { 375 | if (!locale_) FMT_THROW(system_error(errno, "cannot create locale")); 376 | } 377 | ~Locale() { freelocale(locale_); } 378 | 379 | type get() const { return locale_; } 380 | 381 | // Converts string to floating-point number and advances str past the end 382 | // of the parsed input. 383 | double strtod(const char*& str) const { 384 | char* end = nullptr; 385 | double result = strtod_l(str, &end, locale_); 386 | str = end; 387 | return result; 388 | } 389 | }; 390 | #endif // FMT_LOCALE 391 | FMT_END_NAMESPACE 392 | 393 | #endif // FMT_OS_H_ 394 | -------------------------------------------------------------------------------- /fmt/include/fmt/ostream.h: -------------------------------------------------------------------------------- 1 | // Formatting library for C++ - std::ostream support 2 | // 3 | // Copyright (c) 2012 - present, Victor Zverovich 4 | // All rights reserved. 5 | // 6 | // For the license information refer to format.h. 
7 | 8 | #ifndef FMT_OSTREAM_H_ 9 | #define FMT_OSTREAM_H_ 10 | 11 | #include 12 | #include "format.h" 13 | 14 | FMT_BEGIN_NAMESPACE 15 | namespace internal { 16 | 17 | template class formatbuf : public std::basic_streambuf { 18 | private: 19 | using int_type = typename std::basic_streambuf::int_type; 20 | using traits_type = typename std::basic_streambuf::traits_type; 21 | 22 | buffer& buffer_; 23 | 24 | public: 25 | formatbuf(buffer& buf) : buffer_(buf) {} 26 | 27 | protected: 28 | // The put-area is actually always empty. This makes the implementation 29 | // simpler and has the advantage that the streambuf and the buffer are always 30 | // in sync and sputc never writes into uninitialized memory. The obvious 31 | // disadvantage is that each call to sputc always results in a (virtual) call 32 | // to overflow. There is no disadvantage here for sputn since this always 33 | // results in a call to xsputn. 34 | 35 | int_type overflow(int_type ch = traits_type::eof()) FMT_OVERRIDE { 36 | if (!traits_type::eq_int_type(ch, traits_type::eof())) 37 | buffer_.push_back(static_cast(ch)); 38 | return ch; 39 | } 40 | 41 | std::streamsize xsputn(const Char* s, std::streamsize count) FMT_OVERRIDE { 42 | buffer_.append(s, s + count); 43 | return count; 44 | } 45 | }; 46 | 47 | template struct test_stream : std::basic_ostream { 48 | private: 49 | // Hide all operator<< from std::basic_ostream. 50 | void_t<> operator<<(null<>); 51 | void_t<> operator<<(const Char*); 52 | 53 | template ::value && 54 | !std::is_enum::value)> 55 | void_t<> operator<<(T); 56 | }; 57 | 58 | // Checks if T has a user-defined operator<< (e.g. not a member of 59 | // std::ostream). 
60 | template class is_streamable { 61 | private: 62 | template 63 | static bool_constant&>() 64 | << std::declval()), 65 | void_t<>>::value> 66 | test(int); 67 | 68 | template static std::false_type test(...); 69 | 70 | using result = decltype(test(0)); 71 | 72 | public: 73 | static const bool value = result::value; 74 | }; 75 | 76 | // Write the content of buf to os. 77 | template 78 | void write(std::basic_ostream& os, buffer& buf) { 79 | const Char* buf_data = buf.data(); 80 | using unsigned_streamsize = std::make_unsigned::type; 81 | unsigned_streamsize size = buf.size(); 82 | unsigned_streamsize max_size = to_unsigned(max_value()); 83 | do { 84 | unsigned_streamsize n = size <= max_size ? size : max_size; 85 | os.write(buf_data, static_cast(n)); 86 | buf_data += n; 87 | size -= n; 88 | } while (size != 0); 89 | } 90 | 91 | template 92 | void format_value(buffer& buf, const T& value, 93 | locale_ref loc = locale_ref()) { 94 | formatbuf format_buf(buf); 95 | std::basic_ostream output(&format_buf); 96 | #if !defined(FMT_STATIC_THOUSANDS_SEPARATOR) 97 | if (loc) output.imbue(loc.get()); 98 | #endif 99 | output.exceptions(std::ios_base::failbit | std::ios_base::badbit); 100 | output << value; 101 | buf.resize(buf.size()); 102 | } 103 | 104 | // Formats an object of type T that has an overloaded ostream operator<<. 
105 | template 106 | struct fallback_formatter::value>> 107 | : formatter, Char> { 108 | template 109 | auto format(const T& value, Context& ctx) -> decltype(ctx.out()) { 110 | basic_memory_buffer buffer; 111 | format_value(buffer, value, ctx.locale()); 112 | basic_string_view str(buffer.data(), buffer.size()); 113 | return formatter, Char>::format(str, ctx); 114 | } 115 | }; 116 | } // namespace internal 117 | 118 | template 119 | void vprint(std::basic_ostream& os, basic_string_view format_str, 120 | basic_format_args>> args) { 121 | basic_memory_buffer buffer; 122 | internal::vformat_to(buffer, format_str, args); 123 | internal::write(os, buffer); 124 | } 125 | 126 | /** 127 | \rst 128 | Prints formatted data to the stream *os*. 129 | 130 | **Example**:: 131 | 132 | fmt::print(cerr, "Don't {}!", "panic"); 133 | \endrst 134 | */ 135 | template ::value, char_t>> 137 | void print(std::basic_ostream& os, const S& format_str, Args&&... args) { 138 | vprint(os, to_string_view(format_str), 139 | internal::make_args_checked(format_str, args...)); 140 | } 141 | FMT_END_NAMESPACE 142 | 143 | #endif // FMT_OSTREAM_H_ 144 | -------------------------------------------------------------------------------- /fmt/include/fmt/posix.h: -------------------------------------------------------------------------------- 1 | #include "os.h" 2 | #warning "fmt/posix.h is deprecated; use fmt/os.h instead" -------------------------------------------------------------------------------- /fmt/include/fmt/ranges.h: -------------------------------------------------------------------------------- 1 | // Formatting library for C++ - experimental range support 2 | // 3 | // Copyright (c) 2012 - present, Victor Zverovich 4 | // All rights reserved. 5 | // 6 | // For the license information refer to format.h. 7 | // 8 | // Copyright (c) 2018 - present, Remotion (Igor Schulz) 9 | // All Rights Reserved 10 | // {fmt} support for ranges, containers and types tuple interface. 
11 | 12 | #ifndef FMT_RANGES_H_ 13 | #define FMT_RANGES_H_ 14 | 15 | #include 16 | #include 17 | 18 | #include "format.h" 19 | 20 | // output only up to N items from the range. 21 | #ifndef FMT_RANGE_OUTPUT_LENGTH_LIMIT 22 | # define FMT_RANGE_OUTPUT_LENGTH_LIMIT 256 23 | #endif 24 | 25 | FMT_BEGIN_NAMESPACE 26 | 27 | template struct formatting_base { 28 | template 29 | FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { 30 | return ctx.begin(); 31 | } 32 | }; 33 | 34 | template 35 | struct formatting_range : formatting_base { 36 | static FMT_CONSTEXPR_DECL const std::size_t range_length_limit = 37 | FMT_RANGE_OUTPUT_LENGTH_LIMIT; // output only up to N items from the 38 | // range. 39 | Char prefix; 40 | Char delimiter; 41 | Char postfix; 42 | formatting_range() : prefix('{'), delimiter(','), postfix('}') {} 43 | static FMT_CONSTEXPR_DECL const bool add_delimiter_spaces = true; 44 | static FMT_CONSTEXPR_DECL const bool add_prepostfix_space = false; 45 | }; 46 | 47 | template 48 | struct formatting_tuple : formatting_base { 49 | Char prefix; 50 | Char delimiter; 51 | Char postfix; 52 | formatting_tuple() : prefix('('), delimiter(','), postfix(')') {} 53 | static FMT_CONSTEXPR_DECL const bool add_delimiter_spaces = true; 54 | static FMT_CONSTEXPR_DECL const bool add_prepostfix_space = false; 55 | }; 56 | 57 | namespace internal { 58 | 59 | template 60 | OutputIterator copy(const RangeT& range, OutputIterator out) { 61 | for (auto it = range.begin(), end = range.end(); it != end; ++it) 62 | *out++ = *it; 63 | return out; 64 | } 65 | 66 | template 67 | OutputIterator copy(const char* str, OutputIterator out) { 68 | while (*str) *out++ = *str++; 69 | return out; 70 | } 71 | 72 | template 73 | OutputIterator copy(char ch, OutputIterator out) { 74 | *out++ = ch; 75 | return out; 76 | } 77 | 78 | /// Return true value if T has std::string interface, like std::string_view. 
79 | template class is_like_std_string { 80 | template 81 | static auto check(U* p) 82 | -> decltype((void)p->find('a'), p->length(), (void)p->data(), int()); 83 | template static void check(...); 84 | 85 | public: 86 | static FMT_CONSTEXPR_DECL const bool value = 87 | is_string::value || !std::is_void(nullptr))>::value; 88 | }; 89 | 90 | template 91 | struct is_like_std_string> : std::true_type {}; 92 | 93 | template struct conditional_helper {}; 94 | 95 | template struct is_range_ : std::false_type {}; 96 | 97 | #if !FMT_MSC_VER || FMT_MSC_VER > 1800 98 | template 99 | struct is_range_< 100 | T, conditional_t().begin()), 102 | decltype(std::declval().end())>, 103 | void>> : std::true_type {}; 104 | #endif 105 | 106 | /// tuple_size and tuple_element check. 107 | template class is_tuple_like_ { 108 | template 109 | static auto check(U* p) -> decltype(std::tuple_size::value, int()); 110 | template static void check(...); 111 | 112 | public: 113 | static FMT_CONSTEXPR_DECL const bool value = 114 | !std::is_void(nullptr))>::value; 115 | }; 116 | 117 | // Check for integer_sequence 118 | #if defined(__cpp_lib_integer_sequence) || FMT_MSC_VER >= 1900 119 | template 120 | using integer_sequence = std::integer_sequence; 121 | template using index_sequence = std::index_sequence; 122 | template 123 | using make_index_sequence = std::make_index_sequence; 124 | #else 125 | template struct integer_sequence { 126 | using value_type = T; 127 | 128 | static FMT_CONSTEXPR std::size_t size() { return sizeof...(N); } 129 | }; 130 | 131 | template 132 | using index_sequence = integer_sequence; 133 | 134 | template 135 | struct make_integer_sequence : make_integer_sequence {}; 136 | template 137 | struct make_integer_sequence : integer_sequence {}; 138 | 139 | template 140 | using make_index_sequence = make_integer_sequence; 141 | #endif 142 | 143 | template 144 | void for_each(index_sequence, Tuple&& tup, F&& f) FMT_NOEXCEPT { 145 | using std::get; 146 | // using free function 
get(T) now. 147 | const int _[] = {0, ((void)f(get(tup)), 0)...}; 148 | (void)_; // blocks warnings 149 | } 150 | 151 | template 152 | FMT_CONSTEXPR make_index_sequence::value> get_indexes( 153 | T const&) { 154 | return {}; 155 | } 156 | 157 | template void for_each(Tuple&& tup, F&& f) { 158 | const auto indexes = get_indexes(tup); 159 | for_each(indexes, std::forward(tup), std::forward(f)); 160 | } 161 | 162 | template ::type>::value)> 164 | FMT_CONSTEXPR const char* format_str_quoted(bool add_space, const Arg&) { 165 | return add_space ? " {}" : "{}"; 166 | } 167 | 168 | template ::type>::value)> 170 | FMT_CONSTEXPR const char* format_str_quoted(bool add_space, const Arg&) { 171 | return add_space ? " \"{}\"" : "\"{}\""; 172 | } 173 | 174 | FMT_CONSTEXPR const char* format_str_quoted(bool add_space, const char*) { 175 | return add_space ? " \"{}\"" : "\"{}\""; 176 | } 177 | FMT_CONSTEXPR const wchar_t* format_str_quoted(bool add_space, const wchar_t*) { 178 | return add_space ? L" \"{}\"" : L"\"{}\""; 179 | } 180 | 181 | FMT_CONSTEXPR const char* format_str_quoted(bool add_space, const char) { 182 | return add_space ? " '{}'" : "'{}'"; 183 | } 184 | FMT_CONSTEXPR const wchar_t* format_str_quoted(bool add_space, const wchar_t) { 185 | return add_space ? 
L" '{}'" : L"'{}'"; 186 | } 187 | 188 | } // namespace internal 189 | 190 | template struct is_tuple_like { 191 | static FMT_CONSTEXPR_DECL const bool value = 192 | internal::is_tuple_like_::value && !internal::is_range_::value; 193 | }; 194 | 195 | template 196 | struct formatter::value>> { 197 | private: 198 | // C++11 generic lambda for format() 199 | template struct format_each { 200 | template void operator()(const T& v) { 201 | if (i > 0) { 202 | if (formatting.add_prepostfix_space) { 203 | *out++ = ' '; 204 | } 205 | out = internal::copy(formatting.delimiter, out); 206 | } 207 | out = format_to(out, 208 | internal::format_str_quoted( 209 | (formatting.add_delimiter_spaces && i > 0), v), 210 | v); 211 | ++i; 212 | } 213 | 214 | formatting_tuple& formatting; 215 | std::size_t& i; 216 | typename std::add_lvalue_reference().out())>::type out; 218 | }; 219 | 220 | public: 221 | formatting_tuple formatting; 222 | 223 | template 224 | FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { 225 | return formatting.parse(ctx); 226 | } 227 | 228 | template 229 | auto format(const TupleT& values, FormatContext& ctx) -> decltype(ctx.out()) { 230 | auto out = ctx.out(); 231 | std::size_t i = 0; 232 | internal::copy(formatting.prefix, out); 233 | 234 | internal::for_each(values, format_each{formatting, i, out}); 235 | if (formatting.add_prepostfix_space) { 236 | *out++ = ' '; 237 | } 238 | internal::copy(formatting.postfix, out); 239 | 240 | return ctx.out(); 241 | } 242 | }; 243 | 244 | template struct is_range { 245 | static FMT_CONSTEXPR_DECL const bool value = 246 | internal::is_range_::value && 247 | !internal::is_like_std_string::value && 248 | !std::is_convertible>::value && 249 | !std::is_constructible, T>::value; 250 | }; 251 | 252 | template 253 | struct formatter::value>> { 255 | formatting_range formatting; 256 | 257 | template 258 | FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { 259 | return formatting.parse(ctx); 260 | 
} 261 | 262 | template 263 | typename FormatContext::iterator format(const RangeT& values, 264 | FormatContext& ctx) { 265 | auto out = internal::copy(formatting.prefix, ctx.out()); 266 | std::size_t i = 0; 267 | for (auto it = values.begin(), end = values.end(); it != end; ++it) { 268 | if (i > 0) { 269 | if (formatting.add_prepostfix_space) *out++ = ' '; 270 | out = internal::copy(formatting.delimiter, out); 271 | } 272 | out = format_to(out, 273 | internal::format_str_quoted( 274 | (formatting.add_delimiter_spaces && i > 0), *it), 275 | *it); 276 | if (++i > formatting.range_length_limit) { 277 | out = format_to(out, " ... "); 278 | break; 279 | } 280 | } 281 | if (formatting.add_prepostfix_space) *out++ = ' '; 282 | return internal::copy(formatting.postfix, out); 283 | } 284 | }; 285 | 286 | template struct tuple_arg_join : internal::view { 287 | const std::tuple& tuple; 288 | basic_string_view sep; 289 | 290 | tuple_arg_join(const std::tuple& t, basic_string_view s) 291 | : tuple{t}, sep{s} {} 292 | }; 293 | 294 | template 295 | struct formatter, Char> { 296 | template 297 | FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { 298 | return ctx.begin(); 299 | } 300 | 301 | template 302 | typename FormatContext::iterator format( 303 | const tuple_arg_join& value, FormatContext& ctx) { 304 | return format(value, ctx, internal::make_index_sequence{}); 305 | } 306 | 307 | private: 308 | template 309 | typename FormatContext::iterator format( 310 | const tuple_arg_join& value, FormatContext& ctx, 311 | internal::index_sequence) { 312 | return format_args(value, ctx, std::get(value.tuple)...); 313 | } 314 | 315 | template 316 | typename FormatContext::iterator format_args( 317 | const tuple_arg_join&, FormatContext& ctx) { 318 | // NOTE: for compilers that support C++17, this empty function instantiation 319 | // can be replaced with a constexpr branch in the variadic overload. 
320 | return ctx.out(); 321 | } 322 | 323 | template 324 | typename FormatContext::iterator format_args( 325 | const tuple_arg_join& value, FormatContext& ctx, 326 | const Arg& arg, const Args&... args) { 327 | using base = formatter::type, Char>; 328 | auto out = ctx.out(); 329 | out = base{}.format(arg, ctx); 330 | if (sizeof...(Args) > 0) { 331 | out = std::copy(value.sep.begin(), value.sep.end(), out); 332 | ctx.advance_to(out); 333 | return format_args(value, ctx, args...); 334 | } 335 | return out; 336 | } 337 | }; 338 | 339 | /** 340 | \rst 341 | Returns an object that formats `tuple` with elements separated by `sep`. 342 | 343 | **Example**:: 344 | 345 | std::tuple t = {1, 'a'}; 346 | fmt::print("{}", fmt::join(t, ", ")); 347 | // Output: "1, a" 348 | \endrst 349 | */ 350 | template 351 | FMT_CONSTEXPR tuple_arg_join join(const std::tuple& tuple, 352 | string_view sep) { 353 | return {tuple, sep}; 354 | } 355 | 356 | template 357 | FMT_CONSTEXPR tuple_arg_join join(const std::tuple& tuple, 358 | wstring_view sep) { 359 | return {tuple, sep}; 360 | } 361 | 362 | /** 363 | \rst 364 | Returns an object that formats `initializer_list` with elements separated by 365 | `sep`. 
366 | 367 | **Example**:: 368 | 369 | fmt::print("{}", fmt::join({1, 2, 3}, ", ")); 370 | // Output: "1, 2, 3" 371 | \endrst 372 | */ 373 | template 374 | arg_join>, char> join( 375 | std::initializer_list list, string_view sep) { 376 | return join(std::begin(list), std::end(list), sep); 377 | } 378 | 379 | template 380 | arg_join>, wchar_t> join( 381 | std::initializer_list list, wstring_view sep) { 382 | return join(std::begin(list), std::end(list), sep); 383 | } 384 | 385 | FMT_END_NAMESPACE 386 | 387 | #endif // FMT_RANGES_H_ 388 | -------------------------------------------------------------------------------- /fmt/src/format.cc: -------------------------------------------------------------------------------- 1 | // Formatting library for C++ 2 | // 3 | // Copyright (c) 2012 - 2016, Victor Zverovich 4 | // All rights reserved. 5 | // 6 | // For the license information refer to format.h. 7 | 8 | #include "fmt/format-inl.h" 9 | 10 | FMT_BEGIN_NAMESPACE 11 | namespace internal { 12 | 13 | template 14 | int format_float(char* buf, std::size_t size, const char* format, int precision, 15 | T value) { 16 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 17 | if (precision > 100000) 18 | throw std::runtime_error( 19 | "fuzz mode - avoid large allocation inside snprintf"); 20 | #endif 21 | // Suppress the warning about nonliteral format string. 22 | int (*snprintf_ptr)(char*, size_t, const char*, ...) = FMT_SNPRINTF; 23 | return precision < 0 ? snprintf_ptr(buf, size, format, value) 24 | : snprintf_ptr(buf, size, format, precision, value); 25 | } 26 | struct sprintf_specs { 27 | int precision; 28 | char type; 29 | bool alt : 1; 30 | 31 | template 32 | constexpr sprintf_specs(basic_format_specs specs) 33 | : precision(specs.precision), type(specs.type), alt(specs.alt) {} 34 | 35 | constexpr bool has_precision() const { return precision >= 0; } 36 | }; 37 | 38 | // This is deprecated and is kept only to preserve ABI compatibility. 
39 | template 40 | char* sprintf_format(Double value, internal::buffer& buf, 41 | sprintf_specs specs) { 42 | // Buffer capacity must be non-zero, otherwise MSVC's vsnprintf_s will fail. 43 | FMT_ASSERT(buf.capacity() != 0, "empty buffer"); 44 | 45 | // Build format string. 46 | enum { max_format_size = 10 }; // longest format: %#-*.*Lg 47 | char format[max_format_size]; 48 | char* format_ptr = format; 49 | *format_ptr++ = '%'; 50 | if (specs.alt || !specs.type) *format_ptr++ = '#'; 51 | if (specs.precision >= 0) { 52 | *format_ptr++ = '.'; 53 | *format_ptr++ = '*'; 54 | } 55 | if (std::is_same::value) *format_ptr++ = 'L'; 56 | 57 | char type = specs.type; 58 | 59 | if (type == '%') 60 | type = 'f'; 61 | else if (type == 0 || type == 'n') 62 | type = 'g'; 63 | #if FMT_MSC_VER 64 | if (type == 'F') { 65 | // MSVC's printf doesn't support 'F'. 66 | type = 'f'; 67 | } 68 | #endif 69 | *format_ptr++ = type; 70 | *format_ptr = '\0'; 71 | 72 | // Format using snprintf. 73 | char* start = nullptr; 74 | char* decimal_point_pos = nullptr; 75 | for (;;) { 76 | std::size_t buffer_size = buf.capacity(); 77 | start = &buf[0]; 78 | int result = 79 | format_float(start, buffer_size, format, specs.precision, value); 80 | if (result >= 0) { 81 | unsigned n = internal::to_unsigned(result); 82 | if (n < buf.capacity()) { 83 | // Find the decimal point. 84 | auto p = buf.data(), end = p + n; 85 | if (*p == '+' || *p == '-') ++p; 86 | if (specs.type != 'a' && specs.type != 'A') { 87 | while (p < end && *p >= '0' && *p <= '9') ++p; 88 | if (p < end && *p != 'e' && *p != 'E') { 89 | decimal_point_pos = p; 90 | if (!specs.type) { 91 | // Keep only one trailing zero after the decimal point. 
92 | ++p; 93 | if (*p == '0') ++p; 94 | while (p != end && *p >= '1' && *p <= '9') ++p; 95 | char* where = p; 96 | while (p != end && *p == '0') ++p; 97 | if (p == end || *p < '0' || *p > '9') { 98 | if (p != end) std::memmove(where, p, to_unsigned(end - p)); 99 | n -= static_cast(p - where); 100 | } 101 | } 102 | } 103 | } 104 | buf.resize(n); 105 | break; // The buffer is large enough - continue with formatting. 106 | } 107 | buf.reserve(n + 1); 108 | } else { 109 | // If result is negative we ask to increase the capacity by at least 1, 110 | // but as std::vector, the buffer grows exponentially. 111 | buf.reserve(buf.capacity() + 1); 112 | } 113 | } 114 | return decimal_point_pos; 115 | } 116 | } // namespace internal 117 | 118 | template FMT_API char* internal::sprintf_format(double, internal::buffer&, 119 | sprintf_specs); 120 | template FMT_API char* internal::sprintf_format(long double, 121 | internal::buffer&, 122 | sprintf_specs); 123 | 124 | template struct FMT_INSTANTIATION_DEF_API internal::basic_data; 125 | 126 | // Workaround a bug in MSVC2013 that prevents instantiation of format_float. 127 | int (*instantiate_format_float)(double, int, internal::float_specs, 128 | internal::buffer&) = 129 | internal::format_float; 130 | 131 | #ifndef FMT_STATIC_THOUSANDS_SEPARATOR 132 | template FMT_API internal::locale_ref::locale_ref(const std::locale& loc); 133 | template FMT_API std::locale internal::locale_ref::get() const; 134 | #endif 135 | 136 | // Explicit instantiations for char. 
137 | 138 | template FMT_API std::string internal::grouping_impl(locale_ref); 139 | template FMT_API char internal::thousands_sep_impl(locale_ref); 140 | template FMT_API char internal::decimal_point_impl(locale_ref); 141 | 142 | template FMT_API void internal::buffer::append(const char*, const char*); 143 | 144 | template FMT_API void internal::arg_map::init( 145 | const basic_format_args& args); 146 | 147 | template FMT_API std::string internal::vformat( 148 | string_view, basic_format_args); 149 | 150 | template FMT_API format_context::iterator internal::vformat_to( 151 | internal::buffer&, string_view, basic_format_args); 152 | 153 | template FMT_API int internal::snprintf_float(double, int, 154 | internal::float_specs, 155 | internal::buffer&); 156 | template FMT_API int internal::snprintf_float(long double, int, 157 | internal::float_specs, 158 | internal::buffer&); 159 | template FMT_API int internal::format_float(double, int, internal::float_specs, 160 | internal::buffer&); 161 | template FMT_API int internal::format_float(long double, int, 162 | internal::float_specs, 163 | internal::buffer&); 164 | 165 | // Explicit instantiations for wchar_t. 166 | 167 | template FMT_API std::string internal::grouping_impl(locale_ref); 168 | template FMT_API wchar_t internal::thousands_sep_impl(locale_ref); 169 | template FMT_API wchar_t internal::decimal_point_impl(locale_ref); 170 | 171 | template FMT_API void internal::buffer::append(const wchar_t*, 172 | const wchar_t*); 173 | 174 | template FMT_API std::wstring internal::vformat( 175 | wstring_view, basic_format_args); 176 | FMT_END_NAMESPACE 177 | -------------------------------------------------------------------------------- /fmt/src/os.cc: -------------------------------------------------------------------------------- 1 | // Formatting library for C++ - optional OS-specific functionality 2 | // 3 | // Copyright (c) 2012 - 2016, Victor Zverovich 4 | // All rights reserved. 
5 | // 6 | // For the license information refer to format.h. 7 | 8 | // Disable bogus MSVC warnings. 9 | #if !defined(_CRT_SECURE_NO_WARNINGS) && defined(_MSC_VER) 10 | # define _CRT_SECURE_NO_WARNINGS 11 | #endif 12 | 13 | #include "fmt/os.h" 14 | 15 | #include 16 | 17 | #if FMT_USE_FCNTL 18 | # include 19 | # include 20 | 21 | # ifndef _WIN32 22 | # include 23 | # else 24 | # ifndef WIN32_LEAN_AND_MEAN 25 | # define WIN32_LEAN_AND_MEAN 26 | # endif 27 | # include 28 | # include 29 | 30 | # define O_CREAT _O_CREAT 31 | # define O_TRUNC _O_TRUNC 32 | 33 | # ifndef S_IRUSR 34 | # define S_IRUSR _S_IREAD 35 | # endif 36 | 37 | # ifndef S_IWUSR 38 | # define S_IWUSR _S_IWRITE 39 | # endif 40 | 41 | # ifdef __MINGW32__ 42 | # define _SH_DENYNO 0x40 43 | # endif 44 | # endif // _WIN32 45 | #endif // FMT_USE_FCNTL 46 | 47 | #ifdef _WIN32 48 | # include 49 | #endif 50 | 51 | #ifdef fileno 52 | # undef fileno 53 | #endif 54 | 55 | namespace { 56 | #ifdef _WIN32 57 | // Return type of read and write functions. 58 | using RWResult = int; 59 | 60 | // On Windows the count argument to read and write is unsigned, so convert 61 | // it from size_t preventing integer overflow. 62 | inline unsigned convert_rwcount(std::size_t count) { 63 | return count <= UINT_MAX ? static_cast(count) : UINT_MAX; 64 | } 65 | #else 66 | // Return type of read and write functions. 
67 | using RWResult = ssize_t; 68 | 69 | inline std::size_t convert_rwcount(std::size_t count) { return count; } 70 | #endif 71 | } // namespace 72 | 73 | FMT_BEGIN_NAMESPACE 74 | 75 | #ifdef _WIN32 76 | internal::utf16_to_utf8::utf16_to_utf8(wstring_view s) { 77 | if (int error_code = convert(s)) { 78 | FMT_THROW(windows_error(error_code, 79 | "cannot convert string from UTF-16 to UTF-8")); 80 | } 81 | } 82 | 83 | int internal::utf16_to_utf8::convert(wstring_view s) { 84 | if (s.size() > INT_MAX) return ERROR_INVALID_PARAMETER; 85 | int s_size = static_cast(s.size()); 86 | if (s_size == 0) { 87 | // WideCharToMultiByte does not support zero length, handle separately. 88 | buffer_.resize(1); 89 | buffer_[0] = 0; 90 | return 0; 91 | } 92 | 93 | int length = WideCharToMultiByte(CP_UTF8, 0, s.data(), s_size, nullptr, 0, 94 | nullptr, nullptr); 95 | if (length == 0) return GetLastError(); 96 | buffer_.resize(length + 1); 97 | length = WideCharToMultiByte(CP_UTF8, 0, s.data(), s_size, &buffer_[0], 98 | length, nullptr, nullptr); 99 | if (length == 0) return GetLastError(); 100 | buffer_[length] = 0; 101 | return 0; 102 | } 103 | 104 | void windows_error::init(int err_code, string_view format_str, 105 | format_args args) { 106 | error_code_ = err_code; 107 | memory_buffer buffer; 108 | internal::format_windows_error(buffer, err_code, vformat(format_str, args)); 109 | std::runtime_error& base = *this; 110 | base = std::runtime_error(to_string(buffer)); 111 | } 112 | 113 | void internal::format_windows_error(internal::buffer& out, int error_code, 114 | string_view message) FMT_NOEXCEPT { 115 | FMT_TRY { 116 | wmemory_buffer buf; 117 | buf.resize(inline_buffer_size); 118 | for (;;) { 119 | wchar_t* system_message = &buf[0]; 120 | int result = FormatMessageW( 121 | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, 122 | error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), system_message, 123 | static_cast(buf.size()), nullptr); 124 | if (result != 0) { 
125 | utf16_to_utf8 utf8_message; 126 | if (utf8_message.convert(system_message) == ERROR_SUCCESS) { 127 | internal::writer w(out); 128 | w.write(message); 129 | w.write(": "); 130 | w.write(utf8_message); 131 | return; 132 | } 133 | break; 134 | } 135 | if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) 136 | break; // Can't get error message, report error code instead. 137 | buf.resize(buf.size() * 2); 138 | } 139 | } 140 | FMT_CATCH(...) {} 141 | format_error_code(out, error_code, message); 142 | } 143 | 144 | void report_windows_error(int error_code, 145 | fmt::string_view message) FMT_NOEXCEPT { 146 | report_error(internal::format_windows_error, error_code, message); 147 | } 148 | #endif // _WIN32 149 | 150 | buffered_file::~buffered_file() FMT_NOEXCEPT { 151 | if (file_ && FMT_SYSTEM(fclose(file_)) != 0) 152 | report_system_error(errno, "cannot close file"); 153 | } 154 | 155 | buffered_file::buffered_file(cstring_view filename, cstring_view mode) { 156 | FMT_RETRY_VAL(file_, FMT_SYSTEM(fopen(filename.c_str(), mode.c_str())), 157 | nullptr); 158 | if (!file_) 159 | FMT_THROW(system_error(errno, "cannot open file {}", filename.c_str())); 160 | } 161 | 162 | void buffered_file::close() { 163 | if (!file_) return; 164 | int result = FMT_SYSTEM(fclose(file_)); 165 | file_ = nullptr; 166 | if (result != 0) FMT_THROW(system_error(errno, "cannot close file")); 167 | } 168 | 169 | // A macro used to prevent expansion of fileno on broken versions of MinGW. 
170 | #define FMT_ARGS 171 | 172 | int buffered_file::fileno() const { 173 | int fd = FMT_POSIX_CALL(fileno FMT_ARGS(file_)); 174 | if (fd == -1) FMT_THROW(system_error(errno, "cannot get file descriptor")); 175 | return fd; 176 | } 177 | 178 | #if FMT_USE_FCNTL 179 | file::file(cstring_view path, int oflag) { 180 | int mode = S_IRUSR | S_IWUSR; 181 | # if defined(_WIN32) && !defined(__MINGW32__) 182 | fd_ = -1; 183 | FMT_POSIX_CALL(sopen_s(&fd_, path.c_str(), oflag, _SH_DENYNO, mode)); 184 | # else 185 | FMT_RETRY(fd_, FMT_POSIX_CALL(open(path.c_str(), oflag, mode))); 186 | # endif 187 | if (fd_ == -1) 188 | FMT_THROW(system_error(errno, "cannot open file {}", path.c_str())); 189 | } 190 | 191 | file::~file() FMT_NOEXCEPT { 192 | // Don't retry close in case of EINTR! 193 | // See http://linux.derkeiler.com/Mailing-Lists/Kernel/2005-09/3000.html 194 | if (fd_ != -1 && FMT_POSIX_CALL(close(fd_)) != 0) 195 | report_system_error(errno, "cannot close file"); 196 | } 197 | 198 | void file::close() { 199 | if (fd_ == -1) return; 200 | // Don't retry close in case of EINTR! 201 | // See http://linux.derkeiler.com/Mailing-Lists/Kernel/2005-09/3000.html 202 | int result = FMT_POSIX_CALL(close(fd_)); 203 | fd_ = -1; 204 | if (result != 0) FMT_THROW(system_error(errno, "cannot close file")); 205 | } 206 | 207 | long long file::size() const { 208 | # ifdef _WIN32 209 | // Use GetFileSize instead of GetFileSizeEx for the case when _WIN32_WINNT 210 | // is less than 0x0500 as is the case with some default MinGW builds. 211 | // Both functions support large file sizes. 
212 | DWORD size_upper = 0; 213 | HANDLE handle = reinterpret_cast(_get_osfhandle(fd_)); 214 | DWORD size_lower = FMT_SYSTEM(GetFileSize(handle, &size_upper)); 215 | if (size_lower == INVALID_FILE_SIZE) { 216 | DWORD error = GetLastError(); 217 | if (error != NO_ERROR) 218 | FMT_THROW(windows_error(GetLastError(), "cannot get file size")); 219 | } 220 | unsigned long long long_size = size_upper; 221 | return (long_size << sizeof(DWORD) * CHAR_BIT) | size_lower; 222 | # else 223 | using Stat = struct stat; 224 | Stat file_stat = Stat(); 225 | if (FMT_POSIX_CALL(fstat(fd_, &file_stat)) == -1) 226 | FMT_THROW(system_error(errno, "cannot get file attributes")); 227 | static_assert(sizeof(long long) >= sizeof(file_stat.st_size), 228 | "return type of file::size is not large enough"); 229 | return file_stat.st_size; 230 | # endif 231 | } 232 | 233 | std::size_t file::read(void* buffer, std::size_t count) { 234 | RWResult result = 0; 235 | FMT_RETRY(result, FMT_POSIX_CALL(read(fd_, buffer, convert_rwcount(count)))); 236 | if (result < 0) FMT_THROW(system_error(errno, "cannot read from file")); 237 | return internal::to_unsigned(result); 238 | } 239 | 240 | std::size_t file::write(const void* buffer, std::size_t count) { 241 | RWResult result = 0; 242 | FMT_RETRY(result, FMT_POSIX_CALL(write(fd_, buffer, convert_rwcount(count)))); 243 | if (result < 0) FMT_THROW(system_error(errno, "cannot write to file")); 244 | return internal::to_unsigned(result); 245 | } 246 | 247 | file file::dup(int fd) { 248 | // Don't retry as dup doesn't return EINTR. 
249 | // http://pubs.opengroup.org/onlinepubs/009695399/functions/dup.html 250 | int new_fd = FMT_POSIX_CALL(dup(fd)); 251 | if (new_fd == -1) 252 | FMT_THROW(system_error(errno, "cannot duplicate file descriptor {}", fd)); 253 | return file(new_fd); 254 | } 255 | 256 | void file::dup2(int fd) { 257 | int result = 0; 258 | FMT_RETRY(result, FMT_POSIX_CALL(dup2(fd_, fd))); 259 | if (result == -1) { 260 | FMT_THROW(system_error(errno, "cannot duplicate file descriptor {} to {}", 261 | fd_, fd)); 262 | } 263 | } 264 | 265 | void file::dup2(int fd, error_code& ec) FMT_NOEXCEPT { 266 | int result = 0; 267 | FMT_RETRY(result, FMT_POSIX_CALL(dup2(fd_, fd))); 268 | if (result == -1) ec = error_code(errno); 269 | } 270 | 271 | void file::pipe(file& read_end, file& write_end) { 272 | // Close the descriptors first to make sure that assignments don't throw 273 | // and there are no leaks. 274 | read_end.close(); 275 | write_end.close(); 276 | int fds[2] = {}; 277 | # ifdef _WIN32 278 | // Make the default pipe capacity the same as on Linux 2.6.11+. 279 | enum { DEFAULT_CAPACITY = 65536 }; 280 | int result = FMT_POSIX_CALL(pipe(fds, DEFAULT_CAPACITY, _O_BINARY)); 281 | # else 282 | // Don't retry as the pipe function doesn't return EINTR. 283 | // http://pubs.opengroup.org/onlinepubs/009696799/functions/pipe.html 284 | int result = FMT_POSIX_CALL(pipe(fds)); 285 | # endif 286 | if (result != 0) FMT_THROW(system_error(errno, "cannot create pipe")); 287 | // The following assignments don't throw because read_end and write_end 288 | // are closed. 289 | read_end = file(fds[0]); 290 | write_end = file(fds[1]); 291 | } 292 | 293 | buffered_file file::fdopen(const char* mode) { 294 | // Don't retry as fdopen doesn't return EINTR.
295 | FILE* f = FMT_POSIX_CALL(fdopen(fd_, mode)); 296 | if (!f) 297 | FMT_THROW( 298 | system_error(errno, "cannot associate stream with file descriptor")); 299 | buffered_file bf(f); 300 | fd_ = -1; 301 | return bf; 302 | } 303 | 304 | long getpagesize() { 305 | # ifdef _WIN32 306 | SYSTEM_INFO si; 307 | GetSystemInfo(&si); 308 | return si.dwPageSize; 309 | # else 310 | long size = FMT_POSIX_CALL(sysconf(_SC_PAGESIZE)); 311 | if (size < 0) FMT_THROW(system_error(errno, "cannot get memory page size")); 312 | return size; 313 | # endif 314 | } 315 | #endif // FMT_USE_FCNTL 316 | FMT_END_NAMESPACE 317 | -------------------------------------------------------------------------------- /results/g1-16/data_slow.csv: -------------------------------------------------------------------------------- 1 | Trial,Cores,Implementation,Nanos/Op,Total,Rlen 2 | 0,17,plain add,4.1,2055063600,1.0 3 | 1,17,plain add,4.1,2055072000,1.0 4 | 2,17,plain add,4.1,2054916000,1.0 5 | 3,17,plain add,4.1,2054980800,1.0 6 | 4,17,plain add,4.1,2054831800,1.0 7 | 5,17,plain add,4.1,2054890400,1.0 8 | 6,17,plain add,4.1,2054932200,1.0 9 | 7,17,plain add,4.1,2054984600,1.0 10 | 8,17,plain add,4.1,2054735600,1.0 11 | 9,17,plain add,4.1,2054831800,1.0 12 | 10,17,plain add,4.1,2055156400,1.0 13 | 11,17,plain add,4.1,2055111000,1.0 14 | 12,17,plain add,4.1,2054867000,1.0 15 | 13,17,plain add,4.1,2055147200,1.0 16 | 14,17,plain add,4.1,2055035800,1.0 17 | 0,18,plain add,4.4,2055066000,1.0 18 | 1,18,plain add,4.4,2054927400,1.0 19 | 2,18,plain add,4.4,2055136400,1.0 20 | 3,18,plain add,4.4,2054927800,1.0 21 | 4,18,plain add,4.4,2054907800,1.0 22 | 5,18,plain add,4.4,2054967600,1.0 23 | 6,18,plain add,4.4,2055133800,1.0 24 | 7,18,plain add,4.4,2055160800,1.0 25 | 8,18,plain add,4.4,2054912200,1.0 26 | 9,18,plain add,4.4,2055324600,1.0 27 | 10,18,plain add,4.4,2055078800,1.0 28 | 11,18,plain add,4.4,2055156000,1.0 29 | 12,18,plain add,4.4,2055013200,1.0 30 | 13,18,plain add,4.4,2055171800,1.0 31 | 
14,18,plain add,4.4,2054911000,1.0 32 | 0,17,tls add,4.7,1815055600,1.0 33 | 1,17,tls add,4.7,1814681200,1.0 34 | 2,17,tls add,4.7,1815143800,1.0 35 | 3,17,tls add,4.7,1814725000,1.0 36 | 4,17,tls add,4.7,1815109200,1.0 37 | 5,17,tls add,4.7,1814727200,1.0 38 | 6,17,tls add,4.7,1815097600,1.0 39 | 7,17,tls add,4.7,1814729800,1.0 40 | 8,17,tls add,4.7,1815102200,1.0 41 | 9,17,tls add,4.7,1814669000,1.0 42 | 10,17,tls add,4.7,1814527800,1.0 43 | 11,17,tls add,4.7,1814718000,1.0 44 | 12,17,tls add,4.7,1815115600,1.0 45 | 13,17,tls add,4.7,1815080200,1.0 46 | 14,17,tls add,4.7,1814764000,1.0 47 | 0,18,tls add,5.0,1815246000,1.0 48 | 1,18,tls add,5.0,1815531400,1.0 49 | 2,18,tls add,5.0,1815155200,1.0 50 | 3,18,tls add,5.0,1815478200,1.0 51 | 4,18,tls add,5.0,1815218400,1.0 52 | 5,18,tls add,5.0,1815496800,1.0 53 | 6,18,tls add,5.0,1815204000,1.0 54 | 7,18,tls add,5.0,1815505400,1.0 55 | 8,18,tls add,5.0,1815244000,1.0 56 | 9,18,tls add,5.0,1815553400,1.0 57 | 10,18,tls add,5.0,1815226800,1.0 58 | 11,18,tls add,5.0,1815530200,1.0 59 | 12,18,tls add,5.0,1815220400,1.0 60 | 13,18,tls add,5.0,1815512800,1.0 61 | 14,18,tls add,4.9,1815149800,1.0 62 | 0,17,atomic add,1472.9,5768600,1.0 63 | 1,17,atomic add,1451.2,5855600,1.0 64 | 2,17,atomic add,1492.3,5694000,1.0 65 | 3,17,atomic add,1493.7,5691400,1.0 66 | 4,17,atomic add,1489.8,5701000,1.0 67 | 5,17,atomic add,1487.9,5710800,1.0 68 | 6,17,atomic add,1488.6,5708200,1.0 69 | 7,17,atomic add,1486.7,5715000,1.0 70 | 8,17,atomic add,1475.7,5757800,1.0 71 | 9,17,atomic add,1449.0,5864000,1.0 72 | 10,17,atomic add,1448.8,5865200,1.0 73 | 11,17,atomic add,1449.6,5864600,1.0 74 | 12,17,atomic add,1449.1,5860800,1.0 75 | 13,17,atomic add,1450.0,5860400,1.0 76 | 14,17,atomic add,1446.1,5867200,1.0 77 | 0,18,atomic add,1538.1,5849400,1.0 78 | 1,18,atomic add,1538.8,5844000,1.0 79 | 2,18,atomic add,1536.3,5851200,1.0 80 | 3,18,atomic add,1537.1,5850600,1.0 81 | 4,18,atomic add,1537.5,5849000,1.0 82 | 5,18,atomic add,1537.3,5850000,1.0 
83 | 6,18,atomic add,1538.6,5844600,1.0 84 | 7,18,atomic add,1538.2,5846200,1.0 85 | 8,18,atomic add,1538.3,5848800,1.0 86 | 9,18,atomic add,1538.9,5843800,1.0 87 | 10,18,atomic add,1535.3,5855000,1.0 88 | 11,18,atomic add,1537.2,5850400,1.0 89 | 12,18,atomic add,1537.7,5848400,1.0 90 | 13,18,atomic add,1535.5,5856800,1.0 91 | 14,18,atomic add,1532.6,5852200,1.0 92 | 0,17,cas add,2255.2,3766000,1.0 93 | 1,17,cas add,2256.1,3766400,1.0 94 | 2,17,cas add,2255.0,3768200,1.0 95 | 3,17,cas add,2255.1,3768000,1.0 96 | 4,17,cas add,2255.3,3767800,1.0 97 | 5,17,cas add,2256.1,3766800,1.0 98 | 6,17,cas add,2258.2,3764800,1.0 99 | 7,17,cas add,2253.4,3769000,1.0 100 | 8,17,cas add,2255.8,3767000,1.0 101 | 9,17,cas add,2256.7,3765200,1.0 102 | 10,17,cas add,2255.9,3767000,1.0 103 | 11,17,cas add,2254.9,3768400,1.0 104 | 12,17,cas add,2255.2,3767800,1.0 105 | 13,17,cas add,2255.7,3767000,1.0 106 | 14,17,cas add,2252.8,3766800,1.0 107 | 0,18,cas add,2388.4,3765800,1.0 108 | 1,18,cas add,2389.4,3764000,1.0 109 | 2,18,cas add,2388.7,3765000,1.0 110 | 3,18,cas add,2389.9,3765000,1.0 111 | 4,18,cas add,2389.3,3764200,1.0 112 | 5,18,cas add,2386.4,3766800,1.0 113 | 6,18,cas add,2389.2,3764400,1.0 114 | 7,18,cas add,2389.7,3763600,1.0 115 | 8,18,cas add,2390.0,3763000,1.0 116 | 9,18,cas add,2389.7,3763200,1.0 117 | 10,18,cas add,2390.0,3762800,1.0 118 | 11,18,cas add,2390.4,3764000,1.0 119 | 12,18,cas add,2388.5,3765200,1.0 120 | 13,18,cas add,2386.5,3767000,1.0 121 | 14,18,cas add,2382.6,3764800,1.0 122 | 0,17,cas multi,18.8,451122600,1.0 123 | 1,17,cas multi,18.8,451115600,1.0 124 | 2,17,cas multi,18.8,451106600,1.0 125 | 3,17,cas multi,18.8,451125800,1.0 126 | 4,17,cas multi,18.8,451127000,1.0 127 | 5,17,cas multi,18.8,451084200,1.0 128 | 6,17,cas multi,18.8,451118800,1.0 129 | 7,17,cas multi,18.8,451126000,1.0 130 | 8,17,cas multi,18.8,451118400,1.0 131 | 9,17,cas multi,18.8,451122000,1.0 132 | 10,17,cas multi,18.8,451100800,1.0 133 | 11,17,cas multi,18.8,451129200,1.0 134 | 
12,17,cas multi,18.8,451119800,1.0 135 | 13,17,cas multi,18.8,451127600,1.0 136 | 14,17,cas multi,18.8,451118000,1.0 137 | 0,18,cas multi,19.9,451090600,1.0 138 | 1,18,cas multi,19.9,450972600,1.0 139 | 2,18,cas multi,19.9,451109600,1.0 140 | 3,18,cas multi,19.9,451120000,1.0 141 | 4,18,cas multi,19.9,451108800,1.0 142 | 5,18,cas multi,19.9,451117400,1.0 143 | 6,18,cas multi,19.9,451113600,1.0 144 | 7,18,cas multi,19.9,451099600,1.0 145 | 8,18,cas multi,19.9,451101400,1.0 146 | 9,18,cas multi,19.9,451129800,1.0 147 | 10,18,cas multi,19.9,451111200,1.0 148 | 11,18,cas multi,19.9,451114200,1.0 149 | 12,18,cas multi,19.9,451114000,1.0 150 | 13,18,cas multi,19.9,451118800,1.0 151 | 14,18,cas multi,19.9,451113200,1.0 152 | 0,17,mutex add,4445.6,1916400,1.0 153 | 1,17,mutex add,4444.9,1916000,1.0 154 | 2,17,mutex add,4451.1,1913400,1.0 155 | 3,17,mutex add,4472.3,1904400,1.0 156 | 4,17,mutex add,4438.1,1918800,1.0 157 | 5,17,mutex add,4445.9,1915200,1.0 158 | 6,17,mutex add,4451.0,1914400,1.0 159 | 7,17,mutex add,4442.4,1917200,1.0 160 | 8,17,mutex add,4433.0,1920800,1.0 161 | 9,17,mutex add,4445.7,1916200,1.0 162 | 10,17,mutex add,4449.0,1914200,1.0 163 | 11,17,mutex add,4445.6,1916000,1.0 164 | 12,17,mutex add,4434.9,1921000,1.0 165 | 13,17,mutex add,4455.0,1912200,1.0 166 | 14,17,mutex add,4445.6,1912800,1.0 167 | 0,18,mutex add,4683.5,1929000,1.0 168 | 1,18,mutex add,4678.2,1931600,1.0 169 | 2,18,mutex add,4676.8,1931200,1.0 170 | 3,18,mutex add,4680.7,1928600,1.0 171 | 4,18,mutex add,4686.7,1927400,1.0 172 | 5,18,mutex add,4677.5,1930400,1.0 173 | 6,18,mutex add,4663.4,1937200,1.0 174 | 7,18,mutex add,4677.7,1930400,1.0 175 | 8,18,mutex add,4683.8,1928400,1.0 176 | 9,18,mutex add,4681.7,1929200,1.0 177 | 10,18,mutex add,4654.1,1940600,1.0 178 | 11,18,mutex add,4691.6,1924200,1.0 179 | 12,18,mutex add,4670.4,1934400,1.0 180 | 13,18,mutex add,4671.5,1933600,1.0 181 | 14,18,mutex add,4661.4,1931600,1.0 182 | 0,17,pure spin,3391.8,2506200,1.0 183 | 1,17,pure 
spin,3393.7,2503000,1.0 184 | 2,17,pure spin,3396.7,2502200,1.0 185 | 3,17,pure spin,3394.7,2503800,1.0 186 | 4,17,pure spin,3499.0,2429200,1.0 187 | 5,17,pure spin,3499.4,2429000,1.0 188 | 6,17,pure spin,3392.2,2505600,1.0 189 | 7,17,pure spin,3501.4,2428800,1.0 190 | 8,17,pure spin,3390.2,2507200,1.0 191 | 9,17,pure spin,3385.8,2506000,1.0 192 | 10,17,pure spin,3396.5,2503800,1.0 193 | 11,17,pure spin,3388.9,2503400,1.0 194 | 12,17,pure spin,3389.8,2507600,1.0 195 | 13,17,pure spin,3395.7,2503000,1.0 196 | 14,17,pure spin,3401.2,2496000,1.0 197 | 0,18,pure spin,3592.4,2505200,1.0 198 | 1,18,pure spin,3592.4,2504200,1.0 199 | 2,18,pure spin,3713.0,2421800,1.0 200 | 3,18,pure spin,3590.8,2505200,1.0 201 | 4,18,pure spin,3591.2,2505000,1.0 202 | 5,18,pure spin,3845.7,2344200,1.0 203 | 6,18,pure spin,3711.2,2421800,1.0 204 | 7,18,pure spin,3591.6,2504600,1.0 205 | 8,18,pure spin,3598.5,2497400,1.0 206 | 9,18,pure spin,3616.9,2487200,1.0 207 | 10,18,pure spin,3749.2,2401600,1.0 208 | 11,18,pure spin,3609.4,2492200,1.0 209 | 12,18,pure spin,3737.2,2408000,1.0 210 | 13,18,pure spin,3611.1,2492200,1.0 211 | 14,18,pure spin,3602.6,2491000,1.0 212 | 0,17,pause spin,3309.1,2568600,1.0 213 | 1,17,pause spin,3289.2,2584600,1.0 214 | 2,17,pause spin,3309.6,2569400,1.0 215 | 3,17,pause spin,3331.6,2551200,1.0 216 | 4,17,pause spin,3449.7,2462800,1.0 217 | 5,17,pause spin,3333.8,2549800,1.0 218 | 6,17,pause spin,3342.6,2543000,1.0 219 | 7,17,pause spin,3338.8,2545800,1.0 220 | 8,17,pause spin,3338.0,2546600,1.0 221 | 9,17,pause spin,3336.9,2547000,1.0 222 | 10,17,pause spin,3329.4,2554000,1.0 223 | 11,17,pause spin,3334.7,2549000,1.0 224 | 12,17,pause spin,3351.4,2531200,1.0 225 | 13,17,pause spin,3400.3,2499800,1.0 226 | 14,17,pause spin,3503.4,2481200,1.0 227 | 0,18,pause spin,3649.6,2467000,1.0 228 | 1,18,pause spin,3652.3,2462000,1.0 229 | 2,18,pause spin,3648.2,2465600,1.0 230 | 3,18,pause spin,3646.8,2465800,1.0 231 | 4,18,pause spin,3650.0,2464400,1.0 232 | 5,18,pause 
spin,3771.9,2385000,1.0 233 | 6,18,pause spin,3534.1,2545600,1.0 234 | 7,18,pause spin,3539.7,2542600,1.0 235 | 8,18,pause spin,3651.1,2464800,1.0 236 | 9,18,pause spin,3642.3,2467600,1.0 237 | 10,18,pause spin,3535.1,2544800,1.0 238 | 11,18,pause spin,3661.6,2456800,1.0 239 | 12,18,pause spin,3598.9,2499400,1.0 240 | 13,18,pause spin,3719.1,2419800,1.0 241 | 14,18,pause spin,3594.7,2497200,1.0 242 | 0,17,yield spin,3462.0,2457200,1.0 243 | 1,17,yield spin,3473.5,2450800,1.0 244 | 2,17,yield spin,3469.6,2452600,1.0 245 | 3,17,yield spin,3465.2,2455000,1.0 246 | 4,17,yield spin,3467.7,2453800,1.0 247 | 5,17,yield spin,3467.3,2453200,1.0 248 | 6,17,yield spin,3467.6,2454200,1.0 249 | 7,17,yield spin,3465.1,2454800,1.0 250 | 8,17,yield spin,3462.5,2455800,1.0 251 | 9,17,yield spin,3470.9,2451000,1.0 252 | 10,17,yield spin,3473.2,2450200,1.0 253 | 11,17,yield spin,3470.4,2452200,1.0 254 | 12,17,yield spin,3472.4,2449400,1.0 255 | 13,17,yield spin,3472.6,2450000,1.0 256 | 14,17,yield spin,3462.3,2453600,1.0 257 | 0,18,yield spin,3783.5,2381600,1.0 258 | 1,18,yield spin,3790.4,2376800,1.0 259 | 2,18,yield spin,3783.8,2382800,1.0 260 | 3,18,yield spin,3782.4,2382200,1.0 261 | 4,18,yield spin,3715.3,2427400,1.0 262 | 5,18,yield spin,3675.5,2451600,1.0 263 | 6,18,yield spin,3674.0,2452800,1.0 264 | 7,18,yield spin,3674.6,2453000,1.0 265 | 8,18,yield spin,3680.4,2450600,1.0 266 | 9,18,yield spin,3677.2,2452200,1.0 267 | 10,18,yield spin,3669.5,2456800,1.0 268 | 11,18,yield spin,3672.2,2453000,1.0 269 | 12,18,yield spin,3680.3,2448400,1.0 270 | 13,18,yield spin,3675.1,2453600,1.0 271 | 14,18,yield spin,3663.1,2453600,1.0 272 | 0,17,ticket spin,1309310.1,61000,1.0 273 | 1,17,ticket spin,1124142.9,57800,1.0 274 | 2,17,ticket spin,1662684.5,61000,1.0 275 | 3,17,ticket spin,646044.1,61000,1.0 276 | 4,17,ticket spin,767537.9,61000,1.0 277 | 5,17,ticket spin,638262.6,57800,1.0 278 | 6,17,ticket spin,1680515.0,61000,1.0 279 | 7,17,ticket spin,1627009.3,61000,1.0 280 | 8,17,ticket 
spin,1378275.2,57800,1.0 281 | 9,17,ticket spin,1725113.7,61000,1.0 282 | 10,17,ticket spin,684276.8,57800,1.0 283 | 11,17,ticket spin,277112.4,57800,1.0 284 | 12,17,ticket spin,1377304.3,61000,1.0 285 | 13,17,ticket spin,460947.7,58000,1.0 286 | 14,17,ticket spin,392170.2,61000,1.0 287 | 0,18,ticket spin,2056636.9,45200,1.0 288 | 1,18,ticket spin,595100.2,61200,1.0 289 | 2,18,ticket spin,721316.2,45200,1.0 290 | 3,18,ticket spin,1363391.3,61200,1.0 291 | 4,18,ticket spin,1114246.3,61200,1.0 292 | 5,18,ticket spin,585946.3,61200,1.0 293 | 6,18,ticket spin,1823259.3,61200,1.0 294 | 7,18,ticket spin,493192.3,58200,1.0 295 | 8,18,ticket spin,631176.8,58000,1.0 296 | 9,18,ticket spin,1605543.6,61200,1.0 297 | 10,18,ticket spin,1178365.4,61200,1.0 298 | 11,18,ticket spin,441929.0,58000,1.0 299 | 12,18,ticket spin,312275.6,58000,1.0 300 | 13,18,ticket spin,529462.5,58200,1.0 301 | 14,18,ticket spin,425938.5,58000,1.0 302 | 0,17,ticket yield,10729.4,808000,1.0 303 | 1,17,ticket yield,10449.9,815000,1.0 304 | 2,17,ticket yield,10659.8,815200,1.0 305 | 3,17,ticket yield,10216.1,838800,1.0 306 | 4,17,ticket yield,10751.9,805000,1.0 307 | 5,17,ticket yield,10600.9,805800,1.0 308 | 6,17,ticket yield,10391.7,832000,1.0 309 | 7,17,ticket yield,10674.8,801800,1.0 310 | 8,17,ticket yield,10719.6,811600,1.0 311 | 9,17,ticket yield,10826.0,801200,1.0 312 | 10,17,ticket yield,10828.7,801200,1.0 313 | 11,17,ticket yield,10928.3,794400,1.0 314 | 12,17,ticket yield,10304.2,828600,1.0 315 | 13,17,ticket yield,10722.4,811600,1.0 316 | 14,17,ticket yield,10225.7,831800,1.0 317 | 0,18,ticket yield,13011.2,693800,1.0 318 | 1,18,ticket yield,12862.2,725600,1.0 319 | 2,18,ticket yield,13143.3,718800,1.0 320 | 3,18,ticket yield,12926.5,736000,1.0 321 | 4,18,ticket yield,13012.1,729000,1.0 322 | 5,18,ticket yield,13017.4,725800,1.0 323 | 6,18,ticket yield,12836.0,726000,1.0 324 | 7,18,ticket yield,12848.1,726000,1.0 325 | 8,18,ticket yield,12515.2,729400,1.0 326 | 9,18,ticket 
yield,12939.8,719000,1.0 327 | 10,18,ticket yield,13173.4,711600,1.0 328 | 11,18,ticket yield,12931.3,736000,1.0 329 | 12,18,ticket yield,12558.0,722000,1.0 330 | 13,18,ticket yield,12990.4,732800,1.0 331 | 14,18,ticket yield,12425.9,726000,1.0 332 | 0,17,ticket blocking,1314810.9,6800,1.0 333 | 1,17,ticket blocking,1230535.8,7000,1.0 334 | 2,17,ticket blocking,1248943.0,7000,1.0 335 | 3,17,ticket blocking,1228759.3,7000,1.0 336 | 4,17,ticket blocking,1342729.5,6800,1.0 337 | 5,17,ticket blocking,1314371.6,6800,1.0 338 | 6,17,ticket blocking,1302357.7,6800,1.0 339 | 7,17,ticket blocking,1316063.3,6800,1.0 340 | 8,17,ticket blocking,1310623.8,6800,1.0 341 | 9,17,ticket blocking,1306893.7,6800,1.0 342 | 10,17,ticket blocking,1329975.2,6800,1.0 343 | 11,17,ticket blocking,1282646.8,6800,1.0 344 | 12,17,ticket blocking,1303602.5,6800,1.0 345 | 13,17,ticket blocking,1245177.0,7000,1.0 346 | 14,17,ticket blocking,1249336.7,7000,1.0 347 | 0,18,ticket blocking,1483505.2,7200,1.0 348 | 1,18,ticket blocking,1501458.1,7200,1.0 349 | 2,18,ticket blocking,1493682.6,7200,1.0 350 | 3,18,ticket blocking,1476646.0,7200,1.0 351 | 4,18,ticket blocking,1437886.7,7200,1.0 352 | 5,18,ticket blocking,1534523.2,7200,1.0 353 | 6,18,ticket blocking,1494619.4,7200,1.0 354 | 7,18,ticket blocking,1476386.0,7200,1.0 355 | 8,18,ticket blocking,1431784.0,7200,1.0 356 | 9,18,ticket blocking,1445762.7,7200,1.0 357 | 10,18,ticket blocking,1483378.2,7200,1.0 358 | 11,18,ticket blocking,1433508.6,7200,1.0 359 | 12,18,ticket blocking,1501605.9,7200,1.0 360 | 13,18,ticket blocking,1482797.2,7200,1.0 361 | 14,18,ticket blocking,1402511.8,7200,1.0 362 | 0,17,queued fifo,229222.9,40800,1.0 363 | 1,17,queued fifo,230576.3,37400,1.0 364 | 2,17,queued fifo,218841.8,40800,1.0 365 | 3,17,queued fifo,224924.8,40800,1.0 366 | 4,17,queued fifo,226689.2,40800,1.0 367 | 5,17,queued fifo,220786.2,40800,1.0 368 | 6,17,queued fifo,228193.1,37400,1.0 369 | 7,17,queued fifo,223864.7,40800,1.0 370 | 8,17,queued 
fifo,228643.0,40800,1.0 371 | 9,17,queued fifo,230338.8,37400,1.0 372 | 10,17,queued fifo,227196.0,40800,1.0 373 | 11,17,queued fifo,226433.3,40800,1.0 374 | 12,17,queued fifo,220858.9,40800,1.0 375 | 13,17,queued fifo,220144.1,40800,1.0 376 | 14,17,queued fifo,221929.6,40800,1.0 377 | 0,18,queued fifo,247777.9,39600,1.0 378 | 1,18,queued fifo,249931.1,39600,1.0 379 | 2,18,queued fifo,237402.1,39600,1.0 380 | 3,18,queued fifo,252509.9,39600,1.0 381 | 4,18,queued fifo,243161.8,39600,1.0 382 | 5,18,queued fifo,244352.5,39600,1.0 383 | 6,18,queued fifo,236522.9,39600,1.0 384 | 7,18,queued fifo,245074.7,39600,1.0 385 | 8,18,queued fifo,247187.8,39600,1.0 386 | 9,18,queued fifo,244184.8,39600,1.0 387 | 10,18,queued fifo,240312.9,39600,1.0 388 | 11,18,queued fifo,242668.0,39600,1.0 389 | 12,18,queued fifo,244396.1,39600,1.0 390 | 13,18,queued fifo,237090.7,39600,1.0 391 | 14,18,queued fifo,234382.4,39600,1.0 392 | 0,17,mutex3,4725.4,1802600,1.0 393 | 1,17,mutex3,4742.6,1795600,1.0 394 | 2,17,mutex3,4730.0,1801200,1.0 395 | 3,17,mutex3,4717.9,1805200,1.0 396 | 4,17,mutex3,4730.5,1800200,1.0 397 | 5,17,mutex3,4731.8,1801000,1.0 398 | 6,17,mutex3,4732.9,1800400,1.0 399 | 7,17,mutex3,4737.1,1798600,1.0 400 | 8,17,mutex3,4740.6,1797400,1.0 401 | 9,17,mutex3,4739.9,1796800,1.0 402 | 10,17,mutex3,4734.0,1798800,1.0 403 | 11,17,mutex3,4736.8,1798000,1.0 404 | 12,17,mutex3,4723.7,1803800,1.0 405 | 13,17,mutex3,4751.3,1792600,1.0 406 | 14,17,mutex3,4738.8,1794600,1.0 407 | 0,18,mutex3,4961.5,1820600,1.0 408 | 1,18,mutex3,4934.0,1830600,1.0 409 | 2,18,mutex3,4947.0,1826000,1.0 410 | 3,18,mutex3,4955.4,1822000,1.0 411 | 4,18,mutex3,4941.4,1827200,1.0 412 | 5,18,mutex3,4938.3,1829200,1.0 413 | 6,18,mutex3,4941.3,1828800,1.0 414 | 7,18,mutex3,4946.2,1826000,1.0 415 | 8,18,mutex3,4933.1,1831200,1.0 416 | 9,18,mutex3,4952.3,1822200,1.0 417 | 10,18,mutex3,4951.6,1825000,1.0 418 | 11,18,mutex3,4937.7,1830600,1.0 419 | 12,18,mutex3,4941.7,1826800,1.0 420 | 13,18,mutex3,4965.4,1820000,1.0 
421 | 14,18,mutex3,4935.1,1824600,1.0 422 | -------------------------------------------------------------------------------- /results/g2-16/data_slow.csv: -------------------------------------------------------------------------------- 1 | Trial,Cores,Implementation,Nanos/Op,Total,Rlen 2 | 0,17,plain add,40.3,211057800,1.0 3 | 1,17,plain add,40.2,211467600,1.0 4 | 2,17,plain add,40.2,211088400,1.0 5 | 3,17,plain add,40.2,211461400,1.0 6 | 4,17,plain add,40.2,211462400,1.0 7 | 5,17,plain add,40.2,211485200,1.0 8 | 6,17,plain add,40.2,211285000,1.0 9 | 7,17,plain add,40.2,211372600,1.0 10 | 8,17,plain add,40.2,211283400,1.0 11 | 9,17,plain add,40.2,211317400,1.0 12 | 10,17,plain add,40.1,211765400,1.0 13 | 11,17,plain add,40.1,211755400,1.0 14 | 12,17,plain add,40.2,211305200,1.0 15 | 13,17,plain add,40.2,211258800,1.0 16 | 14,17,plain add,40.1,211522400,1.0 17 | 0,18,plain add,42.6,211254800,1.0 18 | 1,18,plain add,42.6,211380000,1.0 19 | 2,18,plain add,42.6,211398600,1.0 20 | 3,18,plain add,42.6,211441800,1.0 21 | 4,18,plain add,42.6,211145000,1.0 22 | 5,18,plain add,42.5,211478800,1.0 23 | 6,18,plain add,42.6,211523400,1.0 24 | 7,18,plain add,42.6,211314800,1.0 25 | 8,18,plain add,42.5,211432600,1.0 26 | 9,18,plain add,42.5,211394400,1.0 27 | 10,18,plain add,42.5,211532800,1.0 28 | 11,18,plain add,42.6,211491800,1.0 29 | 12,18,plain add,42.6,211251800,1.0 30 | 13,18,plain add,42.6,211085400,1.0 31 | 14,18,plain add,42.5,211261000,1.0 32 | 0,17,tls add,3.6,2328209200,1.0 33 | 1,17,tls add,3.6,2328220200,1.0 34 | 2,17,tls add,3.6,2328451200,1.0 35 | 3,17,tls add,3.6,2327825600,1.0 36 | 4,17,tls add,3.6,2328381600,1.0 37 | 5,17,tls add,3.6,2328305400,1.0 38 | 6,17,tls add,3.6,2328865600,1.0 39 | 7,17,tls add,3.6,2328178000,1.0 40 | 8,17,tls add,3.6,2327876000,1.0 41 | 9,17,tls add,3.7,2328460000,1.0 42 | 10,17,tls add,3.6,2328352800,1.0 43 | 11,17,tls add,3.6,2328267400,1.0 44 | 12,17,tls add,3.6,2328383000,1.0 45 | 13,17,tls add,3.6,2328574000,1.0 46 | 14,17,tls 
add,3.6,2328611200,1.0 47 | 0,18,tls add,3.9,2328225800,1.0 48 | 1,18,tls add,3.9,2328126600,1.0 49 | 2,18,tls add,3.9,2328328200,1.0 50 | 3,18,tls add,3.9,2328370800,1.0 51 | 4,18,tls add,3.9,2328407000,1.0 52 | 5,18,tls add,3.8,2338031600,1.0 53 | 6,18,tls add,3.9,2328236400,1.0 54 | 7,18,tls add,3.9,2328028000,1.0 55 | 8,18,tls add,3.9,2327989600,1.0 56 | 9,18,tls add,3.9,2328693800,1.0 57 | 10,18,tls add,3.9,2328477400,1.0 58 | 11,18,tls add,3.9,2328555600,1.0 59 | 12,18,tls add,3.9,2328375200,1.0 60 | 13,18,tls add,3.9,2328561400,1.0 61 | 14,18,tls add,3.9,2328552800,1.0 62 | 0,17,atomic add,150.8,56337400,1.0 63 | 1,17,atomic add,150.8,56350200,1.0 64 | 2,17,atomic add,150.9,56288800,1.0 65 | 3,17,atomic add,150.8,56328200,1.0 66 | 4,17,atomic add,150.8,56365400,1.0 67 | 5,17,atomic add,150.8,56325600,1.0 68 | 6,17,atomic add,150.9,56284400,1.0 69 | 7,17,atomic add,151.0,56279400,1.0 70 | 8,17,atomic add,151.0,56274600,1.0 71 | 9,17,atomic add,150.8,56283000,1.0 72 | 10,17,atomic add,151.2,56223600,1.0 73 | 11,17,atomic add,150.8,56329400,1.0 74 | 12,17,atomic add,151.1,56261600,1.0 75 | 13,17,atomic add,151.0,56234400,1.0 76 | 14,17,atomic add,150.8,56244600,1.0 77 | 0,18,atomic add,160.0,56250800,1.0 78 | 1,18,atomic add,159.7,56316800,1.0 79 | 2,18,atomic add,159.7,56317400,1.0 80 | 3,18,atomic add,159.6,56278000,1.0 81 | 4,18,atomic add,159.6,56330200,1.0 82 | 5,18,atomic add,159.7,56248200,1.0 83 | 6,18,atomic add,159.6,56327200,1.0 84 | 7,18,atomic add,159.8,56330000,1.0 85 | 8,18,atomic add,159.7,56299200,1.0 86 | 9,18,atomic add,159.9,56261200,1.0 87 | 10,18,atomic add,159.9,56277000,1.0 88 | 11,18,atomic add,159.5,56290200,1.0 89 | 12,18,atomic add,159.7,56310800,1.0 90 | 13,18,atomic add,159.7,56264600,1.0 91 | 14,18,atomic add,159.3,56332200,1.0 92 | 0,17,cas add,569.3,14924000,1.0 93 | 1,17,cas add,570.0,14899200,1.0 94 | 2,17,cas add,569.9,14908000,1.0 95 | 3,17,cas add,571.3,14872400,1.0 96 | 4,17,cas add,570.4,14895200,1.0 97 | 5,17,cas 
add,570.0,14914200,1.0 98 | 6,17,cas add,569.4,14922000,1.0 99 | 7,17,cas add,570.3,14899600,1.0 100 | 8,17,cas add,569.1,14915400,1.0 101 | 9,17,cas add,569.5,14919400,1.0 102 | 10,17,cas add,570.2,14901000,1.0 103 | 11,17,cas add,569.6,14901600,1.0 104 | 12,17,cas add,569.8,14901000,1.0 105 | 13,17,cas add,568.7,14939600,1.0 106 | 14,17,cas add,569.9,14889000,1.0 107 | 0,18,cas add,604.1,14885400,1.0 108 | 1,18,cas add,603.3,14899600,1.0 109 | 2,18,cas add,604.7,14878400,1.0 110 | 3,18,cas add,603.0,14907600,1.0 111 | 4,18,cas add,602.2,14926200,1.0 112 | 5,18,cas add,604.7,14889800,1.0 113 | 6,18,cas add,603.0,14905600,1.0 114 | 7,18,cas add,603.7,14881600,1.0 115 | 8,18,cas add,603.6,14892400,1.0 116 | 9,18,cas add,603.8,14899600,1.0 117 | 10,18,cas add,603.6,14910200,1.0 118 | 11,18,cas add,602.5,14926200,1.0 119 | 12,18,cas add,604.2,14889800,1.0 120 | 13,18,cas add,603.0,14920400,1.0 121 | 14,18,cas add,601.5,14918400,1.0 122 | 0,17,cas multi,9.3,916923800,1.0 123 | 1,17,cas multi,9.3,916915000,1.0 124 | 2,17,cas multi,9.3,916962400,1.0 125 | 3,17,cas multi,9.3,916908400,1.0 126 | 4,17,cas multi,9.3,916959400,1.0 127 | 5,17,cas multi,9.3,916906400,1.0 128 | 6,17,cas multi,9.3,916957600,1.0 129 | 7,17,cas multi,9.3,916901800,1.0 130 | 8,17,cas multi,9.3,916949600,1.0 131 | 9,17,cas multi,9.3,916909200,1.0 132 | 10,17,cas multi,9.3,916955200,1.0 133 | 11,17,cas multi,9.3,916910000,1.0 134 | 12,17,cas multi,9.3,916950800,1.0 135 | 13,17,cas multi,9.3,916915600,1.0 136 | 14,17,cas multi,9.3,916960800,1.0 137 | 0,18,cas multi,9.8,916913800,1.0 138 | 1,18,cas multi,9.8,916896600,1.0 139 | 2,18,cas multi,9.8,916958000,1.0 140 | 3,18,cas multi,9.8,916900800,1.0 141 | 4,18,cas multi,9.8,916965600,1.0 142 | 5,18,cas multi,9.8,916887800,1.0 143 | 6,18,cas multi,9.8,916955600,1.0 144 | 7,18,cas multi,9.8,916896400,1.0 145 | 8,18,cas multi,9.8,916966600,1.0 146 | 9,18,cas multi,9.8,916901800,1.0 147 | 10,18,cas multi,9.8,916968600,1.0 148 | 11,18,cas 
multi,9.8,916892400,1.0 149 | 12,18,cas multi,9.8,916968000,1.0 150 | 13,18,cas multi,9.8,915823000,1.0 151 | 14,18,cas multi,9.8,916842000,1.0 152 | 0,17,mutex add,2292.3,3713800,1.0 153 | 1,17,mutex add,2298.6,3705600,1.0 154 | 2,17,mutex add,2291.3,3717400,1.0 155 | 3,17,mutex add,2296.2,3707800,1.0 156 | 4,17,mutex add,2295.4,3711200,1.0 157 | 5,17,mutex add,2298.4,3706000,1.0 158 | 6,17,mutex add,2291.8,3714800,1.0 159 | 7,17,mutex add,2294.9,3710000,1.0 160 | 8,17,mutex add,2299.9,3702000,1.0 161 | 9,17,mutex add,2295.7,3710600,1.0 162 | 10,17,mutex add,2294.7,3710200,1.0 163 | 11,17,mutex add,2295.8,3708200,1.0 164 | 12,17,mutex add,2292.9,3713000,1.0 165 | 13,17,mutex add,2295.4,3710800,1.0 166 | 14,17,mutex add,2289.3,3714400,1.0 167 | 0,18,mutex add,2425.3,3723200,1.0 168 | 1,18,mutex add,2423.5,3724200,1.0 169 | 2,18,mutex add,2426.5,3721600,1.0 170 | 3,18,mutex add,2428.6,3716600,1.0 171 | 4,18,mutex add,2433.9,3710400,1.0 172 | 5,18,mutex add,2427.7,3717800,1.0 173 | 6,18,mutex add,2423.7,3722800,1.0 174 | 7,18,mutex add,2426.8,3721000,1.0 175 | 8,18,mutex add,2427.2,3720600,1.0 176 | 9,18,mutex add,2428.1,3718600,1.0 177 | 10,18,mutex add,2425.8,3720800,1.0 178 | 11,18,mutex add,2428.6,3718200,1.0 179 | 12,18,mutex add,2429.9,3717800,1.0 180 | 13,18,mutex add,2428.5,3718400,1.0 181 | 14,18,mutex add,2428.3,3707800,1.0 182 | 0,17,pure spin,4386.7,1937800,1.0 183 | 1,17,pure spin,4995.0,1716200,1.0 184 | 2,17,pure spin,4474.6,1898800,1.0 185 | 3,17,pure spin,4898.9,1734400,1.0 186 | 4,17,pure spin,5141.8,1652400,1.0 187 | 5,17,pure spin,5196.6,1635000,1.0 188 | 6,17,pure spin,5044.8,1684200,1.0 189 | 7,17,pure spin,4157.6,2044600,1.0 190 | 8,17,pure spin,4356.6,1950200,1.0 191 | 9,17,pure spin,4353.0,1952800,1.0 192 | 10,17,pure spin,4579.4,1855400,1.0 193 | 11,17,pure spin,4351.9,1954200,1.0 194 | 12,17,pure spin,4340.6,1957400,1.0 195 | 13,17,pure spin,4619.7,1840000,1.0 196 | 14,17,pure spin,4429.1,1917600,1.0 197 | 0,18,pure spin,5241.6,1717200,1.0 
198 | 1,18,pure spin,5510.9,1633200,1.0 199 | 2,18,pure spin,4928.2,1886400,1.0 200 | 3,18,pure spin,4639.6,1937400,1.0 201 | 4,18,pure spin,5613.9,1600600,1.0 202 | 5,18,pure spin,4776.0,1885400,1.0 203 | 6,18,pure spin,5859.9,1536600,1.0 204 | 7,18,pure spin,4901.7,1834600,1.0 205 | 8,18,pure spin,5140.2,1751000,1.0 206 | 9,18,pure spin,5623.1,1599200,1.0 207 | 10,18,pure spin,5502.8,1634200,1.0 208 | 11,18,pure spin,5514.8,1634200,1.0 209 | 12,18,pure spin,4738.6,1896800,1.0 210 | 13,18,pure spin,5281.8,1702600,1.0 211 | 14,18,pure spin,4757.6,1888600,1.0 212 | 0,17,pause spin,4150.4,2049200,1.0 213 | 1,17,pause spin,4150.7,2048000,1.0 214 | 2,17,pause spin,4166.1,2039400,1.0 215 | 3,17,pause spin,4170.3,2038400,1.0 216 | 4,17,pause spin,4166.4,2039400,1.0 217 | 5,17,pause spin,4166.6,2041200,1.0 218 | 6,17,pause spin,4158.7,2042200,1.0 219 | 7,17,pause spin,4158.7,2043000,1.0 220 | 8,17,pause spin,4161.6,2042600,1.0 221 | 9,17,pause spin,4165.5,2041800,1.0 222 | 10,17,pause spin,4158.0,2043400,1.0 223 | 11,17,pause spin,4166.8,2042000,1.0 224 | 12,17,pause spin,4171.1,2039000,1.0 225 | 13,17,pause spin,4160.4,2042200,1.0 226 | 14,17,pause spin,4168.4,2037600,1.0 227 | 0,18,pause spin,4405.4,2042200,1.0 228 | 1,18,pause spin,4321.8,2098200,1.0 229 | 2,18,pause spin,4196.8,2156000,1.0 230 | 3,18,pause spin,4203.6,2146200,1.0 231 | 4,18,pause spin,4646.5,1951000,1.0 232 | 5,18,pause spin,4741.5,1894800,1.0 233 | 6,18,pause spin,4812.6,1872800,1.0 234 | 7,18,pause spin,4540.1,1981600,1.0 235 | 8,18,pause spin,4331.3,2075200,1.0 236 | 9,18,pause spin,4416.8,2035800,1.0 237 | 10,18,pause spin,4416.7,2036000,1.0 238 | 11,18,pause spin,4586.4,1962800,1.0 239 | 12,18,pause spin,4426.3,2034400,1.0 240 | 13,18,pause spin,4416.4,2038000,1.0 241 | 14,18,pause spin,4407.9,2038000,1.0 242 | 0,17,yield spin,1700.6,4983200,1.0 243 | 1,17,yield spin,2557.1,3309600,1.0 244 | 2,17,yield spin,1989.0,4280000,1.0 245 | 3,17,yield spin,2718.0,3133800,1.0 246 | 4,17,yield 
spin,2699.0,3147000,1.0 247 | 5,17,yield spin,2794.5,3048400,1.0 248 | 6,17,yield spin,2825.1,3021600,1.0 249 | 7,17,yield spin,2300.4,3700200,1.0 250 | 8,17,yield spin,2771.1,3071200,1.0 251 | 9,17,yield spin,2839.5,2997000,1.0 252 | 10,17,yield spin,2864.5,2969000,1.0 253 | 11,17,yield spin,2870.9,2963400,1.0 254 | 12,17,yield spin,2869.3,2964600,1.0 255 | 13,17,yield spin,2865.3,2969000,1.0 256 | 14,17,yield spin,2861.3,2968800,1.0 257 | 0,18,yield spin,3040.3,2964000,1.0 258 | 1,18,yield spin,3038.8,2964600,1.0 259 | 2,18,yield spin,3048.0,2958800,1.0 260 | 3,18,yield spin,3048.9,2958000,1.0 261 | 4,18,yield spin,3045.7,2960800,1.0 262 | 5,18,yield spin,3044.1,2961200,1.0 263 | 6,18,yield spin,3044.2,2961400,1.0 264 | 7,18,yield spin,3043.7,2960400,1.0 265 | 8,18,yield spin,3039.3,2963800,1.0 266 | 9,18,yield spin,3042.6,2961600,1.0 267 | 10,18,yield spin,3041.9,2960000,1.0 268 | 11,18,yield spin,3040.4,2962600,1.0 269 | 12,18,yield spin,3046.3,2959400,1.0 270 | 13,18,yield spin,3040.2,2960600,1.0 271 | 14,18,yield spin,3041.3,2956600,1.0 272 | 0,17,ticket spin,2965472.4,22600,1.0 273 | 1,17,ticket spin,2821065.3,22600,1.0 274 | 2,17,ticket spin,628293.1,48200,1.0 275 | 3,17,ticket spin,2320277.4,29000,1.0 276 | 4,17,ticket spin,2003706.3,29000,1.0 277 | 5,17,ticket spin,1619194.6,29000,1.0 278 | 6,17,ticket spin,2845300.6,22600,1.0 279 | 7,17,ticket spin,1931023.7,29000,1.0 280 | 8,17,ticket spin,1855982.0,29000,1.0 281 | 9,17,ticket spin,1863039.5,29000,1.0 282 | 10,17,ticket spin,1975583.7,29000,1.0 283 | 11,17,ticket spin,1846624.5,29000,1.0 284 | 12,17,ticket spin,1874753.3,29000,1.0 285 | 13,17,ticket spin,1827860.8,29000,1.0 286 | 14,17,ticket spin,2981256.3,22600,1.0 287 | 0,18,ticket spin,10209935.9,10000,1.0 288 | 1,18,ticket spin,2443831.4,29200,1.0 289 | 2,18,ticket spin,3383663.9,22800,1.0 290 | 3,18,ticket spin,5172176.1,16400,1.0 291 | 4,18,ticket spin,5254132.2,16400,1.0 292 | 5,18,ticket spin,1967938.1,29200,1.0 293 | 6,18,ticket 
spin,3301206.2,22800,1.0 294 | 7,18,ticket spin,5375595.4,16400,1.0 295 | 8,18,ticket spin,5206809.7,16400,1.0 296 | 9,18,ticket spin,5220961.0,16400,1.0 297 | 10,18,ticket spin,2029033.6,29200,1.0 298 | 11,18,ticket spin,3312082.8,22800,1.0 299 | 12,18,ticket spin,2176427.3,29200,1.0 300 | 13,18,ticket spin,5338999.1,16400,1.0 301 | 14,18,ticket spin,5226841.3,16400,1.0 302 | 0,17,ticket yield,9609.1,886200,1.0 303 | 1,17,ticket yield,9634.2,883600,1.0 304 | 2,17,ticket yield,9618.6,910200,1.0 305 | 3,17,ticket yield,9399.9,907000,1.0 306 | 4,17,ticket yield,9548.0,910200,1.0 307 | 5,17,ticket yield,9510.7,913200,1.0 308 | 6,17,ticket yield,9395.6,907000,1.0 309 | 7,17,ticket yield,9348.0,910600,1.0 310 | 8,17,ticket yield,9552.0,909800,1.0 311 | 9,17,ticket yield,9513.2,913200,1.0 312 | 10,17,ticket yield,9296.1,917000,1.0 313 | 11,17,ticket yield,9623.0,910400,1.0 314 | 12,17,ticket yield,9366.2,910200,1.0 315 | 13,17,ticket yield,9361.9,910000,1.0 316 | 14,17,ticket yield,9177.8,926800,1.0 317 | 0,18,ticket yield,10730.5,866800,1.0 318 | 1,18,ticket yield,10319.3,898800,1.0 319 | 2,18,ticket yield,10664.5,884200,1.0 320 | 3,18,ticket yield,10725.9,887600,1.0 321 | 4,18,ticket yield,10101.2,895400,1.0 322 | 5,18,ticket yield,10485.3,887200,1.0 323 | 6,18,ticket yield,10527.5,869600,1.0 324 | 7,18,ticket yield,10500.1,884400,1.0 325 | 8,18,ticket yield,11036.0,862800,1.0 326 | 9,18,ticket yield,10599.4,891600,1.0 327 | 10,18,ticket yield,10503.0,880600,1.0 328 | 11,18,ticket yield,10394.2,884600,1.0 329 | 12,18,ticket yield,10261.8,881000,1.0 330 | 13,18,ticket yield,10262.4,898800,1.0 331 | 14,18,ticket yield,10159.3,888000,1.0 332 | 0,17,ticket blocking,589540.6,17000,1.0 333 | 1,17,ticket blocking,579446.1,17000,1.0 334 | 2,17,ticket blocking,586682.1,17000,1.0 335 | 3,17,ticket blocking,589443.6,17000,1.0 336 | 4,17,ticket blocking,586999.3,17000,1.0 337 | 5,17,ticket blocking,578991.7,17000,1.0 338 | 6,17,ticket blocking,598348.1,17000,1.0 339 | 7,17,ticket 
blocking,593431.0,17000,1.0 340 | 8,17,ticket blocking,590078.9,17000,1.0 341 | 9,17,ticket blocking,579197.0,17000,1.0 342 | 10,17,ticket blocking,586256.3,17000,1.0 343 | 11,17,ticket blocking,574964.0,17000,1.0 344 | 12,17,ticket blocking,582068.7,17000,1.0 345 | 13,17,ticket blocking,594566.2,17000,1.0 346 | 14,17,ticket blocking,581051.0,17000,1.0 347 | 0,18,ticket blocking,645870.9,14600,1.0 348 | 1,18,ticket blocking,666466.6,14400,1.0 349 | 2,18,ticket blocking,673800.7,14400,1.0 350 | 3,18,ticket blocking,673027.4,14400,1.0 351 | 4,18,ticket blocking,633917.0,14600,1.0 352 | 5,18,ticket blocking,662499.6,14400,1.0 353 | 6,18,ticket blocking,674440.8,14400,1.0 354 | 7,18,ticket blocking,663060.6,14400,1.0 355 | 8,18,ticket blocking,638565.7,14600,1.0 356 | 9,18,ticket blocking,625995.1,14600,1.0 357 | 10,18,ticket blocking,677253.4,14400,1.0 358 | 11,18,ticket blocking,676495.2,14400,1.0 359 | 12,18,ticket blocking,686041.7,14400,1.0 360 | 13,18,ticket blocking,650377.2,14400,1.0 361 | 14,18,ticket blocking,646787.0,14400,1.0 362 | 0,17,queued fifo,85262.8,102000,1.0 363 | 1,17,queued fifo,85798.4,101400,1.0 364 | 2,17,queued fifo,87303.2,101800,1.0 365 | 3,17,queued fifo,85930.2,101400,1.0 366 | 4,17,queued fifo,87357.2,102000,1.0 367 | 5,17,queued fifo,86526.6,102000,1.0 368 | 6,17,queued fifo,87123.6,102000,1.0 369 | 7,17,queued fifo,87217.1,102000,1.0 370 | 8,17,queued fifo,83584.0,102200,1.0 371 | 9,17,queued fifo,85255.1,102000,1.0 372 | 10,17,queued fifo,85781.4,102000,1.0 373 | 11,17,queued fifo,86022.5,100800,1.0 374 | 12,17,queued fifo,84304.0,104600,1.0 375 | 13,17,queued fifo,87777.2,101600,1.0 376 | 14,17,queued fifo,85446.2,102000,1.0 377 | 0,18,queued fifo,93369.8,100800,1.0 378 | 1,18,queued fifo,92700.6,100800,1.0 379 | 2,18,queued fifo,91964.7,100800,1.0 380 | 3,18,queued fifo,91038.5,100800,1.0 381 | 4,18,queued fifo,92162.0,100800,1.0 382 | 5,18,queued fifo,91593.6,104400,1.0 383 | 6,18,queued fifo,90166.3,100800,1.0 384 | 7,18,queued 
fifo,92859.9,100800,1.0 385 | 8,18,queued fifo,91166.5,100800,1.0 386 | 9,18,queued fifo,93257.0,100800,1.0 387 | 10,18,queued fifo,91694.0,100800,1.0 388 | 11,18,queued fifo,93968.7,100800,1.0 389 | 12,18,queued fifo,92254.9,100800,1.0 390 | 13,18,queued fifo,91119.8,100800,1.0 391 | 14,18,queued fifo,88002.7,104400,1.0 392 | 0,17,mutex3,2114.1,4028800,1.0 393 | 1,17,mutex3,2108.4,4040400,1.0 394 | 2,17,mutex3,2111.3,4033200,1.0 395 | 3,17,mutex3,2106.4,4042400,1.0 396 | 4,17,mutex3,2113.4,4028400,1.0 397 | 5,17,mutex3,2118.4,4019600,1.0 398 | 6,17,mutex3,2109.5,4036600,1.0 399 | 7,17,mutex3,2112.1,4031000,1.0 400 | 8,17,mutex3,2111.3,4034400,1.0 401 | 9,17,mutex3,2106.1,4044400,1.0 402 | 10,17,mutex3,2102.1,4050600,1.0 403 | 11,17,mutex3,2113.0,4031000,1.0 404 | 12,17,mutex3,2112.3,4030400,1.0 405 | 13,17,mutex3,2106.9,4041400,1.0 406 | 14,17,mutex3,2113.7,4023000,1.0 407 | 0,18,mutex3,2234.5,4041400,1.0 408 | 1,18,mutex3,2223.0,4062600,1.0 409 | 2,18,mutex3,2224.5,4057200,1.0 410 | 3,18,mutex3,2227.1,4053000,1.0 411 | 4,18,mutex3,2227.6,4054000,1.0 412 | 5,18,mutex3,2223.6,4063000,1.0 413 | 6,18,mutex3,2227.1,4052400,1.0 414 | 7,18,mutex3,2217.9,4069600,1.0 415 | 8,18,mutex3,2230.1,4049600,1.0 416 | 9,18,mutex3,2226.8,4053000,1.0 417 | 10,18,mutex3,2231.1,4047400,1.0 418 | 11,18,mutex3,2223.5,4061200,1.0 419 | 12,18,mutex3,2236.1,4040600,1.0 420 | 13,18,mutex3,2223.1,4060000,1.0 421 | 14,18,mutex3,2223.4,4049800,1.0 422 | -------------------------------------------------------------------------------- /results/icl/data_slow.csv: -------------------------------------------------------------------------------- 1 | Trial,Cores,Implementation,Nanos/Op,Total,Rlen 2 | 0,5,plain add,3.1,799141000,1.0 3 | 1,5,plain add,3.1,799338200,1.0 4 | 2,5,plain add,3.1,799887000,1.0 5 | 3,5,plain add,3.1,799644200,1.0 6 | 4,5,plain add,3.1,799483000,1.0 7 | 5,5,plain add,3.1,799187600,1.0 8 | 6,5,plain add,3.1,799137200,1.0 9 | 7,5,plain add,3.1,798842200,1.0 10 | 8,5,plain 
add,3.1,799122800,1.0 11 | 9,5,plain add,3.1,799215000,1.0 12 | 10,5,plain add,3.1,799548400,1.0 13 | 11,5,plain add,3.1,799383800,1.0 14 | 12,5,plain add,3.1,799017000,1.0 15 | 13,5,plain add,3.1,798522400,1.0 16 | 14,5,plain add,3.1,798092800,1.0 17 | 0,6,plain add,3.8,798516200,1.0 18 | 1,6,plain add,3.8,798730400,1.0 19 | 2,6,plain add,3.7,798822200,1.0 20 | 3,6,plain add,3.8,798009400,1.0 21 | 4,6,plain add,3.8,797446600,1.0 22 | 5,6,plain add,3.8,797567600,1.0 23 | 6,6,plain add,3.8,797278800,1.0 24 | 7,6,plain add,3.8,797426600,1.0 25 | 8,6,plain add,3.7,795932200,1.0 26 | 9,6,plain add,3.8,797179800,1.0 27 | 10,6,plain add,3.8,791025000,1.0 28 | 11,6,plain add,3.8,792700800,1.0 29 | 12,6,plain add,3.8,791907600,1.0 30 | 13,6,plain add,3.8,791289400,1.0 31 | 14,6,plain add,3.8,788820800,1.0 32 | 0,5,tls add,2.2,1154084400,1.0 33 | 1,5,tls add,2.2,1148256000,1.0 34 | 2,5,tls add,2.2,1156987200,1.0 35 | 3,5,tls add,2.2,1155530600,1.0 36 | 4,5,tls add,2.2,1156300800,1.0 37 | 5,5,tls add,2.2,1150846400,1.0 38 | 6,5,tls add,2.2,1155870600,1.0 39 | 7,5,tls add,2.2,1151910400,1.0 40 | 8,5,tls add,2.2,1149302400,1.0 41 | 9,5,tls add,2.2,1149134600,1.0 42 | 10,5,tls add,2.2,1146085200,1.0 43 | 11,5,tls add,2.2,1148589800,1.0 44 | 12,5,tls add,2.2,1133443000,1.0 45 | 13,5,tls add,2.2,1144942000,1.0 46 | 14,5,tls add,2.2,1140821200,1.0 47 | 0,6,tls add,2.6,1136925200,1.0 48 | 1,6,tls add,2.6,1131783000,1.0 49 | 2,6,tls add,2.6,1131185800,1.0 50 | 3,6,tls add,2.7,1127877600,1.0 51 | 4,6,tls add,2.6,1135009400,1.0 52 | 5,6,tls add,2.6,1129887800,1.0 53 | 6,6,tls add,2.7,1124557000,1.0 54 | 7,6,tls add,2.7,1124092000,1.0 55 | 8,6,tls add,2.7,1122746400,1.0 56 | 9,6,tls add,2.9,1015518200,1.0 57 | 10,6,tls add,3.0,1004172200,1.0 58 | 11,6,tls add,2.9,1015962000,1.0 59 | 12,6,tls add,2.9,1018707600,1.0 60 | 13,6,tls add,2.9,1024240600,1.0 61 | 14,6,tls add,2.9,1013338200,1.0 62 | 0,5,atomic add,80.4,30997400,1.0 63 | 1,5,atomic add,80.3,31087800,1.0 64 | 2,5,atomic 
add,80.4,31089000,1.0 65 | 3,5,atomic add,80.2,31123400,1.0 66 | 4,5,atomic add,80.4,31106000,1.0 67 | 5,5,atomic add,80.1,31122800,1.0 68 | 6,5,atomic add,80.2,31156600,1.0 69 | 7,5,atomic add,80.1,31101200,1.0 70 | 8,5,atomic add,80.2,31184000,1.0 71 | 9,5,atomic add,80.1,31148200,1.0 72 | 10,5,atomic add,79.9,31200600,1.0 73 | 11,5,atomic add,79.7,31274000,1.0 74 | 12,5,atomic add,79.8,31238200,1.0 75 | 13,5,atomic add,80.0,31166600,1.0 76 | 14,5,atomic add,79.4,31319600,1.0 77 | 0,6,atomic add,96.2,31150400,1.0 78 | 1,6,atomic add,96.1,31175600,1.0 79 | 2,6,atomic add,96.2,31218200,1.0 80 | 3,6,atomic add,95.9,31241400,1.0 81 | 4,6,atomic add,96.0,31234600,1.0 82 | 5,6,atomic add,95.9,31212400,1.0 83 | 6,6,atomic add,95.7,31362400,1.0 84 | 7,6,atomic add,96.0,31245000,1.0 85 | 8,6,atomic add,95.7,31258000,1.0 86 | 9,6,atomic add,95.8,31272800,1.0 87 | 10,6,atomic add,95.7,31334400,1.0 88 | 11,6,atomic add,95.9,31244600,1.0 89 | 12,6,atomic add,96.0,31213000,1.0 90 | 13,6,atomic add,95.7,31270000,1.0 91 | 14,6,atomic add,95.3,31270000,1.0 92 | 0,5,cas add,286.8,8717200,1.0 93 | 1,5,cas add,285.1,8754000,1.0 94 | 2,5,cas add,285.0,8744800,1.0 95 | 3,5,cas add,285.8,8720600,1.0 96 | 4,5,cas add,287.1,8707400,1.0 97 | 5,5,cas add,285.2,8752200,1.0 98 | 6,5,cas add,285.9,8729000,1.0 99 | 7,5,cas add,284.9,8747000,1.0 100 | 8,5,cas add,286.0,8714600,1.0 101 | 9,5,cas add,285.5,8729600,1.0 102 | 10,5,cas add,285.5,8730200,1.0 103 | 11,5,cas add,285.3,8735400,1.0 104 | 12,5,cas add,285.5,8729000,1.0 105 | 13,5,cas add,285.5,8728000,1.0 106 | 14,5,cas add,285.1,8727800,1.0 107 | 0,6,cas add,341.3,8755200,1.0 108 | 1,6,cas add,346.0,8670400,1.0 109 | 2,6,cas add,342.5,8748000,1.0 110 | 3,6,cas add,346.8,8639200,1.0 111 | 4,6,cas add,341.7,8778600,1.0 112 | 5,6,cas add,325.1,9215200,1.0 113 | 6,6,cas add,328.7,9140400,1.0 114 | 7,6,cas add,327.3,9153600,1.0 115 | 8,6,cas add,323.9,9262200,1.0 116 | 9,6,cas add,324.5,9244400,1.0 117 | 10,6,cas add,333.0,8960600,1.0 118 | 
11,6,cas add,341.2,8769800,1.0 119 | 12,6,cas add,341.9,8774800,1.0 120 | 13,6,cas add,336.3,8873000,1.0 121 | 14,6,cas add,339.3,8772800,1.0 122 | 0,5,cas multi,14.9,166772000,1.0 123 | 1,5,cas multi,15.0,166438000,1.0 124 | 2,5,cas multi,15.0,166223800,1.0 125 | 3,5,cas multi,15.0,166576000,1.0 126 | 4,5,cas multi,15.0,166846600,1.0 127 | 5,5,cas multi,15.0,166149800,1.0 128 | 6,5,cas multi,15.0,166633200,1.0 129 | 7,5,cas multi,15.0,166466600,1.0 130 | 8,5,cas multi,15.0,166341600,1.0 131 | 9,5,cas multi,15.0,166312400,1.0 132 | 10,5,cas multi,15.1,166071800,1.0 133 | 11,5,cas multi,15.0,166063800,1.0 134 | 12,5,cas multi,15.0,166838000,1.0 135 | 13,5,cas multi,15.1,166061000,1.0 136 | 14,5,cas multi,15.0,166075400,1.0 137 | 0,6,cas multi,18.0,166051000,1.0 138 | 1,6,cas multi,18.1,166423800,1.0 139 | 2,6,cas multi,18.0,166199000,1.0 140 | 3,6,cas multi,18.0,166089600,1.0 141 | 4,6,cas multi,18.0,166742800,1.0 142 | 5,6,cas multi,18.1,166074200,1.0 143 | 6,6,cas multi,18.0,166082000,1.0 144 | 7,6,cas multi,18.0,166347600,1.0 145 | 8,6,cas multi,18.0,166320600,1.0 146 | 9,6,cas multi,18.0,166136800,1.0 147 | 10,6,cas multi,17.9,166930800,1.0 148 | 11,6,cas multi,18.0,166150800,1.0 149 | 12,6,cas multi,18.0,166094000,1.0 150 | 13,6,cas multi,17.9,166897000,1.0 151 | 14,6,cas multi,17.9,166094600,1.0 152 | 0,5,mutex add,889.8,2823200,1.0 153 | 1,5,mutex add,879.0,2857800,1.0 154 | 2,5,mutex add,894.6,2808200,1.0 155 | 3,5,mutex add,883.3,2843800,1.0 156 | 4,5,mutex add,888.3,2827800,1.0 157 | 5,5,mutex add,880.9,2851800,1.0 158 | 6,5,mutex add,897.4,2799200,1.0 159 | 7,5,mutex add,892.0,2816400,1.0 160 | 8,5,mutex add,880.6,2848200,1.0 161 | 9,5,mutex add,897.7,2798400,1.0 162 | 10,5,mutex add,898.4,2796200,1.0 163 | 11,5,mutex add,884.9,2834400,1.0 164 | 12,5,mutex add,897.0,2800800,1.0 165 | 13,5,mutex add,883.2,2844400,1.0 166 | 14,5,mutex add,890.4,2807800,1.0 167 | 0,6,mutex add,1039.8,2904400,1.0 168 | 1,6,mutex add,1039.7,2904600,1.0 169 | 2,6,mutex 
add,1035.6,2916200,1.0 170 | 3,6,mutex add,1040.2,2907400,1.0 171 | 4,6,mutex add,875.6,3449400,1.0 172 | 5,6,mutex add,1036.9,2909000,1.0 173 | 6,6,mutex add,1037.1,2915800,1.0 174 | 7,6,mutex add,1039.8,2908200,1.0 175 | 8,6,mutex add,1034.9,2918400,1.0 176 | 9,6,mutex add,1038.3,2908600,1.0 177 | 10,6,mutex add,1042.6,2896600,1.0 178 | 11,6,mutex add,1035.0,2918000,1.0 179 | 12,6,mutex add,1033.6,2918400,1.0 180 | 13,6,mutex add,1035.1,2917600,1.0 181 | 14,6,mutex add,1037.5,2891800,1.0 182 | 0,5,pure spin,563.5,4429800,1.0 183 | 1,5,pure spin,550.8,4524200,1.0 184 | 2,5,pure spin,566.6,4405600,1.0 185 | 3,5,pure spin,605.6,4115200,1.0 186 | 4,5,pure spin,776.6,3214200,1.0 187 | 5,5,pure spin,569.3,4377800,1.0 188 | 6,5,pure spin,872.9,2864000,1.0 189 | 7,5,pure spin,562.4,4452400,1.0 190 | 8,5,pure spin,738.5,3374800,1.0 191 | 9,5,pure spin,728.8,3458200,1.0 192 | 10,5,pure spin,755.0,3311600,1.0 193 | 11,5,pure spin,691.9,3601600,1.0 194 | 12,5,pure spin,747.7,3338200,1.0 195 | 13,5,pure spin,882.2,2825000,1.0 196 | 14,5,pure spin,777.7,3199800,1.0 197 | 0,6,pure spin,1125.8,2664800,1.0 198 | 1,6,pure spin,1187.5,2513000,1.0 199 | 2,6,pure spin,1106.4,2700800,1.0 200 | 3,6,pure spin,1184.6,2532600,1.0 201 | 4,6,pure spin,1161.3,2649000,1.0 202 | 5,6,pure spin,1180.3,2531600,1.0 203 | 6,6,pure spin,1193.4,2510600,1.0 204 | 7,6,pure spin,1083.1,2766200,1.0 205 | 8,6,pure spin,1180.3,2535000,1.0 206 | 9,6,pure spin,1150.1,2633000,1.0 207 | 10,6,pure spin,1213.2,2525600,1.0 208 | 11,6,pure spin,1100.8,2725400,1.0 209 | 12,6,pure spin,1096.8,2739000,1.0 210 | 13,6,pure spin,954.1,3140400,1.0 211 | 14,6,pure spin,1019.6,2927000,1.0 212 | 0,5,pause spin,596.7,4250400,1.0 213 | 1,5,pause spin,586.1,4266000,1.0 214 | 2,5,pause spin,634.6,3939800,1.0 215 | 3,5,pause spin,617.5,4081400,1.0 216 | 4,5,pause spin,599.1,4160000,1.0 217 | 5,5,pause spin,549.6,4658000,1.0 218 | 6,5,pause spin,524.6,4750400,1.0 219 | 7,5,pause spin,634.0,3943200,1.0 220 | 8,5,pause 
spin,601.6,4142600,1.0 221 | 9,5,pause spin,560.6,4452400,1.0 222 | 10,5,pause spin,536.9,4767800,1.0 223 | 11,5,pause spin,598.3,4165200,1.0 224 | 12,5,pause spin,598.8,4161600,1.0 225 | 13,5,pause spin,595.0,4188200,1.0 226 | 14,5,pause spin,628.4,3991200,1.0 227 | 0,6,pause spin,761.0,3973600,1.0 228 | 1,6,pause spin,762.7,3917800,1.0 229 | 2,6,pause spin,880.5,3434600,1.0 230 | 3,6,pause spin,717.4,4165400,1.0 231 | 4,6,pause spin,905.4,3309200,1.0 232 | 5,6,pause spin,868.4,3445600,1.0 233 | 6,6,pause spin,835.6,3619200,1.0 234 | 7,6,pause spin,842.4,3599600,1.0 235 | 8,6,pause spin,783.8,3807400,1.0 236 | 9,6,pause spin,814.9,3661800,1.0 237 | 10,6,pause spin,760.0,3937000,1.0 238 | 11,6,pause spin,819.4,3661400,1.0 239 | 12,6,pause spin,836.7,3566400,1.0 240 | 13,6,pause spin,846.7,3567000,1.0 241 | 14,6,pause spin,759.9,3921600,1.0 242 | 0,5,yield spin,263.4,9495600,1.0 243 | 1,5,yield spin,262.8,9517000,1.0 244 | 2,5,yield spin,263.6,9503000,1.0 245 | 3,5,yield spin,263.2,9521600,1.0 246 | 4,5,yield spin,262.3,9553800,1.0 247 | 5,5,yield spin,262.9,9515600,1.0 248 | 6,5,yield spin,262.3,9537400,1.0 249 | 7,5,yield spin,262.9,9515800,1.0 250 | 8,5,yield spin,262.7,9522800,1.0 251 | 9,5,yield spin,263.5,9503800,1.0 252 | 10,5,yield spin,262.8,9532800,1.0 253 | 11,5,yield spin,262.9,9531400,1.0 254 | 12,5,yield spin,262.6,9526400,1.0 255 | 13,5,yield spin,262.6,9538200,1.0 256 | 14,5,yield spin,262.7,9493200,1.0 257 | 0,6,yield spin,252.3,11932400,1.0 258 | 1,6,yield spin,251.9,11937000,1.0 259 | 2,6,yield spin,251.7,11921600,1.0 260 | 3,6,yield spin,252.4,11931600,1.0 261 | 4,6,yield spin,251.8,11969400,1.0 262 | 5,6,yield spin,252.0,11950200,1.0 263 | 6,6,yield spin,251.9,11915600,1.0 264 | 7,6,yield spin,249.4,12050400,1.0 265 | 8,6,yield spin,251.7,11968800,1.0 266 | 9,6,yield spin,252.3,11907800,1.0 267 | 10,6,yield spin,251.8,11934400,1.0 268 | 11,6,yield spin,252.0,11945200,1.0 269 | 12,6,yield spin,251.3,11959400,1.0 270 | 13,6,yield 
spin,253.3,11869200,1.0 271 | 14,6,yield spin,249.6,11966800,1.0 272 | 0,5,ticket spin,231244.4,37000,1.0 273 | 1,5,ticket spin,313445.2,35400,1.0 274 | 2,5,ticket spin,623092.7,36200,1.0 275 | 3,5,ticket spin,349706.9,34600,1.0 276 | 4,5,ticket spin,80813.4,34600,1.0 277 | 5,5,ticket spin,182544.0,34600,1.0 278 | 6,5,ticket spin,237456.8,34600,1.0 279 | 7,5,ticket spin,313445.5,35400,1.0 280 | 8,5,ticket spin,126440.6,35400,1.0 281 | 9,5,ticket spin,501546.4,36200,1.0 282 | 10,5,ticket spin,452929.3,36200,1.0 283 | 11,5,ticket spin,536905.1,36200,1.0 284 | 12,5,ticket spin,217966.0,35400,1.0 285 | 13,5,ticket spin,367515.5,34600,1.0 286 | 14,5,ticket spin,482541.1,36200,1.0 287 | 0,6,ticket spin,608704.5,12400,1.0 288 | 1,6,ticket spin,736457.2,23800,1.0 289 | 2,6,ticket spin,375087.8,24400,1.0 290 | 3,6,ticket spin,1480992.5,11800,1.0 291 | 4,6,ticket spin,976363.3,19800,1.0 292 | 5,6,ticket spin,920010.9,12400,1.0 293 | 6,6,ticket spin,2229655.7,12400,1.0 294 | 7,6,ticket spin,735904.6,24400,1.0 295 | 8,6,ticket spin,620816.9,24400,1.0 296 | 9,6,ticket spin,768919.8,37200,1.0 297 | 10,6,ticket spin,271808.8,24400,1.0 298 | 11,6,ticket spin,401904.1,35600,1.0 299 | 12,6,ticket spin,688958.5,11600,1.0 300 | 13,6,ticket spin,1849011.6,12400,1.0 301 | 14,6,ticket spin,1011745.7,25200,1.0 302 | 0,5,ticket yield,3006.6,841000,1.0 303 | 1,5,ticket yield,3027.8,833600,1.0 304 | 2,5,ticket yield,3041.4,835800,1.0 305 | 3,5,ticket yield,3059.3,825800,1.0 306 | 4,5,ticket yield,3079.7,819800,1.0 307 | 5,5,ticket yield,3070.8,817000,1.0 308 | 6,5,ticket yield,3075.6,820800,1.0 309 | 7,5,ticket yield,3060.2,824800,1.0 310 | 8,5,ticket yield,3018.1,836600,1.0 311 | 9,5,ticket yield,3064.9,823800,1.0 312 | 10,5,ticket yield,3024.9,839800,1.0 313 | 11,5,ticket yield,2987.7,847000,1.0 314 | 12,5,ticket yield,3086.3,823800,1.0 315 | 13,5,ticket yield,3104.5,813800,1.0 316 | 14,5,ticket yield,3009.7,828800,1.0 317 | 0,6,ticket yield,4227.9,718400,1.0 318 | 1,6,ticket 
yield,4224.1,722800,1.0 319 | 2,6,ticket yield,4283.1,709800,1.0 320 | 3,6,ticket yield,4308.3,718200,1.0 321 | 4,6,ticket yield,4327.2,715600,1.0 322 | 5,6,ticket yield,4275.8,715800,1.0 323 | 6,6,ticket yield,4294.3,714000,1.0 324 | 7,6,ticket yield,4276.9,735600,1.0 325 | 8,6,ticket yield,4193.4,733800,1.0 326 | 9,6,ticket yield,4280.5,718600,1.0 327 | 10,6,ticket yield,4206.9,730800,1.0 328 | 11,6,ticket yield,4342.0,716800,1.0 329 | 12,6,ticket yield,4241.4,713000,1.0 330 | 13,6,ticket yield,4307.5,710400,1.0 331 | 14,6,ticket yield,4198.3,712600,1.0 332 | 0,5,ticket blocking,35148.4,73000,1.0 333 | 1,5,ticket blocking,35191.6,73000,1.0 334 | 2,5,ticket blocking,35169.3,73000,1.0 335 | 3,5,ticket blocking,34764.9,72400,1.0 336 | 4,5,ticket blocking,35449.7,72000,1.0 337 | 5,5,ticket blocking,35385.6,72000,1.0 338 | 6,5,ticket blocking,34758.8,73000,1.0 339 | 7,5,ticket blocking,35672.4,72000,1.0 340 | 8,5,ticket blocking,35180.3,73000,1.0 341 | 9,5,ticket blocking,35424.1,72000,1.0 342 | 10,5,ticket blocking,35173.9,73000,1.0 343 | 11,5,ticket blocking,35409.0,72000,1.0 344 | 12,5,ticket blocking,35360.7,72000,1.0 345 | 13,5,ticket blocking,35417.8,72000,1.0 346 | 14,5,ticket blocking,34588.9,72400,1.0 347 | 0,6,ticket blocking,54582.1,57600,1.0 348 | 1,6,ticket blocking,53004.5,57600,1.0 349 | 2,6,ticket blocking,54793.7,57600,1.0 350 | 3,6,ticket blocking,53125.2,58800,1.0 351 | 4,6,ticket blocking,53772.7,57600,1.0 352 | 5,6,ticket blocking,54402.8,57600,1.0 353 | 6,6,ticket blocking,53213.6,58000,1.0 354 | 7,6,ticket blocking,54048.5,57600,1.0 355 | 8,6,ticket blocking,54187.5,57600,1.0 356 | 9,6,ticket blocking,53575.2,57600,1.0 357 | 10,6,ticket blocking,54429.7,57600,1.0 358 | 11,6,ticket blocking,53989.1,57600,1.0 359 | 12,6,ticket blocking,53781.4,57600,1.0 360 | 13,6,ticket blocking,54800.1,57600,1.0 361 | 14,6,ticket blocking,52174.5,57600,1.0 362 | 0,5,queued fifo,14898.0,171000,1.0 363 | 1,5,queued fifo,14512.8,172600,1.0 364 | 2,5,queued 
fifo,14660.6,173000,1.0 365 | 3,5,queued fifo,15011.7,172000,1.0 366 | 4,5,queued fifo,14830.7,174000,1.0 367 | 5,5,queued fifo,14499.4,175000,1.0 368 | 6,5,queued fifo,14428.9,173400,1.0 369 | 7,5,queued fifo,13300.3,189000,1.0 370 | 8,5,queued fifo,14752.1,175000,1.0 371 | 9,5,queued fifo,13093.4,192000,1.0 372 | 10,5,queued fifo,13401.1,189000,1.0 373 | 11,5,queued fifo,14826.6,173000,1.0 374 | 12,5,queued fifo,14591.9,173000,1.0 375 | 13,5,queued fifo,13567.5,188000,1.0 376 | 14,5,queued fifo,14385.7,174000,1.0 377 | 0,6,queued fifo,18093.1,172800,1.0 378 | 1,6,queued fifo,16854.6,186000,1.0 379 | 2,6,queued fifo,17837.3,175200,1.0 380 | 3,6,queued fifo,16155.5,188400,1.0 381 | 4,6,queued fifo,16317.3,186000,1.0 382 | 5,6,queued fifo,15528.4,199200,1.0 383 | 6,6,queued fifo,17625.9,176400,1.0 384 | 7,6,queued fifo,17439.6,175200,1.0 385 | 8,6,queued fifo,16710.8,184800,1.0 386 | 9,6,queued fifo,16397.1,184800,1.0 387 | 10,6,queued fifo,16715.5,186000,1.0 388 | 11,6,queued fifo,18010.3,175200,1.0 389 | 12,6,queued fifo,18293.3,174000,1.0 390 | 13,6,queued fifo,17452.1,174000,1.0 391 | 14,6,queued fifo,17333.0,173200,1.0 392 | 0,5,mutex3,392.6,6398000,1.0 393 | 1,5,mutex3,394.1,6374200,1.0 394 | 2,5,mutex3,393.4,6386200,1.0 395 | 3,5,mutex3,393.0,6391800,1.0 396 | 4,5,mutex3,392.5,6400800,1.0 397 | 5,5,mutex3,394.7,6365200,1.0 398 | 6,5,mutex3,391.2,6411000,1.0 399 | 7,5,mutex3,392.6,6398000,1.0 400 | 8,5,mutex3,392.5,6400600,1.0 401 | 9,5,mutex3,392.2,6395200,1.0 402 | 10,5,mutex3,392.3,6404200,1.0 403 | 11,5,mutex3,393.5,6383600,1.0 404 | 12,5,mutex3,391.7,6402600,1.0 405 | 13,5,mutex3,391.6,6404000,1.0 406 | 14,5,mutex3,390.7,6399200,1.0 407 | 0,6,mutex3,477.4,6326400,1.0 408 | 1,6,mutex3,478.1,6324600,1.0 409 | 2,6,mutex3,474.3,6359000,1.0 410 | 3,6,mutex3,476.6,6345800,1.0 411 | 4,6,mutex3,480.2,6297200,1.0 412 | 5,6,mutex3,489.2,6181800,1.0 413 | 6,6,mutex3,483.4,6256200,1.0 414 | 7,6,mutex3,475.8,6355400,1.0 415 | 8,6,mutex3,477.2,6319600,1.0 416 | 
9,6,mutex3,476.0,6344600,1.0 417 | 10,6,mutex3,477.6,6323200,1.0 418 | 11,6,mutex3,480.9,6288400,1.0 419 | 12,6,mutex3,476.4,6348200,1.0 420 | 13,6,mutex3,476.1,6343400,1.0 421 | 14,6,mutex3,474.1,6327600,1.0 422 | -------------------------------------------------------------------------------- /results/skl/data_slow.csv: -------------------------------------------------------------------------------- 1 | Trial,Cores,Implementation,Nanos/Op,Total,Rlen 2 | 0,5,plain add,4.2,600548600,1.0 3 | 1,5,plain add,4.2,598844400,1.0 4 | 2,5,plain add,4.2,597451600,1.0 5 | 3,5,plain add,4.2,599854200,1.0 6 | 4,5,plain add,4.2,600000600,1.0 7 | 5,5,plain add,4.2,600302600,1.0 8 | 6,5,plain add,4.2,600779600,1.0 9 | 7,5,plain add,4.2,598297000,1.0 10 | 8,5,plain add,4.2,599752200,1.0 11 | 9,5,plain add,4.1,602100600,1.0 12 | 10,5,plain add,4.2,599895000,1.0 13 | 11,5,plain add,4.1,602064200,1.0 14 | 12,5,plain add,4.2,597706800,1.0 15 | 13,5,plain add,4.2,601483000,1.0 16 | 14,5,plain add,4.1,601149200,1.0 17 | 0,6,plain add,5.0,600860000,1.0 18 | 1,6,plain add,5.0,603483400,1.0 19 | 2,6,plain add,5.0,601379000,1.0 20 | 3,6,plain add,5.0,595981000,1.0 21 | 4,6,plain add,5.0,600511800,1.0 22 | 5,6,plain add,5.0,601115000,1.0 23 | 6,6,plain add,5.0,599976400,1.0 24 | 7,6,plain add,5.0,603260000,1.0 25 | 8,6,plain add,5.0,602113200,1.0 26 | 9,6,plain add,5.0,599514200,1.0 27 | 10,6,plain add,5.0,601205000,1.0 28 | 11,6,plain add,5.0,601992200,1.0 29 | 12,6,plain add,5.0,601376600,1.0 30 | 13,6,plain add,5.0,602026000,1.0 31 | 14,6,plain add,5.0,600234400,1.0 32 | 0,5,tls add,2.6,952469400,1.0 33 | 1,5,tls add,2.6,957196000,1.0 34 | 2,5,tls add,2.6,959102000,1.0 35 | 3,5,tls add,2.6,955558000,1.0 36 | 4,5,tls add,2.6,949164400,1.0 37 | 5,5,tls add,2.6,954752000,1.0 38 | 6,5,tls add,2.6,955555400,1.0 39 | 7,5,tls add,2.6,955570800,1.0 40 | 8,5,tls add,2.6,955724800,1.0 41 | 9,5,tls add,2.6,955828200,1.0 42 | 10,5,tls add,2.6,957004000,1.0 43 | 11,5,tls add,2.6,956710000,1.0 44 | 
12,5,tls add,2.6,953932600,1.0 45 | 13,5,tls add,2.6,956688000,1.0 46 | 14,5,tls add,2.6,959049600,1.0 47 | 0,6,tls add,3.1,956684400,1.0 48 | 1,6,tls add,3.1,958357200,1.0 49 | 2,6,tls add,3.1,956636800,1.0 50 | 3,6,tls add,3.1,956008200,1.0 51 | 4,6,tls add,3.1,959190600,1.0 52 | 5,6,tls add,3.1,956376400,1.0 53 | 6,6,tls add,3.1,958248600,1.0 54 | 7,6,tls add,3.1,955097800,1.0 55 | 8,6,tls add,3.2,950183800,1.0 56 | 9,6,tls add,3.2,953410800,1.0 57 | 10,6,tls add,3.1,960073600,1.0 58 | 11,6,tls add,3.1,957054400,1.0 59 | 12,6,tls add,3.1,956718800,1.0 60 | 13,6,tls add,3.1,955526200,1.0 61 | 14,6,tls add,3.1,958251200,1.0 62 | 0,5,atomic add,140.1,17793200,1.0 63 | 1,5,atomic add,140.3,17791400,1.0 64 | 2,5,atomic add,140.4,17780000,1.0 65 | 3,5,atomic add,140.6,17777400,1.0 66 | 4,5,atomic add,140.5,17787600,1.0 67 | 5,5,atomic add,140.3,17785000,1.0 68 | 6,5,atomic add,140.6,17786800,1.0 69 | 7,5,atomic add,140.8,17787200,1.0 70 | 8,5,atomic add,140.4,17782600,1.0 71 | 9,5,atomic add,140.4,17784000,1.0 72 | 10,5,atomic add,140.5,17768800,1.0 73 | 11,5,atomic add,140.6,17775800,1.0 74 | 12,5,atomic add,140.1,17787600,1.0 75 | 13,5,atomic add,140.4,17778800,1.0 76 | 14,5,atomic add,140.0,17776600,1.0 77 | 0,6,atomic add,168.5,17777200,1.0 78 | 1,6,atomic add,168.4,17790600,1.0 79 | 2,6,atomic add,168.9,17783000,1.0 80 | 3,6,atomic add,168.4,17795400,1.0 81 | 4,6,atomic add,168.6,17792000,1.0 82 | 5,6,atomic add,169.0,17776000,1.0 83 | 6,6,atomic add,168.4,17792800,1.0 84 | 7,6,atomic add,169.0,17796600,1.0 85 | 8,6,atomic add,168.7,17786200,1.0 86 | 9,6,atomic add,168.5,17781800,1.0 87 | 10,6,atomic add,168.5,17784200,1.0 88 | 11,6,atomic add,168.7,17787000,1.0 89 | 12,6,atomic add,167.9,17792000,1.0 90 | 13,6,atomic add,168.1,17777600,1.0 91 | 14,6,atomic add,167.3,17786600,1.0 92 | 0,5,cas add,483.3,5156000,1.0 93 | 1,5,cas add,484.0,5157200,1.0 94 | 2,5,cas add,482.9,5168800,1.0 95 | 3,5,cas add,483.0,5159200,1.0 96 | 4,5,cas add,485.1,5154000,1.0 97 | 
5,5,cas add,482.1,5167800,1.0 98 | 6,5,cas add,484.4,5161800,1.0 99 | 7,5,cas add,483.9,5158200,1.0 100 | 8,5,cas add,482.7,5162800,1.0 101 | 9,5,cas add,484.1,5164200,1.0 102 | 10,5,cas add,483.6,5169800,1.0 103 | 11,5,cas add,483.0,5159600,1.0 104 | 12,5,cas add,483.1,5174800,1.0 105 | 13,5,cas add,483.4,5164000,1.0 106 | 14,5,cas add,481.6,5166400,1.0 107 | 0,6,cas add,580.1,5164800,1.0 108 | 1,6,cas add,579.9,5166600,1.0 109 | 2,6,cas add,582.2,5153400,1.0 110 | 3,6,cas add,579.3,5164800,1.0 111 | 4,6,cas add,580.0,5172800,1.0 112 | 5,6,cas add,580.5,5154600,1.0 113 | 6,6,cas add,581.4,5163200,1.0 114 | 7,6,cas add,578.0,5162400,1.0 115 | 8,6,cas add,579.6,5155200,1.0 116 | 9,6,cas add,581.8,5157000,1.0 117 | 10,6,cas add,580.8,5165800,1.0 118 | 11,6,cas add,578.9,5162000,1.0 119 | 12,6,cas add,579.7,5161000,1.0 120 | 13,6,cas add,579.8,5167800,1.0 121 | 14,6,cas add,578.2,5154200,1.0 122 | 0,5,cas multi,15.1,164997600,1.0 123 | 1,5,cas multi,15.1,164658200,1.0 124 | 2,5,cas multi,15.2,163694800,1.0 125 | 3,5,cas multi,15.1,165468600,1.0 126 | 4,5,cas multi,15.1,165078200,1.0 127 | 5,5,cas multi,15.1,164516200,1.0 128 | 6,5,cas multi,15.1,164918000,1.0 129 | 7,5,cas multi,15.1,164733600,1.0 130 | 8,5,cas multi,15.1,165516800,1.0 131 | 9,5,cas multi,15.1,165494800,1.0 132 | 10,5,cas multi,15.2,164215600,1.0 133 | 11,5,cas multi,15.1,165257800,1.0 134 | 12,5,cas multi,15.1,165536200,1.0 135 | 13,5,cas multi,15.2,164652600,1.0 136 | 14,5,cas multi,15.1,165196200,1.0 137 | 0,6,cas multi,18.1,165505000,1.0 138 | 1,6,cas multi,18.2,164972600,1.0 139 | 2,6,cas multi,18.2,165237000,1.0 140 | 3,6,cas multi,18.2,164960000,1.0 141 | 4,6,cas multi,18.1,165512600,1.0 142 | 5,6,cas multi,18.1,165531200,1.0 143 | 6,6,cas multi,18.2,163973200,1.0 144 | 7,6,cas multi,18.3,164203800,1.0 145 | 8,6,cas multi,18.1,165631800,1.0 146 | 9,6,cas multi,18.1,165204400,1.0 147 | 10,6,cas multi,18.1,165716400,1.0 148 | 11,6,cas multi,18.1,164686800,1.0 149 | 12,6,cas 
multi,18.1,165301200,1.0 150 | 13,6,cas multi,18.1,165556000,1.0 151 | 14,6,cas multi,18.0,165601400,1.0 152 | 0,5,mutex add,331.9,7567200,1.0 153 | 1,5,mutex add,331.7,7561200,1.0 154 | 2,5,mutex add,332.2,7548800,1.0 155 | 3,5,mutex add,334.1,7519800,1.0 156 | 4,5,mutex add,334.2,7516600,1.0 157 | 5,5,mutex add,331.5,7565200,1.0 158 | 6,5,mutex add,332.3,7560000,1.0 159 | 7,5,mutex add,332.8,7549000,1.0 160 | 8,5,mutex add,331.9,7569600,1.0 161 | 9,5,mutex add,330.1,7598200,1.0 162 | 10,5,mutex add,332.6,7540600,1.0 163 | 11,5,mutex add,333.0,7543600,1.0 164 | 12,5,mutex add,331.9,7557600,1.0 165 | 13,5,mutex add,332.1,7551600,1.0 166 | 14,5,mutex add,330.9,7555400,1.0 167 | 0,6,mutex add,404.2,7471600,1.0 168 | 1,6,mutex add,404.6,7449400,1.0 169 | 2,6,mutex add,405.4,7449400,1.0 170 | 3,6,mutex add,405.6,7445800,1.0 171 | 4,6,mutex add,405.7,7453800,1.0 172 | 5,6,mutex add,404.3,7470200,1.0 173 | 6,6,mutex add,404.3,7479000,1.0 174 | 7,6,mutex add,405.9,7450200,1.0 175 | 8,6,mutex add,408.1,7409800,1.0 176 | 9,6,mutex add,404.4,7466800,1.0 177 | 10,6,mutex add,404.5,7466600,1.0 178 | 11,6,mutex add,405.2,7452800,1.0 179 | 12,6,mutex add,404.8,7470200,1.0 180 | 13,6,mutex add,402.3,7507200,1.0 181 | 14,6,mutex add,401.1,7479600,1.0 182 | 0,5,pure spin,906.2,2750200,1.0 183 | 1,5,pure spin,1044.7,2385400,1.0 184 | 2,5,pure spin,1058.3,2362400,1.0 185 | 3,5,pure spin,1062.5,2349200,1.0 186 | 4,5,pure spin,996.5,2505000,1.0 187 | 5,5,pure spin,1053.9,2368400,1.0 188 | 6,5,pure spin,1014.0,2457800,1.0 189 | 7,5,pure spin,956.0,2615000,1.0 190 | 8,5,pure spin,1104.4,2263800,1.0 191 | 9,5,pure spin,969.4,2579000,1.0 192 | 10,5,pure spin,1069.1,2332600,1.0 193 | 11,5,pure spin,1058.2,2355000,1.0 194 | 12,5,pure spin,976.3,2560800,1.0 195 | 13,5,pure spin,1009.9,2471600,1.0 196 | 14,5,pure spin,999.7,2489000,1.0 197 | 0,6,pure spin,1459.4,2061200,1.0 198 | 1,6,pure spin,1419.8,2110200,1.0 199 | 2,6,pure spin,1626.7,1844200,1.0 200 | 3,6,pure spin,1430.1,2087600,1.0 201 
| 4,6,pure spin,1582.6,1888000,1.0 202 | 5,6,pure spin,1505.1,1974600,1.0 203 | 6,6,pure spin,1433.6,2087000,1.0 204 | 7,6,pure spin,1466.3,2062200,1.0 205 | 8,6,pure spin,1402.8,2135800,1.0 206 | 9,6,pure spin,1353.9,2215800,1.0 207 | 10,6,pure spin,1403.3,2135200,1.0 208 | 11,6,pure spin,1266.7,2362200,1.0 209 | 12,6,pure spin,1428.2,2151000,1.0 210 | 13,6,pure spin,1343.1,2236600,1.0 211 | 14,6,pure spin,1456.1,2079600,1.0 212 | 0,5,pause spin,494.5,5095800,1.0 213 | 1,5,pause spin,527.9,4735600,1.0 214 | 2,5,pause spin,505.8,5061400,1.0 215 | 3,5,pause spin,488.0,5204600,1.0 216 | 4,5,pause spin,533.4,4687200,1.0 217 | 5,5,pause spin,525.2,4759800,1.0 218 | 6,5,pause spin,505.1,5049400,1.0 219 | 7,5,pause spin,496.4,5028400,1.0 220 | 8,5,pause spin,487.7,5238200,1.0 221 | 9,5,pause spin,478.0,5221400,1.0 222 | 10,5,pause spin,501.3,4981600,1.0 223 | 11,5,pause spin,495.3,5031600,1.0 224 | 12,5,pause spin,541.1,4694400,1.0 225 | 13,5,pause spin,512.9,4858800,1.0 226 | 14,5,pause spin,520.5,4857200,1.0 227 | 0,6,pause spin,619.2,4831800,1.0 228 | 1,6,pause spin,628.0,4764400,1.0 229 | 2,6,pause spin,619.7,4828400,1.0 230 | 3,6,pause spin,612.0,4895400,1.0 231 | 4,6,pause spin,650.4,4612400,1.0 232 | 5,6,pause spin,612.3,4899800,1.0 233 | 6,6,pause spin,569.4,5240800,1.0 234 | 7,6,pause spin,618.5,4833600,1.0 235 | 8,6,pause spin,620.2,4867400,1.0 236 | 9,6,pause spin,612.9,4901400,1.0 237 | 10,6,pause spin,605.9,4997400,1.0 238 | 11,6,pause spin,619.1,4845400,1.0 239 | 12,6,pause spin,612.3,4978200,1.0 240 | 13,6,pause spin,635.0,4724800,1.0 241 | 14,6,pause spin,628.8,4739400,1.0 242 | 0,5,yield spin,80.5,31113400,1.0 243 | 1,5,yield spin,80.4,31097600,1.0 244 | 2,5,yield spin,79.8,31385000,1.0 245 | 3,5,yield spin,79.6,31422800,1.0 246 | 4,5,yield spin,79.7,31452600,1.0 247 | 5,5,yield spin,80.1,31280800,1.0 248 | 6,5,yield spin,79.8,31406000,1.0 249 | 7,5,yield spin,79.6,31417400,1.0 250 | 8,5,yield spin,79.3,31510600,1.0 251 | 9,5,yield spin,79.4,31482200,1.0 
252 | 10,5,yield spin,81.4,30766600,1.0 253 | 11,5,yield spin,79.5,31434600,1.0 254 | 12,5,yield spin,80.5,31126800,1.0 255 | 13,5,yield spin,80.3,31140200,1.0 256 | 14,5,yield spin,80.0,31171200,1.0 257 | 0,6,yield spin,94.8,31747800,1.0 258 | 1,6,yield spin,93.8,32097800,1.0 259 | 2,6,yield spin,93.5,32152000,1.0 260 | 3,6,yield spin,95.8,31460200,1.0 261 | 4,6,yield spin,94.3,31884600,1.0 262 | 5,6,yield spin,93.7,32091200,1.0 263 | 6,6,yield spin,93.6,32120400,1.0 264 | 7,6,yield spin,94.1,31975800,1.0 265 | 8,6,yield spin,94.5,31877600,1.0 266 | 9,6,yield spin,93.9,32044200,1.0 267 | 10,6,yield spin,93.3,32285400,1.0 268 | 11,6,yield spin,93.7,32056400,1.0 269 | 12,6,yield spin,94.7,31773200,1.0 270 | 13,6,yield spin,93.6,32151800,1.0 271 | 14,6,yield spin,93.7,31898000,1.0 272 | 0,5,ticket spin,115881.9,100200,1.0 273 | 1,5,ticket spin,207494.5,103400,1.0 274 | 2,5,ticket spin,68483.9,101000,1.0 275 | 3,5,ticket spin,234545.0,81800,1.0 276 | 4,5,ticket spin,231245.5,100200,1.0 277 | 5,5,ticket spin,111575.5,105000,1.0 278 | 6,5,ticket spin,98006.6,102600,1.0 279 | 7,5,ticket spin,111020.6,99400,1.0 280 | 8,5,ticket spin,90734.3,100200,1.0 281 | 9,5,ticket spin,105854.9,109200,1.0 282 | 10,5,ticket spin,184905.4,105000,1.0 283 | 11,5,ticket spin,166891.7,102600,1.0 284 | 12,5,ticket spin,124906.2,102600,1.0 285 | 13,5,ticket spin,138160.5,102600,1.0 286 | 14,5,ticket spin,171146.0,95400,1.0 287 | 0,6,ticket spin,288090.7,90800,1.0 288 | 1,6,ticket spin,209505.1,66000,1.0 289 | 2,6,ticket spin,145404.1,102000,1.0 290 | 3,6,ticket spin,179684.6,98800,1.0 291 | 4,6,ticket spin,109654.5,69200,1.0 292 | 5,6,ticket spin,479187.1,35600,1.0 293 | 6,6,ticket spin,416510.4,69200,1.0 294 | 7,6,ticket spin,141279.2,66800,1.0 295 | 8,6,ticket spin,74137.9,58000,1.0 296 | 9,6,ticket spin,1012300.4,34000,1.0 297 | 10,6,ticket spin,202356.8,67600,1.0 298 | 11,6,ticket spin,374587.3,35600,1.0 299 | 12,6,ticket spin,786547.1,34000,1.0 300 | 13,6,ticket spin,285492.7,35600,1.0 
301 | 14,6,ticket spin,647198.8,33200,1.0 302 | 0,5,ticket yield,4632.8,548200,1.0 303 | 1,5,ticket yield,4591.6,546000,1.0 304 | 2,5,ticket yield,2915.5,871600,1.0 305 | 3,5,ticket yield,4433.4,573200,1.0 306 | 4,5,ticket yield,4598.4,553400,1.0 307 | 5,5,ticket yield,3393.1,737800,1.0 308 | 6,5,ticket yield,4541.1,559200,1.0 309 | 7,5,ticket yield,2743.2,921000,1.0 310 | 8,5,ticket yield,4444.4,564200,1.0 311 | 9,5,ticket yield,3168.9,802200,1.0 312 | 10,5,ticket yield,3225.2,783200,1.0 313 | 11,5,ticket yield,4510.7,556000,1.0 314 | 12,5,ticket yield,4594.4,550000,1.0 315 | 13,5,ticket yield,4547.0,555400,1.0 316 | 14,5,ticket yield,2782.7,896800,1.0 317 | 0,6,ticket yield,5029.0,605400,1.0 318 | 1,6,ticket yield,5770.6,527600,1.0 319 | 2,6,ticket yield,6230.6,488600,1.0 320 | 3,6,ticket yield,5490.3,554600,1.0 321 | 4,6,ticket yield,5243.1,587600,1.0 322 | 5,6,ticket yield,5095.7,604600,1.0 323 | 6,6,ticket yield,5427.9,560800,1.0 324 | 7,6,ticket yield,5180.1,588200,1.0 325 | 8,6,ticket yield,6087.8,505800,1.0 326 | 9,6,ticket yield,5806.2,525000,1.0 327 | 10,6,ticket yield,5932.7,527000,1.0 328 | 11,6,ticket yield,5945.3,510600,1.0 329 | 12,6,ticket yield,5185.5,587400,1.0 330 | 13,6,ticket yield,5501.3,559600,1.0 331 | 14,6,ticket yield,5907.6,505400,1.0 332 | 0,5,ticket blocking,43383.4,58600,1.0 333 | 1,5,ticket blocking,43053.9,58600,1.0 334 | 2,5,ticket blocking,43220.6,59200,1.0 335 | 3,5,ticket blocking,43562.2,58600,1.0 336 | 4,5,ticket blocking,42427.5,59600,1.0 337 | 5,5,ticket blocking,43212.2,59200,1.0 338 | 6,5,ticket blocking,42972.2,58600,1.0 339 | 7,5,ticket blocking,43254.6,58600,1.0 340 | 8,5,ticket blocking,43160.4,58600,1.0 341 | 9,5,ticket blocking,43094.3,58600,1.0 342 | 10,5,ticket blocking,42325.3,59600,1.0 343 | 11,5,ticket blocking,42976.6,58600,1.0 344 | 12,5,ticket blocking,43376.2,58200,1.0 345 | 13,5,ticket blocking,44511.5,57200,1.0 346 | 14,5,ticket blocking,42381.6,59200,1.0 347 | 0,6,ticket blocking,64531.6,48000,1.0 348 | 
1,6,ticket blocking,63026.1,49200,1.0 349 | 2,6,ticket blocking,63219.3,49200,1.0 350 | 3,6,ticket blocking,62722.6,49200,1.0 351 | 4,6,ticket blocking,62401.9,50400,1.0 352 | 5,6,ticket blocking,61902.8,49200,1.0 353 | 6,6,ticket blocking,60920.7,51600,1.0 354 | 7,6,ticket blocking,62180.5,50400,1.0 355 | 8,6,ticket blocking,63190.4,49200,1.0 356 | 9,6,ticket blocking,61823.5,50400,1.0 357 | 10,6,ticket blocking,62189.9,50400,1.0 358 | 11,6,ticket blocking,61754.1,50400,1.0 359 | 12,6,ticket blocking,61964.0,50400,1.0 360 | 13,6,ticket blocking,61289.2,50400,1.0 361 | 14,6,ticket blocking,59865.8,50400,1.0 362 | 0,5,queued fifo,21578.4,117000,1.0 363 | 1,5,queued fifo,22355.8,114000,1.0 364 | 2,5,queued fifo,22067.5,114000,1.0 365 | 3,5,queued fifo,21764.0,118000,1.0 366 | 4,5,queued fifo,22069.7,117000,1.0 367 | 5,5,queued fifo,22175.3,115000,1.0 368 | 6,5,queued fifo,21823.6,116000,1.0 369 | 7,5,queued fifo,22441.8,115000,1.0 370 | 8,5,queued fifo,21892.7,116000,1.0 371 | 9,5,queued fifo,22112.6,116000,1.0 372 | 10,5,queued fifo,21829.2,117000,1.0 373 | 11,5,queued fifo,22003.6,115000,1.0 374 | 12,5,queued fifo,21532.9,117000,1.0 375 | 13,5,queued fifo,21877.1,115000,1.0 376 | 14,5,queued fifo,21714.0,116000,1.0 377 | 0,6,queued fifo,25787.0,117600,1.0 378 | 1,6,queued fifo,26938.2,115200,1.0 379 | 2,6,queued fifo,28093.4,111600,1.0 380 | 3,6,queued fifo,26651.9,115200,1.0 381 | 4,6,queued fifo,26399.6,116400,1.0 382 | 5,6,queued fifo,26700.5,116400,1.0 383 | 6,6,queued fifo,26624.7,117600,1.0 384 | 7,6,queued fifo,26527.2,114000,1.0 385 | 8,6,queued fifo,26494.5,117600,1.0 386 | 9,6,queued fifo,26615.1,117600,1.0 387 | 10,6,queued fifo,26737.4,116400,1.0 388 | 11,6,queued fifo,26224.1,116800,1.0 389 | 12,6,queued fifo,26546.1,115200,1.0 390 | 13,6,queued fifo,27033.5,116400,1.0 391 | 14,6,queued fifo,25837.1,116400,1.0 392 | 0,5,mutex3,345.1,7266600,1.0 393 | 1,5,mutex3,345.4,7272000,1.0 394 | 2,5,mutex3,345.9,7262400,1.0 395 | 3,5,mutex3,345.7,7265400,1.0 396 
| 4,5,mutex3,346.9,7241800,1.0 397 | 5,5,mutex3,345.3,7262800,1.0 398 | 6,5,mutex3,346.3,7241800,1.0 399 | 7,5,mutex3,344.2,7298400,1.0 400 | 8,5,mutex3,345.2,7265200,1.0 401 | 9,5,mutex3,345.6,7266800,1.0 402 | 10,5,mutex3,345.4,7273200,1.0 403 | 11,5,mutex3,344.2,7298400,1.0 404 | 12,5,mutex3,346.7,7246600,1.0 405 | 13,5,mutex3,345.3,7273200,1.0 406 | 14,5,mutex3,343.1,7286400,1.0 407 | 0,6,mutex3,420.7,7169600,1.0 408 | 1,6,mutex3,417.1,7230000,1.0 409 | 2,6,mutex3,418.8,7210800,1.0 410 | 3,6,mutex3,418.1,7223600,1.0 411 | 4,6,mutex3,419.0,7217600,1.0 412 | 5,6,mutex3,418.8,7219800,1.0 413 | 6,6,mutex3,417.2,7214800,1.0 414 | 7,6,mutex3,417.1,7239000,1.0 415 | 8,6,mutex3,419.1,7205400,1.0 416 | 9,6,mutex3,419.4,7209400,1.0 417 | 10,6,mutex3,419.8,7202000,1.0 418 | 11,6,mutex3,419.1,7215400,1.0 419 | 12,6,mutex3,418.8,7220400,1.0 420 | 13,6,mutex3,418.3,7219000,1.0 421 | 14,6,mutex3,415.4,7221400,1.0 422 | -------------------------------------------------------------------------------- /scripts/all-plots.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2162 3 | 4 | set -euo pipefail 5 | 6 | 7 | echo "RBASE=${RBASE:=./results}" 8 | echo "ADIR=${ADIR:=../travisdowns.github.io/assets/concurrency-costs}" 9 | echo "TDIR=${TDIR:=../travisdowns.github.io/misc/tables/concurrency-costs}" 10 | 11 | uarches=(skl,4 g2-16,16 g1-16,16 icl,4) 12 | 13 | 14 | for pair in "${uarches[@]}"; do 15 | IFS=',' read u _ <<< "${pair}" 16 | mkdir -p "$ADIR/$u" 17 | mkdir -p "$TDIR/$u" 18 | done 19 | 20 | for pair in "${uarches[@]}"; do 21 | IFS=',' read u procs <<< "${pair}" 22 | echo "uarch=${u} with $procs CPUs" 23 | scripts/plot-bar.py "$RBASE/${u}/combined.csv" --out "$ADIR/${u}" --table-out="$TDIR/${u}" --procs=${procs} 24 | done -------------------------------------------------------------------------------- /scripts/data.sh: -------------------------------------------------------------------------------- 
1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | 5 | # echo "FAST_ITERS=${FAST_ITERS:=1000}" 6 | # echo "SLOW_ITERS=${SLOW_ITERS:=10}" 7 | echo "RDIR=${RDIR:=./results}" 8 | echo "CPUS=${CPUS:=$(nproc)}" 9 | 10 | mkdir -p "$RDIR" 11 | 12 | # up to CPU-count threads 13 | echo "Collecting fast data" 14 | ./bench --progress --csv > "$RDIR/data_fast.csv" 15 | echo "Collecting slow data" 16 | ./bench --progress --csv --min-threads=$(($CPUS + 1)) --max-threads=$(($CPUS + 2)) --batch=200 --trial-time=500 > "$RDIR/data_slow.csv" 17 | 18 | cat $RDIR/data_fast.csv <(tail +2 $RDIR/data_slow.csv) > $RDIR/combined.csv -------------------------------------------------------------------------------- /scripts/details.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # run each algorithm alone to get instruction and atomic counts 4 | # uses --no-barrier and --warmup-ms so that the function under 5 | # test dominates the time 6 | 7 | set -euo pipefail 8 | 9 | echo "EVENTS=${EVENTS:=instructions:u,mem_inst_retired.lock_loads:u}" 10 | echo "ITERS=${ITERS=$(((33333333 + 17)/17))}" # default should result in 100,000,000 iters 11 | 12 | if [[ -z ${1+x} ]]; then 13 | algos=("mutex add" "atomic add" "cas add" "ticket yield" "ticket blocking" "queued fifo" "ticket spin" "mutex3" "cas multi" "tls add") 14 | else 15 | algos=("$@") 16 | fi 17 | 18 | for algo in "${algos[@]}"; do 19 | echo "ALGO: $algo" 20 | perf stat -e $EVENTS ./bench --iters=$ITERS --max-threads=1 --algos="$algo" --no-barrier --warmup-ms=0 |& egrep 'instructions|lock_loads' 21 | done 22 | 23 | 24 | -------------------------------------------------------------------------------- /scripts/plot-bar.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import matplotlib as mpl 4 | import matplotlib.pyplot as plt 5 | import pandas as pd 6 | import numpy as np 7 | import argparse 8 | import sys 9 | import os 10 
| 11 | # for arguments that should be comma-separate lists, we use splitlsit as the type 12 | splitlist = lambda x: x.split(',') 13 | 14 | p = argparse.ArgumentParser(usage='plot output from ./bench') 15 | 16 | p.add_argument('--procs', help='Number of processors used (CPUS in data.sh)', default=4, type=int) 17 | 18 | # input and output file configuration 19 | p.add_argument('input', help='CSV file to plot (or stdin)', type=argparse.FileType('r'), default=[ sys.stdin ]) 20 | p.add_argument('--out', help='output directory') 21 | p.add_argument('--table-out', help='output directory for HTML tables') 22 | p.add_argument('--show', help='also show output interactively (default if --out is not specified)', nargs='?', const='all') 23 | 24 | # input parsing configuration 25 | p.add_argument('--sep', help='separator character (or regex) for input', default=',') 26 | 27 | # chart type 28 | 29 | # column selection and configuration 30 | 31 | # chart labels and text 32 | p.add_argument('--clabels', help="Comma separated list of column names used as label for data series (default: column header)", 33 | type=splitlist) 34 | p.add_argument('--xlabel', help='Set x axis label', default='Active Cores') 35 | p.add_argument('--ylabel', help='Set y axis label', default='Nanoseconds per increment') 36 | p.add_argument('--ylabel2', help='Set the secondary y axis label') 37 | 38 | # legend 39 | p.add_argument('--legend-loc', help='Set the legend location explicitly', type=str) 40 | 41 | # data manipulation 42 | p.add_argument('--jitter', help='Apply horizontal (x-axis) jitter of the given relative amount (default 0.1)', 43 | nargs='?', type=float, const=0.1) 44 | p.add_argument('--group', help='Group data by the first column, with new min/median/max columns with one row per group') 45 | 46 | # axis and line/point configuration 47 | p.add_argument('--ylim', help='Set the y axis limits explicitly (e.g., to cross at zero)', type=float, nargs='+') 48 | p.add_argument('--xrotate', help='rotate 
the xlablels by this amount', default=0) 49 | p.add_argument('--tick-interval', help='use the given x-axis tick spacing (in x axis units)', type=int) 50 | p.add_argument('--alpha', help='use the given alpha for marker/line', type=float) 51 | p.add_argument('--linewidth', help='use the given line width', type=float) 52 | p.add_argument('--tight', help='use tight_layout for less space around chart', action='store_true', default=True) 53 | 54 | # debugging 55 | p.add_argument('--verbose', '-v', help='enable verbose logging', action='store_true') 56 | cargs = p.parse_args() 57 | 58 | vprint = print if cargs.verbose else lambda *a: None 59 | vprint("cargs = ", cargs) 60 | 61 | plt.rcParams['axes.labelsize'] = 'large' 62 | plt.rcParams['axes.titlesize'] = 'large' 63 | 64 | # fix various random seeds so we get reproducible plots 65 | # fix the mpl seed used to generate SVG IDs 66 | mpl.rcParams['svg.hashsalt'] = 'foobar' 67 | 68 | # numpy random seeds (used by e.g., jitter function below) 69 | np.random.seed(123) 70 | 71 | 72 | kwargs = {} 73 | 74 | if (cargs.alpha): 75 | kwargs['alpha'] = cargs.alpha 76 | 77 | # these are args that are basically just passed directly through to the plot command 78 | # and generally correspond to matplotlib plot argumnets. 
79 | passthru_args = ['markersize', 'marker', 'color'] 80 | passthru_args2 = ['markersize2', 'marker2', 'color2'] 81 | argsdict = vars(cargs) 82 | 83 | 84 | # populate the per-series arguments, based on the series index 85 | def populate_args(idx, base, secondary = False): 86 | assert idx > 0 87 | idx = idx - 1 # because the columns are effectively 1-based (col 0 are the x values) 88 | arglist = passthru_args2 if secondary else passthru_args 89 | kwargs = base.copy() 90 | for arg in arglist: 91 | argval = argsdict[arg] 92 | argname = arg[:-1] if secondary and arg.endswith('2') else arg 93 | if (argval): 94 | kwargs[argname] = argval[idx % len(argval)] 95 | vprint("set {} for {} col {} to {} (list: {})".format(argname, "secondary" if secondary else "primary", idx, kwargs[argname], argval)) 96 | else: 97 | vprint("not set {} for col {}".format(arg, idx)) 98 | return kwargs 99 | 100 | 101 | fullargs = {} 102 | vprint("kwargs: {}".format(fullargs)) 103 | 104 | df = pd.read_csv(cargs.input, sep=cargs.sep, index_col=[0, 1, 2]) 105 | df.sort_index(level=0, inplace=True) 106 | vprint("----- from file -------\n", df.head(), "\n---------------------") 107 | 108 | 109 | def make_plot(filename, title, cols, minthreads=1, maxthreads=cargs.procs, overlay=[], metrics='Nanos/Op'): 110 | subf = df[metrics].copy() 111 | vprint("----- after metric slice ------\n", subf.head(), "\n---------------------") 112 | subf = subf.unstack() 113 | vprint("----- after reshape ------\n", subf.head(), "\n---------------------") 114 | subf = subf[cols] 115 | vprint("----- after col selection ------\n", subf.head(), "\n---------------------") 116 | # subf = subf.unstack().droplevel(axis='columns', level=0) 117 | vprint("----- columns ------\n", subf.columns, "\n---------------------") 118 | 119 | iv = subf.index.get_level_values('Cores') 120 | vprint('iv:', iv) 121 | subf = subf.loc[(iv >= minthreads) & (iv <= maxthreads), :] 122 | vprint("----- after core filter ------\n", subf.head(), 
"\n---------------------") 123 | 124 | gb = subf.groupby(by=['Cores']); 125 | median = gb.median() 126 | vprint("----- after groupby ------\n", median.head(n=20), "\n---------------------") 127 | 128 | p10 = median - gb.quantile(0.10) 129 | p90 = gb.quantile(0.90) - median 130 | vprint("----- p10 ------\n", p10.head(), "\n---------------------") 131 | vprint("----- p90 ------\n", p90.head(), "\n---------------------") 132 | 133 | 134 | p10v = p10[cols[0]].values; 135 | vprint('p10v: ', p10v) 136 | vprint('p10v shape:', np.shape(p10v)) 137 | 138 | # https://stackoverflow.com/a/37139647 139 | # y error specification should have shape 140 | # ( number of columns, 2, number of rows ) 141 | # or equivalently 142 | # ( number of bars within a group, 2, number of groups ) 143 | err = [] 144 | for col in median: 145 | err.append([p10[col].values, p90[col].values]) 146 | # vprint('err:', err) 147 | vprint('err shape:', np.shape(err)) 148 | 149 | ax = median.plot.bar(title=title, figsize=(9,6), rot=0, yerr=err, **fullargs) 150 | 151 | vprint('>>>>', ax.containers) 152 | 153 | # # add overlay to the bars 154 | if overlay: 155 | idx = 0 156 | for bars in ax.containers: 157 | # print('bars: ', bars) 158 | # print('type: ', type(bars)) 159 | if isinstance(bars, mpl.container.BarContainer): 160 | for rect in bars: 161 | height = rect.get_height() 162 | ax.text(rect.get_x() + rect.get_width()/2., height/2, 163 | overlay[idx], 164 | ha='center', va='bottom', rotation=0, fontsize=16, weight='bold') 165 | idx = idx + 1 166 | 167 | if cargs.xrotate: 168 | plt.xticks(rotation=cargs.xrotate) 169 | 170 | if cargs.ylabel: 171 | ax.set_ylabel(cargs.ylabel) 172 | 173 | if cargs.ylim: 174 | if len(cargs.ylim) == 1: 175 | ax.set_ylim(cargs.ylim[0]) 176 | elif len(cargs.ylim) == 2: 177 | ax.set_ylim(cargs.ylim[0], cargs.ylim[1]) 178 | else: 179 | sys.exit('provide one or two args to --ylim') 180 | 181 | # this needs to go after the ax2 handling, or else secondary axis x label will override 182 
| if cargs.xlabel: 183 | ax.set_xlabel(cargs.xlabel) 184 | 185 | legargs = {} 186 | if cargs.legend_loc: 187 | legargs['loc'] = cargs.legend_loc 188 | 189 | if cargs.tight: 190 | plt.tight_layout() 191 | 192 | if cargs.out: 193 | outpath = os.path.join(cargs.out, filename + '.svg') 194 | vprint("Saving figure to ", outpath, "...") 195 | plt.savefig(outpath) 196 | 197 | if cargs.show and (cargs.show == 'all' or filename in cargs.show.split(',')): 198 | vprint("Showing interactive plot...") 199 | plt.show() 200 | 201 | plt.close() 202 | 203 | if cargs.table_out: 204 | # this line moves the index name to be the first column name instead 205 | # subf = subf.rename_axis(index=None, columns=subf.index.name) 206 | header = "---\nlayout: default\n---\n\n" 207 | tpath = os.path.join(cargs.table_out, filename + '.html') 208 | with open(tpath, 'w') as f: 209 | f.write(header + subf.to_html()) 210 | vprint('saved html table to', tpath) 211 | 212 | 213 | columns = [] 214 | 215 | def ac(*args): 216 | columns.extend([*args]) 217 | return columns 218 | 219 | 220 | make_plot('mutex', 'Increment Cost: std::mutex', ac('mutex add'), minthreads=2) 221 | make_plot('atomic-inc', 'Increment Cost: Atomic Increments', ac('atomic add', 'cas add'), minthreads=2) 222 | make_plot('atomic-inc1','Increment Cost: Atomic Increments', ac(), maxthreads=1) 223 | make_plot('fair-yield', 'Increment Cost: Yielding Ticket', ac('ticket yield'), minthreads=2) 224 | make_plot('more-fair', 'Increment Cost: More Fair Locks', ac('ticket blocking', 'queued fifo'), minthreads=2) 225 | make_plot('ts-4', 'Increment Cost: Ticket Spin', ac('ticket spin'), minthreads=2) 226 | make_plot('ts-6', 'Increment Cost: Ticket Spin (Oversubscribed)', ac(), minthreads=2, maxthreads=(cargs.procs + 2)) 227 | 228 | make_plot('single', 'Increment Cost: Single Threaded', ac(), maxthreads=1, overlay=[2, 1, 1, 1, 3, 4, 1]) 229 | 230 | columns = ['atomic add', 'cas multi'] 231 | make_plot('cas-multi', 'Increment Cost: Contention 
Adaptive Multi-Counter', ac()) 232 | make_plot('tls', 'Increment Cost: Thread Local Storage', ac('tls add')) -------------------------------------------------------------------------------- /stats.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Really simple descriptive stats. 3 | * 4 | * stats.hpp 5 | */ 6 | 7 | #ifndef STATS_HPP_ 8 | #define STATS_HPP_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace Stats { 20 | 21 | class DescriptiveStats { 22 | double min_, max_, avg_, median_; 23 | size_t count_; 24 | public: 25 | DescriptiveStats() : DescriptiveStats(0., 0., 0., 0., 0) {} 26 | 27 | DescriptiveStats(double min, double max, double avg, double median, size_t count) : 28 | min_(min), max_(max), avg_(avg), median_(median), count_(count) {} 29 | 30 | double getAvg() const { 31 | return avg_; 32 | } 33 | 34 | size_t getCount() const { 35 | return count_; 36 | } 37 | 38 | double getMax() const { 39 | return max_; 40 | } 41 | 42 | double getMin() const { 43 | return min_; 44 | } 45 | 46 | double getMedian() const { 47 | return median_; 48 | } 49 | 50 | /* 51 | * Return a string with the values of min/median/avg/max at the specified precision. 52 | * Note that the count is not included. 
53 | */ 54 | std::string getString4(int width, int precision) const { 55 | std::ostringstream os; 56 | os << std::fixed << std::setprecision(precision) << 57 | std::setw(width) << getMin() << "/" << 58 | std::setw(width) << getMedian() << "/" << 59 | std::setw(width) << getAvg() << "/" << 60 | std::setw(width) << getMax(); 61 | 62 | return os.str(); 63 | } 64 | }; 65 | 66 | template 67 | typename std::iterator_traits::value_type median(iter_type first, iter_type last, LESS comp) { 68 | if (first == last) { 69 | throw std::logic_error("can't get median of empty range"); 70 | } 71 | using T = typename std::iterator_traits::value_type; 72 | std::vector copy(first, last); 73 | std::sort(copy.begin(), copy.end(), comp); 74 | size_t sz = copy.size(), half_sz = sz / 2; 75 | return sz % 2 ? copy[half_sz] : (copy[half_sz - 1] + copy[half_sz]) / 2; 76 | } 77 | 78 | /** 79 | * Like median above, except that with an even number of elements, where there are two middle elements with 80 | * equal claim to the throne, the lesser of the two elements is returned rather trying to average them. This 81 | * method is more generally applicable since it always returns on of the elements of the input range directly 82 | * and doesn't require the elements to expose the operations required to calculate an average. 
83 | */ 84 | template 85 | typename std::iterator_traits::value_type medianf(iter_type first, iter_type last, LESS comp) { 86 | if (first == last) { 87 | throw std::logic_error("can't get median of empty range"); 88 | } 89 | using T = typename std::iterator_traits::value_type; 90 | std::vector copy(first, last); 91 | std::sort(copy.begin(), copy.end(), comp); 92 | assert(!copy.empty()); 93 | return copy[(copy.size() - 1) / 2]; 94 | } 95 | 96 | template 97 | typename std::iterator_traits::value_type median(iter_type first, iter_type last) { 98 | auto p = std::less::value_type>(); 99 | return Stats::median(first, last, p); 100 | } 101 | 102 | 103 | template 104 | DescriptiveStats get_stats(iter_type first, iter_type last) { 105 | using dlimits = std::numeric_limits; 106 | double min = dlimits::max(), max = dlimits::min(), total = 0; 107 | size_t count = 0; 108 | for (iter_type itr = first; itr != last; itr++) { 109 | auto val = *itr; 110 | double vald = val; 111 | if (vald < min) min = vald; 112 | if (vald > max) max = vald; 113 | total += vald; 114 | count++; 115 | } 116 | 117 | return DescriptiveStats(min, max, total / count, median(first, last), count); 118 | } 119 | 120 | 121 | 122 | inline std::ostream& operator<<(std::ostream &os, const DescriptiveStats &stats) { 123 | os << "min=" << stats.getMin() << ", median=" << stats.getMedian() << ", avg=" << stats.getAvg() 124 | << ", max=" << stats.getMax() << ", n=" << stats.getCount(); 125 | return os; 126 | } 127 | 128 | } // namepsace Stats 129 | 130 | #endif /* STATS_HPP_ */ 131 | -------------------------------------------------------------------------------- /table.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * table.hpp 3 | * 4 | * Simple tabular output. 
5 | */ 6 | 7 | #include "table.hpp" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace table { 17 | 18 | 19 | 20 | /** return a vector of sizes for each element */ 21 | std::vector Row::getSizes() const { 22 | std::vector sizes; 23 | for (const auto& e : elems_) { 24 | sizes.push_back(e.size()); 25 | } 26 | return sizes; 27 | } 28 | 29 | 30 | std::string Row::justify(const ColInfo& cinfo, const std::string& e, size_t w) const { 31 | // left pad 32 | std::stringstream ss; 33 | ss << std::setw(w) << (cinfo.justify == ColInfo::LEFT ? std::left : std::right) << e; 34 | auto s = ss.str(); 35 | assert(s.size() == w); 36 | return s; 37 | } 38 | 39 | 40 | /** 41 | * Get a reference to the ColInfo object for the given column, which lets you 42 | * set column-global info such as the justification. 43 | */ 44 | ColInfo& Table::colInfo(size_t col) { 45 | if (col >= colinfo_.size()) { 46 | colinfo_.resize(col + 1); 47 | } 48 | return colinfo_.at(col); 49 | } 50 | 51 | /* in the cost case, return a default ColInfo if it doesn't exist */ 52 | ColInfo Table::colInfo(size_t col) const { 53 | return col < colinfo_.size() ? 
colinfo_.at(col) : ColInfo{}; 54 | } 55 | 56 | Row& Table::newRow() { 57 | rows_.push_back(Row{*this}); 58 | return rows_.back(); 59 | } 60 | 61 | /** return the current representation of the table as a string */ 62 | std::string Table::str() const { 63 | 64 | // calculate max row sizes 65 | std::vector max_sizes; 66 | for (const auto& r : rows_) { 67 | std::vector sizes = r.getSizes(); 68 | for (size_t c = 0; c < sizes.size(); c++) { 69 | size_t row_size = sizes[c]; 70 | if (c >= max_sizes.size()) { 71 | assert(max_sizes.size() == c); 72 | max_sizes.push_back(row_size); 73 | } else { 74 | max_sizes[c] = std::max(max_sizes[c], row_size); 75 | } 76 | } 77 | } 78 | 79 | std::stringstream ss; 80 | for (const auto& r : rows_) { 81 | r.str(ss, max_sizes); 82 | ss << "\n"; 83 | } 84 | 85 | return ss.str(); 86 | } 87 | 88 | 89 | void Row::str(std::ostream& os, const std::vector sizes) const 90 | { 91 | bool first = true; 92 | for (size_t c = 0; c < elems_.size(); c++) { 93 | const auto& e = elems_[c]; 94 | assert(c < sizes.size()); 95 | if (!first) os << table_->sep; // inter-cell padding 96 | first = false; 97 | os << justify(table_->colInfo(c), e, sizes[c]); 98 | } 99 | } 100 | 101 | std::string Table::csv_str() const { 102 | std::string out; 103 | for (const auto& r : rows_) { 104 | r.csv_str(out); 105 | out += '\n'; 106 | } 107 | return out; 108 | } 109 | 110 | void Row::csv_str(std::string& out) const { 111 | bool first = true; 112 | for (auto& cell : elems_) { 113 | out += first ? 
"" : ","; 114 | out += cell; 115 | first = false; 116 | } 117 | } 118 | 119 | Row& Row::add_string(const std::string& s) { 120 | elems_.push_back(s); 121 | return *this; 122 | } 123 | 124 | 125 | template 126 | Row& add_inner(Row& r, const T& elem) { 127 | std::stringstream ss; 128 | ss << elem; 129 | return r.add_string(ss.str()); 130 | } 131 | 132 | #define DEFINE_ADD(T) Row& Row::add(T t) { return add_inner(*this, t); } 133 | SUPPORTED_TYPES_X(DEFINE_ADD) 134 | 135 | } -------------------------------------------------------------------------------- /table.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * table.hpp 3 | * 4 | * Simple tabular output. 5 | */ 6 | 7 | #ifndef TABLE_HPP_ 8 | #define TABLE_HPP_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #ifdef TABLE_ARBITRARY_ELEMS 16 | #include 17 | #include 18 | #endif 19 | 20 | #define SUPPORTED_TYPES_X(fn) \ 21 | fn(int) \ 22 | fn(long) \ 23 | fn(long long) \ 24 | fn(unsigned int) \ 25 | fn(unsigned long) \ 26 | fn(unsigned long long) \ 27 | fn(double) \ 28 | fn(const char *) \ 29 | fn(const std::string& ) 30 | 31 | namespace table { 32 | 33 | class Table; 34 | 35 | struct ColInfo { 36 | enum Justify { LEFT, RIGHT } justify; 37 | ColInfo() : justify(LEFT) {} 38 | }; 39 | 40 | /* 41 | * Given a printf-style format and args, return the formatted string as a std::string. 42 | * 43 | * See https://stackoverflow.com/a/26221725/149138. 44 | */ 45 | template 46 | std::string string_format(const std::string& format, Args ... args) { 47 | int size = snprintf( nullptr, 0, format.c_str(), args ... ) + 1; // Extra space for '\0' 48 | if (size < 0) { 49 | throw std::runtime_error("failed while formatting: " + format); 50 | } 51 | char* buf = new char[size + 1]; 52 | snprintf( buf, size + 1, format.c_str(), args ... 
); 53 | assert(buf[size] == '\0'); 54 | std::string ret(buf); // We don't want the '\0' inside 55 | delete [] buf; 56 | return ret; 57 | } 58 | 59 | class Row { 60 | friend Table; 61 | using row_t = std::vector; 62 | 63 | const Table* table_; 64 | row_t elems_; 65 | 66 | Row(const Table& table) : table_(&table) {} 67 | 68 | /** return a vector of sizes for each element */ 69 | std::vector getSizes() const; 70 | 71 | void str(std::ostream& os, const std::vector sizes) const; 72 | void csv_str(std::string& out) const; 73 | 74 | std::string justify(const ColInfo& cinfo, const std::string& e, size_t w) const; 75 | 76 | public: 77 | 78 | #ifdef TABLE_ARBITRARY_ELEMENTS 79 | /** add a cell to this row with the given element, returns a reference to this row */ 80 | template 81 | Row& add(const T& elem) { 82 | std::stringstream ss; 83 | ss << elem; 84 | elems_.push_back(ss.str()); 85 | return *this; 86 | } 87 | #endif 88 | 89 | #define DECLARE_ADD(T) Row& add(T); 90 | SUPPORTED_TYPES_X(DECLARE_ADD) 91 | 92 | Row& add_string(const std::string& s); 93 | 94 | /** 95 | * Add a formatted cell to this row with the given element. 96 | * The format is a printf-style format string and any additional arguments are the format arguments. 97 | * Returns a reference to this row. 98 | */ 99 | template 100 | Row& addf(const char* format, Args ... args) { 101 | elems_.push_back(string_format(format, args...)); 102 | return *this; 103 | } 104 | }; 105 | 106 | class Table { 107 | friend Row; 108 | using table_t = std::vector; 109 | using colinfo_t = std::vector; 110 | 111 | table_t rows_; 112 | colinfo_t colinfo_; 113 | std::string sep; 114 | 115 | public: 116 | 117 | Table() : sep(" ") {} 118 | 119 | /** 120 | * Get a reference to the ColInfo object for the given column, which lets you 121 | * set column-global info such as the justification. 
122 | */ 123 | ColInfo& colInfo(size_t col); 124 | 125 | /* in the cost case, return a default ColInfo if it doesn't exist */ 126 | ColInfo colInfo(size_t col) const; 127 | 128 | Row& newRow(); 129 | 130 | /** return a representation of the table as a human readable, column-aligned string */ 131 | std::string str() const; 132 | 133 | /** return the table as csv without padding */ 134 | std::string csv_str() const; 135 | 136 | void setColColumnSeparator(std::string s) { 137 | sep = s; 138 | } 139 | 140 | }; 141 | 142 | } 143 | 144 | 145 | #endif /* TABLE_HPP_ */ 146 | -------------------------------------------------------------------------------- /tsc-support.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * tsc-support.cpp 3 | */ 4 | 5 | #include "tsc-support.hpp" 6 | #include "cpuid.hpp" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | using std::uint32_t; 20 | 21 | static FILE* out = stdout; 22 | 23 | #define VERBOSE(...) 
do { if (out) std::fprintf(out, __VA_ARGS__); } while (false) 24 | 25 | uint64_t get_tsc_from_cpuid_inner() { 26 | if (cpuid_highest_leaf() < 0x15) { 27 | VERBOSE("CPUID doesn't support leaf 0x15, falling back to manual TSC calibration.\n"); 28 | return 0; 29 | } 30 | 31 | auto cpuid15 = cpuid(0x15); 32 | VERBOSE("cpuid = %s\n", cpuid15.to_string().c_str()); 33 | 34 | if (cpuid15.ecx) { 35 | // the crystal frequency was present in ECX 36 | return (uint64_t)cpuid15.ecx * cpuid15.ebx / cpuid15.eax; 37 | } 38 | 39 | // ecx == 0 means we have to use a hard-coded frequency based on the model and table provided by Intel 40 | // in 18.7.3 41 | auto family = get_family_model(); 42 | VERBOSE("cpu: %s\n", family.to_string().c_str()); 43 | 44 | 45 | if (family.family == 6) { 46 | if (family.model == 0x4E || family.model == 0x5E || family.model == 0x8E || family.model == 0x9E) { 47 | // skylake client or kabylake 48 | return (int64_t)24000000 * cpuid15.ebx / cpuid15.eax; // 24 MHz crystal clock 49 | } 50 | } else { 51 | VERBOSE("CPU family not 6 (perhaps AMD or old Intel), falling back to manual TSC calibration.\n"); 52 | } 53 | 54 | return 0; 55 | } 56 | 57 | uint64_t get_tsc_from_cpuid() { 58 | static auto cached = get_tsc_from_cpuid_inner(); 59 | return cached; 60 | } 61 | 62 | 63 | namespace Clock { 64 | static inline uint64_t nanos() { 65 | struct timespec ts; 66 | clock_gettime(CLOCK_MONOTONIC, &ts); 67 | return (uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec; 68 | } 69 | } 70 | 71 | constexpr size_t SAMPLES = 101; 72 | constexpr uint64_t DELAY_NANOS = 10000; // nanos 1us 73 | 74 | uint64_t do_sample() { 75 | _mm_lfence(); 76 | uint64_t nsbefore = Clock::nanos(); 77 | uint64_t tscbefore = rdtsc(); 78 | while (nsbefore + DELAY_NANOS > Clock::nanos()) 79 | ; 80 | uint64_t nsafter = Clock::nanos(); 81 | uint64_t tscafter = rdtsc(); 82 | return (tscafter - tscbefore) * 1000000000u / (nsafter - nsbefore); 83 | } 84 | 85 | uint64_t tsc_from_cal() { 86 | std::array samples; 87 | 
88 | for (size_t s = 0; s < SAMPLES * 2; s++) { 89 | samples[s] = do_sample(); 90 | } 91 | 92 | // throw out the first half of samples as a warmup 93 | std::array second_half; 94 | std::copy(samples.begin() + SAMPLES, samples.end(), second_half.begin()); 95 | std::sort(second_half.begin(), second_half.end()); 96 | 97 | // average the middle quintile 98 | auto third_quintile = second_half.begin() + 2 * SAMPLES/5; 99 | uint64_t sum = std::accumulate(third_quintile, third_quintile + SAMPLES/5, (uint64_t)0); 100 | 101 | return sum / (SAMPLES/5); 102 | } 103 | 104 | /** 105 | * TSC frequency detection is described in 106 | * Intel SDM Vol3 18.7.3: Determining the Processor Base Frequency 107 | * 108 | * Nominal TSC frequency = ( CPUID.15H.ECX[31:0] * CPUID.15H.EBX[31:0] ) ÷ CPUID.15H.EAX[31:0] 109 | */ 110 | uint64_t get_tsc_freq(bool force_calibrate) { 111 | uint64_t tsc; 112 | if (!force_calibrate && (tsc = get_tsc_from_cpuid())) { 113 | return tsc; 114 | } 115 | 116 | return tsc_from_cal(); 117 | } 118 | 119 | 120 | const char* get_tsc_cal_info(bool force_calibrate) { 121 | if (!force_calibrate && get_tsc_from_cpuid()) { 122 | return "from cpuid leaf 0x15"; 123 | } else { 124 | return "from calibration loop"; 125 | } 126 | 127 | } 128 | 129 | void set_logging_file(FILE *file) { 130 | out = file; 131 | } 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /tsc-support.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * tsc-support.cpp 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #ifdef _MSC_VER 9 | #include 10 | #else 11 | #include 12 | #endif 13 | 14 | static inline uint64_t rdtsc() { 15 | return __rdtsc(); 16 | } 17 | 18 | /** 19 | * Get the TSC frequency. 
20 | * 21 | * By default, this tries to read the TSC frequency directly from cpuid leaf 0x15, 22 | * if it is on a supported architecture, otherwise it falls back to using a calibration 23 | * loop. If force_calibrate is true, it always uses the calibration loop and never reads 24 | * from cpuid. 25 | */ 26 | std::uint64_t get_tsc_freq(bool force_calibrate); 27 | 28 | /** return a string describing how the TSC frequency was determined */ 29 | const char* get_tsc_cal_info(bool force_calibrate); 30 | 31 | /** set the file to log messages to, or nullptr if messages should simply be dropped */ 32 | void set_logging_file(FILE *file); 33 | -------------------------------------------------------------------------------- /util.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * util.hpp 3 | */ 4 | 5 | #ifndef UTIL_HPP_ 6 | #define UTIL_HPP_ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | /* 14 | * Split a string delimited by sep. 15 | * 16 | * See https://stackoverflow.com/a/7408245/149138 17 | */ 18 | static inline std::vector split(const std::string &text, const std::string &sep) { 19 | std::vector tokens; 20 | std::size_t start = 0, end = 0; 21 | while ((end = text.find(sep, start)) != std::string::npos) { 22 | tokens.push_back(text.substr(start, end - start)); 23 | start = end + sep.length(); 24 | } 25 | tokens.push_back(text.substr(start)); 26 | return tokens; 27 | } 28 | 29 | template 30 | static inline std::string join(const C& c, std::string sep) { 31 | std::string ret; 32 | for (auto& e : c) { 33 | if (!ret.empty()) { 34 | ret += sep; 35 | } 36 | ret += std::to_string(e); 37 | } 38 | return ret; 39 | } 40 | 41 | /** 42 | * Like std::transform, but allocates and returns a std::vector for the result. 
43 | */ 44 | template 45 | auto transformr(Itr begin, Itr end, F f) -> std::vector::value_type>()))> { 46 | decltype(transformr(begin, end, f)) ret; 47 | ret.reserve(std::distance(begin, end)); 48 | std::transform(begin, end, std::back_inserter(ret), f); 49 | return ret; 50 | } 51 | 52 | template 53 | auto transformv(const C& c, F f) -> std::vector { 54 | return transformr(std::begin(c), std::end(c), f); 55 | } 56 | 57 | template 58 | static inline auto concurrency(Itr start, Itr stop) -> typename std::iterator_traits::value_type { 59 | if (start == stop) { 60 | return {0, 0}; // early out for empty range simplifies some logic below 61 | } 62 | 63 | using T1 = decltype(concurrency(start, stop).first); 64 | 65 | struct event { 66 | T1 stamp; 67 | enum Type { START, STOP } type; 68 | event(T1 stamp, Type type) : stamp{stamp}, type{type} {} 69 | }; 70 | 71 | std::vector events; 72 | events.reserve(std::distance(start, stop)); 73 | T1 sum_top{}, sum_bottom{}; 74 | for (Itr i = start; i != stop; i++) { 75 | sum_top += i->second - i->first; 76 | events.emplace_back(i->first, event::START); 77 | events.emplace_back(i->second,event::STOP); 78 | } 79 | 80 | std::sort(events.begin(), events.end(), [](event l, event r){ return l.stamp < r.stamp; }); 81 | 82 | size_t count = 0; 83 | const event* last_event = nullptr; 84 | for (auto& event : events) { 85 | assert(count > 0 || event.type == event::START); 86 | if (count != 0) { 87 | assert(last_event); 88 | T1 period = event.stamp - last_event->stamp; 89 | // active interval, accumulate the numerator and denominators 90 | sum_bottom += period; 91 | } 92 | last_event = &event; 93 | count += event.type == event::START ? 1 : -1; 94 | } 95 | 96 | assert(count == 0); 97 | 98 | return {sum_top, sum_bottom}; 99 | } 100 | 101 | /** 102 | * Nested concurrency. 
103 | * 104 | * Returns a pair, where second is the sum of all of the inner intervals, and first is the weighted sum of 105 | * all of the inner intervals, weighted by the number of concurrent outer intervals. That is, if there are 106 | * two concurrent outer intervals for the entire period of an inner interval, the value contributed to first 107 | * is twice the size of its interval. 108 | * 109 | * This calculates a concurrency value somewhat like concurrency(), except that the evaluated intervals are 110 | * broken into two sets: inner and outer (although these names are somewhat arbitrary). The returned value 111 | * is the concurrency of the inner ranges evaluated against the outer ranges. That is, the concurrency value 112 | * at any point for any nested range is not related to any other concurrent nested ranges, but to the number of 113 | * concurrent outer ranges. 114 | * 115 | * The intuition is that this is a useful figure for evaluating concurrent benchmarks where each benchmark 116 | * thread has a nested structure like: 117 | * 118 | * { 119 | * // OUTER region 120 | * { 121 | * // INNER region 122 | * } 123 | * } 124 | * 125 | * That is, the OUTER region encloses the INNER. In this scenario, the INNER region may be the timed one, while the OUTER 126 | * region is performing the same type of operations as the INNER, but not timed. In particular, the effect on other 127 | * threads is similar in the OUTER and INNER regions. One may want to evaluate whether all INNER regions were executed 128 | * during a time when the OUTER region on all other threads was active. 
129 | */ 130 | template 131 | static inline auto nested_concurrency(Itr starto, Itr stopo, Itr starti, Itr stopi) -> typename std::iterator_traits::value_type { 132 | if (starti == stopi) { 133 | return {0, 0}; // early out for empty range simplifies some logic below 134 | } 135 | 136 | using T1 = decltype(nested_concurrency(stopo, stopo, stopo, stopo).first); 137 | 138 | struct event { 139 | T1 stamp; 140 | enum Type { STARTO, STOPO, STARTI, STOPI } type; 141 | event(T1 stamp, Type type) : stamp{stamp}, type{type} {} 142 | }; 143 | 144 | std::vector events; 145 | events.reserve(std::distance(starto, stopo) + std::distance(starti, stopi)); 146 | for (Itr i = starto; i != stopo; i++) { 147 | events.emplace_back(i->first, event::STARTO); 148 | events.emplace_back(i->second,event::STOPO); 149 | } 150 | T1 sum_top{}, sum_bottom{}; 151 | for (Itr i = starti; i != stopi; i++) { 152 | sum_bottom += i->second - i->first; 153 | events.emplace_back(i->first, event::STARTI); 154 | events.emplace_back(i->second,event::STOPI); 155 | } 156 | 157 | std::sort(events.begin(), events.end(), [](event l, event r){ return l.stamp < r.stamp; }); 158 | 159 | /* 00011242110 == 12 (out of a possible 11 * 2 == 22) 160 | * IIIII 161 | * IIIIII 162 | * CCCCC 163 | * CCC 164 | */ 165 | size_t ocount = 0, icount = 0; 166 | T1 last_stamp = events.front().stamp; 167 | for (auto& event : events) { 168 | sum_top += ocount * icount * (event.stamp - last_stamp); 169 | switch (event.type) { 170 | case event::STARTO: 171 | ocount++; 172 | break; 173 | case event::STOPO: 174 | assert(ocount > 0); 175 | ocount--; 176 | break; 177 | case event::STARTI: 178 | icount++; 179 | break; 180 | case event::STOPI: 181 | assert(icount > 0); 182 | icount--; 183 | break; 184 | } 185 | last_stamp = event.stamp; 186 | } 187 | 188 | assert(ocount == 0); 189 | assert(icount == 0); 190 | 191 | return {sum_top, sum_bottom}; 192 | } 193 | 194 | /** 195 | * Linearly remap value from the input range to the output range. 
That is, return the value that represents in outrange the 196 | * relative position of the input value in outrange. 197 | */ 198 | static inline double remap(double value, double inrange_start, double inrange_end, double outrange_start, double outrange_end) { 199 | return outrange_start + (outrange_end - outrange_start) / (inrange_end - inrange_start) * (value - inrange_start); 200 | } 201 | 202 | /** 203 | * The concurrency ratio for the pairs in the range [start, stop). 204 | * 205 | * Intuitively, a ratio of 1.0 means maximum overlap, while a ratio of 0.0 means all 206 | * the ranges were distinct. 207 | * 208 | * Simply a shortcut for (c.first - c.second) / (c.second * std::distance(start, stop)) where 209 | * c = concurrency(start, stop). 210 | */ 211 | template 212 | static inline double conc_ratio(Itr start, Itr stop) { 213 | size_t N = std::distance(start, stop); 214 | if (N == 1) { 215 | return 1.0; // special "by definition" case since remap doesn't work in this case 216 | } 217 | auto conc = concurrency(start, stop); 218 | // gives a ratio between N and 1 where N is the number of ranges 219 | double raw_ratio = conc.first/((double)conc.second); 220 | return remap(raw_ratio, 1.0, N, 0.0, 1.0); 221 | } 222 | 223 | template 224 | static inline double nconc_ratio(Itr starto, Itr stopo, Itr starti, Itr stopi) { 225 | size_t ocount = std::distance(starto, stopo); 226 | if (ocount == 0) { 227 | return 0.0; 228 | } 229 | auto conc = nested_concurrency(starto, stopo, starti, stopi); 230 | // gives a ratio between N and 1 where N is the number of ranges 231 | double raw_ratio = conc.first/((double)conc.second); 232 | if (ocount == 1) { 233 | return raw_ratio; 234 | } 235 | return remap(raw_ratio, 1, ocount, 0.0, 1.0); 236 | } 237 | 238 | 239 | #endif /* UTIL_HPP_ */ 240 | --------------------------------------------------------------------------------