├── AUTHORS ├── CMakeLists.txt ├── Doxyfile ├── README-BENCHMARKS-TESTING.md ├── README.md ├── TODO.dox ├── bench ├── CMakeLists.txt ├── memlat │ ├── CMakeLists.txt │ └── memlat.c ├── multilat │ ├── CMakeLists.txt │ └── multilat.c └── new_memlat │ ├── CMakeLists.txt │ ├── memlat.c │ └── memlat.sh ├── benchmark-tests ├── bandwidth-model-building.sh ├── memlat-bench-test-10M-single-socket.sh ├── memlat-bench-test-10M.sh ├── memlat-orig-lat-test-single-socket.sh ├── memlat-orig-lat-test.sh ├── nvmemul-bandwidth.ini ├── nvmemul-debug.ini ├── nvmemul-orig.ini └── nvmemul.ini ├── license.txt ├── nvmemul-orig.ini ├── nvmemul.dox ├── nvmemul.ini ├── scripts ├── install.sh ├── runenv.sh ├── setupdev.sh └── turboboost.sh ├── src ├── CMakeLists.txt ├── dev │ ├── CMakeLists.txt │ ├── Makefile │ ├── ioctl_query.h │ └── pmc.c └── lib │ ├── CMakeLists.txt │ ├── config.c │ ├── config.h │ ├── cpu │ ├── CMakeLists.txt │ ├── cpu.c │ ├── cpu.h │ ├── haswell-papi.h │ ├── haswell.h │ ├── ivybridge-papi.h │ ├── ivybridge.h │ ├── known_cpus.h │ ├── pmc-papi.c │ ├── pmc-papi.h │ ├── pmc.c │ ├── pmc.h │ ├── sandybridge-papi.h │ ├── sandybridge.h │ └── xeon-ex.h │ ├── debug.c │ ├── debug.h │ ├── dev.c │ ├── dev.h │ ├── errno.h │ ├── error.h │ ├── init.c │ ├── interpose.c │ ├── interpose.h │ ├── measure.h │ ├── measure_bw.c │ ├── measure_lat.c │ ├── misc.c │ ├── misc.h │ ├── model.h │ ├── model_bw.c │ ├── model_lat.c │ ├── monotonic_timer.c │ ├── monotonic_timer.h │ ├── pflush.c │ ├── pflush.h │ ├── pmalloc.c │ ├── pmalloc.h │ ├── process_rank.c │ ├── stat.c │ ├── stat.h │ ├── thread.c │ ├── thread.h │ ├── topology.c │ └── topology.h └── test ├── CMakeLists.txt ├── test_dev.cc ├── test_interpose.cc ├── test_multithread.c ├── test_mutex.cc ├── test_nvm.c ├── test_nvm_remote_dram.c └── test_thread.cc /AUTHORS: -------------------------------------------------------------------------------- 1 | Haris Volos (haris.volos@hpe.com) 2 | Guilherme Magalhaes (guilherme.magalhaes@hpe.com) 3 | Lucy 
Cherkasova (lucy.cherkasova@gmail.com) 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | #add_subdirectory(third_party) 4 | add_subdirectory(src) 5 | add_subdirectory(bench) 6 | enable_testing() 7 | #add_subdirectory(test) 8 | -------------------------------------------------------------------------------- /README-BENCHMARKS-TESTING.md: -------------------------------------------------------------------------------- 1 | **For testing whether your environment is configured correctly for 2 | running Quartz** (e.g., whether you set all the required environmental 3 | variables, etc.) **we have created a few scripts with benchmarks, which 4 | can be executed automatically** and which can provide you with a 5 | feedback on Quartz performance in your environment. 6 | 7 | **The directory with these scripts is called: *benchmark-tests*. There are three scripts which you can run:** 8 | - **bandwidth-model-building.sh** 9 | 10 | This script will execute for approximately **10 min** and will build a memory 11 | bandwidth model that can be used in the experiments with memory bandwidth 12 | throttling. The configuration file uses a "debug" mode on purpose -- that 13 | you can see the messages on the screen about the progress of the memory 14 | bandwidth model building, which can be found at */tmp/bandwidth_model* 15 | 16 | - **memlat-orig-lat-test.sh** 17 | 18 | This script will measure your server hardware *memory access latency* in nanoseconds: local 19 | and remote (for two sockets servers). It will execute the test 20 times, and write the results in directory *ORIG-lat-test*. 20 | You can find the summary of the results in the file *ORIG-lat-test/final-hw-latency.txt*. 
21 | It will have measurements like: 22 | 23 | FORMAT: 1_min_local 2_aver_local 3_max_local 4_min_remote 5_aver_remote 6_max_remote 24 | 91 91.9 92 152 163.9 176 25 | 26 | The first three numbers show: minimal, average and maximum measured local 27 | memory access latency (in ns, over 20 measurements). The last three numbers 28 | show similar measurements for access latency of the remote memory, 29 | i.e., in the second socket. 30 | 31 | - **memlat-bench-test-10M.sh** 32 | 33 | This script will execute the memlat benchmark (pointer-chasing benchmark) with 34 | nine emulated memory access latencies: 200 ns, 300 ns,..., 1000 ns. 35 | It will run the benchmark with these emulated latencies in two settings: 36 | in the local socket (i.e., emulating a higher memory access latency in the 37 | local socket) and similarly, in the remote socket. 38 | Each test is repeated 10 times: this is used for assessing the variability 39 | of your environment. In some cases, we had issues with TurboBoost mode, \ 40 | which did impact the quality of the emulation... 41 | This test might take **approx. 30 min to finish** (since it executes 180 tests), 42 | and will create two output directories: *FULL-RESULTS-test* and 43 | *SUMMARY-RESULTS-test* 44 | In the directory SUMMARY-RESULTS-test, you will find two files that 45 | summarize the outcome of the experiments in the local and remote sockets. 
46 | The outcome should look like this: 47 | 48 | FORMAT: 1_emul_lat 2_min_meas_lat 3_aver_meas_lat 4_max_meas_lat 5_aver_error(%) 6_max_error(%) 49 | 200 177 197.9 204 1.05 11.5 50 | 300 259 289.5 300 3.5 13.6 51 | 400 354 382.6 395 4.3 11.5 52 | 500 468 485.8 490 2.8 6.4 53 | 600 554 575.3 585 4.1 7.6 54 | 700 640 666.6 681 4.7 8.5 55 | 800 749 766.4 776 4.2 6.3 56 | 900 851 866.2 871 3.7 5.4 57 | 1000 926 956.5 966 4.35 7.4 58 | 59 | The format is the following: 60 | 1st column: emulated latency (in nanoseconds) 61 | 2nd column: minimum measured latency (across 10 tests, in ns) 62 | 3rd column: average measured latency (across 10 tests, in ns) 63 | 4th column: maximum measured latency (across 10 tests, in ns) 64 | 5th column: average error (between emulated and measured latencies, in %) 65 | 6th column: max error (between emulated and measured latencies, in %) 66 | 67 | One of the goals of the designed performance emulator is to provide a 68 | framework for application sensitivity studies under different 69 | latencies and memory bw. Even if you have 15% deviation (error) from 70 | the targeted emulated latencies, but the benchmark measurements are 71 | consistent -- this is a good sign that you can perform a good 72 | sensitivity study. 73 | -------------------------------------------------------------------------------- /TODO.dox: -------------------------------------------------------------------------------- 1 | /** 2 | \file 3 | 4 | \todo Improve performance counter API by making it more generic. For example, autogenerate pmc event_id using perf. 5 | \todo Currently we may interrupt a thread to form a new epoch while it is blocked. This might cause accumulation of overhead cycles. 6 | \todo Currently our bandwidth model cannot independently throttle read and write bandwidth as it relies on throttling DDR ACT transactions. We tried throttling DDR READ and DDR WRITE transactions but this didn't work. 
7 | \todo Extend library to interpose on other synchronization events we care about: semaphores, barriers, context switches, openMP sync primitives, etc. 8 | \todo Currently our library does not support context switching. Extend the device driver to properly handle context switching: keep track of per-thread cpu counters, introduce proper delay at context switch points. 9 | \todo Support uncacheable and write-through memory. 10 | \todo Signal SIGUSR1 should be dedicated to the emulator. If the application makes use of this signal, the emulator will not work. Figure out a way to fix this limitation. 11 | \todo Interpose pthread_cancel() and pthread_exit() to make sure the thread is always deregistered internally to the emulator? 12 | \todo CPU counters overflow is not currently handled. 13 | \todo Multiple processes emulation must be reviewed: log file per process, statistics report by process, process id and thread id indications in the log messages. 14 | \todo See Limitations section in the README file. 15 | */ 16 | -------------------------------------------------------------------------------- /bench/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(memlat) 2 | add_subdirectory(new_memlat) 3 | add_subdirectory(multilat) 4 | -------------------------------------------------------------------------------- /bench/memlat/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(${CMAKE_SOURCE_DIR}/src/lib) 2 | add_executable(memlat memlat.c) 3 | target_link_libraries(memlat nvmemul pthread) 4 | -------------------------------------------------------------------------------- /bench/memlat/memlat.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 
3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #define MAX_NUM_THREADS 512 21 | 22 | uint64_t g_seed, g_nchains, g_nelems, g_from_node_id, g_to_node_id, g_element_size, g_access_size; 23 | 24 | extern int measure_latency2(uint64_t seedin, int nchains, size_t nelems, int element_size, int access_size, int from_node_id, int to_node_id); 25 | 26 | static uint64_t safe_strtoull(const char *s) { 27 | char *ep; 28 | uint64_t r; 29 | assert(NULL != s && '\0' != *s); 30 | r = strtoull(s, &ep, 10); 31 | assert('\0' == *ep); 32 | return r; 33 | } 34 | 35 | 36 | void* worker(void* arg) 37 | { 38 | int latency_ns; 39 | 40 | latency_ns = measure_latency2(g_seed, g_nchains, g_nelems, g_element_size, g_access_size, g_from_node_id, g_to_node_id); 41 | printf("latency_ns: %d\n", latency_ns); 42 | 43 | return NULL; 44 | } 45 | int main(int argc, char *argv[]) { 46 | int i; 47 | uint64_t nthreads; 48 | pthread_t thread[MAX_NUM_THREADS]; 49 | 50 | if (9 != argc) { 51 | fprintf(stderr, "usage: %s PRNGseed Nthreads Nchains Nelems SZelem SZaccess from_node to_node\n", argv[0]); 52 | return 1; 53 | } 54 | g_seed = safe_strtoull(argv[1]); 55 | nthreads = 
safe_strtoull(argv[2]); 56 | g_nchains = safe_strtoull(argv[3]); 57 | g_nelems = safe_strtoull(argv[4]); 58 | g_element_size = safe_strtoull(argv[5]); 59 | g_access_size = safe_strtoull(argv[6]); 60 | g_from_node_id = safe_strtoull(argv[7]); 61 | g_to_node_id = safe_strtoull(argv[8]); 62 | 63 | for (i = 0; i< nthreads; i++) { 64 | pthread_create(&thread[i], NULL, worker, NULL); 65 | } 66 | for(i = 0 ; i < nthreads; i++) { 67 | pthread_join(thread[i], NULL); 68 | } 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /bench/multilat/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(${CMAKE_SOURCE_DIR}/src/lib) 2 | 3 | add_executable(multilat multilat.c) 4 | target_link_libraries(multilat nvmemul pthread) 5 | -------------------------------------------------------------------------------- /bench/new_memlat/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(${CMAKE_SOURCE_DIR}/src/lib) 2 | add_executable(new_memlat memlat.c) 3 | target_link_libraries(new_memlat nvmemul pthread) 4 | -------------------------------------------------------------------------------- /bench/new_memlat/memlat.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. 
See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "model.h" 20 | #include "thread.h" 21 | 22 | #define MAX_NUM_THREADS 512 23 | 24 | uint64_t g_seed, g_nchains, g_nelems, g_from_node_id, g_to_node_id, g_element_size, g_access_size; 25 | 26 | extern int measure_latency2(uint64_t seedin, int nchains, size_t nelems, int element_size, int access_size, int from_node_id, int to_node_id); 27 | 28 | static uint64_t safe_strtoull(const char *s) { 29 | char *ep; 30 | uint64_t r; 31 | assert(NULL != s && '\0' != *s); 32 | r = strtoull(s, &ep, 10); 33 | assert('\0' == *ep); 34 | return r; 35 | } 36 | 37 | extern latency_model_t latency_model; 38 | 39 | #ifdef MEMLAT_SUPPORT 40 | extern __thread int tls_hw_local_latency; 41 | extern __thread int tls_hw_remote_latency; 42 | extern __thread uint64_t tls_global_remote_dram; 43 | extern __thread uint64_t tls_global_local_dram; 44 | 45 | static inline uint64_t ns_to_cycles(int cpu_speed_mhz, int ns) 46 | { 47 | return (cpu_speed_mhz * ns) / 1000; 48 | } 49 | #endif 50 | 51 | void* worker(void* arg) 52 | { 53 | int latency_ns; 54 | #ifdef MEMLAT_SUPPORT 55 | uint64_t exp_stalls; 56 | uint64_t calc_nvm_accesses; 57 | uint64_t detected_hw_lat; 58 | uint64_t actual_lat = 0; 59 | uint64_t total_time; 60 | uint64_t fixed_latency_ns = 0; 61 | uint64_t nvm_accesses = 0; 62 | uint64_t nvm_hw_latency; 63 | #endif 64 | 65 | latency_ns = measure_latency2(g_seed, g_nchains, g_nelems, g_element_size, g_access_size, g_from_node_id, g_to_node_id); 66 | printf("latency_ns: %d ns\n", latency_ns); 67 | 68 | #ifdef MEMLAT_SUPPORT 69 | total_time = g_nelems * latency_ns; 
70 | if (thread_self()->virtual_node->dram_node != thread_self()->virtual_node->nvram_node) { 71 | detected_hw_lat = ns_to_cycles(thread_self()->cpu_speed_mhz, tls_hw_remote_latency); 72 | if (tls_global_remote_dram > 0) { 73 | actual_lat = thread_self()->stall_cycles / tls_global_remote_dram; 74 | fixed_latency_ns = total_time / tls_global_remote_dram; 75 | nvm_accesses = tls_global_remote_dram; 76 | } 77 | nvm_hw_latency = tls_hw_remote_latency; 78 | } else { 79 | detected_hw_lat = ns_to_cycles(thread_self()->cpu_speed_mhz, tls_hw_local_latency); 80 | if (tls_global_local_dram > 0) { 81 | actual_lat = thread_self()->stall_cycles / tls_global_local_dram; 82 | fixed_latency_ns = total_time / tls_global_local_dram; 83 | nvm_accesses = tls_global_local_dram; 84 | } 85 | nvm_hw_latency = tls_hw_local_latency; 86 | } 87 | exp_stalls = g_nelems * detected_hw_lat; 88 | calc_nvm_accesses = thread_self()->stall_cycles / detected_hw_lat; 89 | 90 | printf("target latency: %d ns\n", latency_model.read_latency); 91 | printf("Error: %3.1f%%\n", (double)(abs(latency_model.read_latency - latency_ns)*100) / (double)latency_model.read_latency); 92 | printf("target NVM accesses: %ld\n", g_nelems); 93 | printf("detected HW latency: %ld ns\n", nvm_hw_latency); 94 | printf("detected HW latency: %ld cycles (detected_hw_lat making use of cpu_speed_mhz)\n", detected_hw_lat); 95 | printf("expected CPU stalls: %ld cycles (target_nvm_accesses * detected_hw_lat)\n", exp_stalls); 96 | printf("actual CPU stalls: %ld cycles\n", thread_self()->stall_cycles); 97 | printf("calculated NVM accesses: %ld (actual_cpu_stalls / detected_hw_lat)\n", calc_nvm_accesses); 98 | if (nvm_accesses != 0) { 99 | printf("actual NVM accesses: %ld\n", nvm_accesses); 100 | printf("actual latency: %ld cyles (actual_stalls / actual_nvm_accesses)\n", actual_lat); 101 | printf("fixed measured latency: %ld ns (total_chasing_time / actual_nvm_accesses)\n", fixed_latency_ns); 102 | printf("fixed latency error: %3.1f%%\n", 
(double)(abs(latency_model.read_latency - fixed_latency_ns)*100) / (double)latency_model.read_latency); 103 | } else { 104 | fixed_latency_ns = total_time / calc_nvm_accesses; 105 | printf("fixed measured latency: %ld ns (total_chasing_time / calculated_nvm_accesses)\n", fixed_latency_ns); 106 | printf("fixed latency error: %3.1f%%\n", (double)(abs(latency_model.read_latency - fixed_latency_ns)*100) / (double)latency_model.read_latency); 107 | } 108 | #endif 109 | return NULL; 110 | } 111 | int main(int argc, char *argv[]) { 112 | int i; 113 | uint64_t nthreads; 114 | pthread_t thread[MAX_NUM_THREADS]; 115 | 116 | if (9 != argc) { 117 | fprintf(stderr, "usage: %s PRNGseed Nthreads Nchains Nelems SZelem SZaccess from_node to_node\n", argv[0]); 118 | return 1; 119 | } 120 | g_seed = safe_strtoull(argv[1]); 121 | nthreads = safe_strtoull(argv[2]); 122 | g_nchains = safe_strtoull(argv[3]); 123 | g_nelems = safe_strtoull(argv[4]); 124 | g_element_size = safe_strtoull(argv[5]); 125 | g_access_size = safe_strtoull(argv[6]); 126 | g_from_node_id = safe_strtoull(argv[7]); 127 | g_to_node_id = safe_strtoull(argv[8]); 128 | 129 | for (i = 0; i< nthreads; i++) { 130 | pthread_create(&thread[i], NULL, worker, NULL); 131 | } 132 | for(i = 0 ; i < nthreads; i++) { 133 | pthread_join(thread[i], NULL); 134 | } 135 | return 0; 136 | } 137 | -------------------------------------------------------------------------------- /bench/new_memlat/memlat.sh: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | #Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | #This program is free software; you can redistribute it and/or modify 4 | #it under the terms of the GNU General Public License as published by 5 | #the Free Software Foundation; either version 2 of the License, or (at 6 | #your option) any later version. 
This program is distributed in the 7 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | #PURPOSE. See the GNU General Public License for more details. You 10 | #should have received a copy of the GNU General Public License along 11 | #with this program; if not, write to the Free Software Foundation, 12 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ################################################################# 14 | #!/bin/bash 15 | 16 | # percentage of error as threshold to discard outliers, anything above this percentage will be discarded 17 | MAX_ERROR_PERCENTAGE=10 18 | # max number of tries to execute memlat 19 | MAX_TRIES=10 20 | 21 | 22 | TEMP_FILE=/tmp/tmp_memlat.out 23 | 24 | 25 | NVM_EMUL_PATH="`dirname $0`/../.." 26 | NELEMS=$1 27 | TARGET_DRAM=$2 28 | 29 | 30 | function usage() 31 | { 32 | echo "$0 [number of elements] [0=local dram|1=remote dram]" 33 | exit 1 34 | } 35 | 36 | function validate_decimal() 37 | { 38 | re='^[0-9]+$' 39 | if ! [[ $1 =~ $re ]] ; then 40 | return 1 41 | fi 42 | return 0 43 | } 44 | 45 | function check_parameters() 46 | { 47 | if [ $# -ne 2 ]; then 48 | echo "Incorrect arguments" 49 | usage 50 | fi 51 | 52 | validate_decimal ${NELEMS} 53 | 54 | if [ $? 
-ne 0 ]; then 55 | echo "Invalid number of arguments" 56 | usage 57 | fi 58 | 59 | if [ ${TARGET_DRAM} -ne 0 -a ${TARGET_DRAM} -ne 1 ]; then 60 | echo "Incorret dram target" 61 | usage 62 | fi 63 | } 64 | 65 | function verify_run 66 | { 67 | target=$(cat ${TEMP_FILE} | grep "target latency" | awk '{ print $3 }') 68 | measured=$(cat ${TEMP_FILE} | grep "measured latency" | awk '{ print $4 }') 69 | 70 | if [ ${measured} -gt ${target} ]; then 71 | delta=$(expr ${measured} - ${target}); 72 | else 73 | delta=$(expr ${target} - ${measured}); 74 | fi 75 | 76 | if [ ${target} -gt 0 ]; then 77 | error=$(expr ${delta} \* 100) 78 | error=$(expr ${error} \/ ${target}) 79 | else 80 | error=0 81 | fi 82 | 83 | 84 | if [ ${error} -gt ${MAX_ERROR_PERCENTAGE} ]; then 85 | return 1 86 | fi 87 | 88 | return 0 89 | } 90 | 91 | ############ MAIN ###################### 92 | 93 | check_parameters $* 94 | 95 | # execute memlat in loop until the result is within the threshold or the max tries is reached 96 | for (( c=0; c<${MAX_TRIES}; c++ )); do 97 | ${NVM_EMUL_PATH}/scripts/runenv.sh ${NVM_EMUL_PATH}/build/bench/new_memlat/new_memlat 1 1 1 ${NELEMS} 64 8 0 ${TARGET_DRAM} &> ${TEMP_FILE} 98 | 99 | verify_run 100 | 101 | ret=$? 102 | 103 | if [ ${ret} -eq 0 ]; then 104 | cat ${TEMP_FILE} | grep "measured latency" 105 | break 106 | fi 107 | done 108 | 109 | if [ ${ret} -ne 0 ]; then 110 | echo "Could not produce a valid run" 111 | fi 112 | 113 | rm -f ${TEMP_FILE} 114 | 115 | exit ${ret} 116 | -------------------------------------------------------------------------------- /benchmark-tests/bandwidth-model-building.sh: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | #Copyright 2016 Hewlett Packard Enterprise Development LP. 
3 | #This program is free software; you can redistribute it and/or modify 4 | #it under the terms of the GNU General Public License as published by 5 | #the Free Software Foundation; either version 2 of the License, or (at 6 | #your option) any later version. This program is distributed in the 7 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | #PURPOSE. See the GNU General Public License for more details. You 10 | #should have received a copy of the GNU General Public License along 11 | #with this program; if not, write to the Free Software Foundation, 12 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ################################################################# 14 | #!/bin/bash 15 | 16 | echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor 17 | 18 | cp nvmemul-bandwidth.ini nvmemul.ini 19 | rm /tmp/bandwidth_model 20 | ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0 21 | ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0 22 | -------------------------------------------------------------------------------- /benchmark-tests/memlat-bench-test-10M-single-socket.sh: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | #Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | #This program is free software; you can redistribute it and/or modify 4 | #it under the terms of the GNU General Public License as published by 5 | #the Free Software Foundation; either version 2 of the License, or (at 6 | #your option) any later version. This program is distributed in the 7 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | #PURPOSE. See the GNU General Public License for more details. 
You 10 | #should have received a copy of the GNU General Public License along 11 | #with this program; if not, write to the Free Software Foundation, 12 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ################################################################# 14 | #!/bin/bash 15 | 16 | #awk '($1~/physical_nodes/) {print;}' nvmemul.ini 17 | 18 | echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor 19 | 20 | dir_name_res=FULL-RESULTS-test 21 | dir_name_sum=SUMMARY-RESULTS-test 22 | 23 | rm -rf $dir_name_sum 24 | mkdir $dir_name_sum 25 | 26 | rm -f foo* 27 | rm -rf $dir_name_res 28 | mkdir $dir_name_res 29 | 30 | cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor >> $dir_name_res/foo-runs-test 31 | 32 | cp nvmemul-orig.ini nvmemul.ini 33 | ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0 >foo 34 | 35 | 36 | for numchains in 1 37 | do 38 | for epoch in 10000 39 | do 40 | echo "#FORMAT #1_emul_lat(ns) #2_min_meas_lat(ns) #3_aver_meas_lat(ns) #4_max_meas_lat(ns) #5_aver_error(%) #6_max_error(%)" > $dir_name_sum/summary-nvm-lat-accuracy-epoch-$epoch-numchains-$numchains.txt 41 | 42 | for lat in 200 300 400 500 600 700 800 900 1000 43 | do 44 | awk 'BEGIN {read_lat = substr(ARGV[2],3); epoch_lat = substr(ARGV[3],3);} 45 | (!(NR==7 || NR==9 || NR==10 || $1~/physical_nodes/)){ print;} 46 | (NR==7){ print $1,$2, read_lat,";";} 47 | (NR==9){ print $1,$2, epoch_lat,";";} 48 | (NR==10){ print $1,$2, epoch_lat,";";} 49 | ($1~/physical_nodes/) {print $1,$2,"\"0\""";";} 50 | ' nvmemul-orig.ini v=$lat v=$epoch > foo-nvmemul-$lat-$epoch.ini 51 | mv foo-nvmemul-$lat-$epoch.ini nvmemul.ini 52 | echo "lat epoch chains" $lat $epoch $numchains >> $dir_name_res/foo-runs 53 | 54 | for time in 1 2 3 4 5 6 7 8 9 10 55 | do 56 | ../build/bench/memlat/memlat 1 1 $numchains 10000000 64 8 0 0 >> $dir_name_res/full_results-$lat-$epoch-$numchains.txt 57 | done 58 | grep latency_ns 
$dir_name_res/full_results-$lat-$epoch-$numchains.txt > $dir_name_res/results-$lat-$epoch-$numchains.txt 59 | awk 'BEGIN {max = 0; min = 1000000; sum = 0; aver=0.0; max_error=0.0; aver_error=0.0;read_lat = substr(ARGV[2],3);epoch_lat = substr(ARGV[3],3); MPL = substr(ARGV[4],3); } 60 | ($2 > max){max = $2;} 61 | ($2 < min){min = $2;} 62 | {sum=sum+$2; if ($2 < read_lat*1.0) {error=read_lat -$2} else {error=$2 - read_lat}; if (error > max_error) max_error=error;} 63 | END {aver=sum/NR; if (aver < read_lat*1.0) {aver_error = (read_lat - aver)*100.0/read_lat} else {aver_error = (aver - read_lat )*100.0/read_lat}; print read_lat, min,aver,max, aver_error,max_error*100.0/read_lat;} ' $dir_name_res/results-$lat-$epoch-$numchains.txt v=$lat v=$epoch v=$numchains >> $dir_name_sum/summary-nvm-lat-accuracy-epoch-$epoch-numchains-$numchains.txt 64 | 65 | done 66 | done 67 | done 68 | 69 | 70 | #FORMAT_summary-results: #1_nvm_lat(ns) #2_min_nvm_lat(ns) #3_aver_nvm_lat(ns) #4_max_nvm_lat(ns) #5_aver_error(%) #6_max_error(%) 71 | 72 | #parameter is nvm_lat 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /benchmark-tests/memlat-bench-test-10M.sh: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | #Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | #This program is free software; you can redistribute it and/or modify 4 | #it under the terms of the GNU General Public License as published by 5 | #the Free Software Foundation; either version 2 of the License, or (at 6 | #your option) any later version. This program is distributed in the 7 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | #PURPOSE. See the GNU General Public License for more details. 
You 10 | #should have received a copy of the GNU General Public License along 11 | #with this program; if not, write to the Free Software Foundation, 12 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ################################################################# 14 | #!/bin/bash 15 | 16 | #awk '($1~/physical_nodes/) {print;}' nvmemul.ini 17 | 18 | num_sockets=$(cat /proc/cpuinfo | grep "physical id" | sort -u | wc -l) 19 | if [ $num_sockets -eq 1 ]; 20 | then 21 | echo "Single Socket" 22 | ./memlat-bench-test-10M-single-socket.sh 23 | exit 0 24 | fi 25 | 26 | echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor 27 | 28 | dir_name_res=FULL-RESULTS-test 29 | dir_name_sum=SUMMARY-RESULTS-test 30 | 31 | rm -rf $dir_name_sum 32 | mkdir $dir_name_sum 33 | 34 | rm -f foo* 35 | rm -rf $dir_name_res 36 | mkdir $dir_name_res 37 | 38 | cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor >> $dir_name_res/foo-runs-test 39 | 40 | cp nvmemul-orig.ini nvmemul.ini 41 | ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 1 >foo 42 | 43 | for conf in local remote 44 | do 45 | if [ $conf = local ]; then confpar=0 46 | else confpar=1 47 | fi 48 | for numchains in 1 49 | do 50 | for epoch in 10000 51 | do 52 | echo "#FORMAT #1_emul_lat(ns) #2_min_meas_lat(ns) #3_aver_meas_lat(ns) #4_max_meas_lat(ns) #5_aver_error(%) #6_max_error(%)" > $dir_name_sum/summary-nvm-lat-accuracy-$conf-epoch-$epoch-numchains-$numchains.txt 53 | 54 | for lat in 200 300 400 500 600 700 800 900 1000 55 | do 56 | awk 'BEGIN {read_lat = substr(ARGV[2],3); epoch_lat = substr(ARGV[3],3); config = substr(ARGV[4],3);} 57 | (!(NR==7 || NR==9 || NR==10 || $1~/physical_nodes/)){ print;} 58 | (NR==7){ print $1,$2, read_lat,";";} 59 | (NR==9){ print $1,$2, epoch_lat,";";} 60 | (NR==10){ print $1,$2, epoch_lat,";";} 61 | ($1~/physical_nodes/ && config ~ /local/) {print $1,$2,"\"0\""";";} 62 | ($1~/physical_nodes/ && config ~ /remote/) {print $1,$2,"\"0,1\""";";} 
63 | ' nvmemul-orig.ini v=$lat v=$epoch v=$conf > foo-nvmemul-$lat-$epoch.ini 64 | mv foo-nvmemul-$lat-$epoch.ini nvmemul.ini 65 | echo "lat epoch chains" $lat $epoch $numchains >> $dir_name_res/foo-runs 66 | 67 | for time in 1 2 3 4 5 6 7 8 9 10 68 | do 69 | ../build/bench/memlat/memlat 1 1 $numchains 10000000 64 8 0 $confpar >> $dir_name_res/full_results-$conf-$lat-$epoch-$numchains.txt 70 | done 71 | grep latency_ns $dir_name_res/full_results-$conf-$lat-$epoch-$numchains.txt > $dir_name_res/results-$conf-$lat-$epoch-$numchains.txt 72 | awk 'BEGIN {max = 0; min = 1000000; sum = 0; aver=0.0; max_error=0.0; aver_error=0.0;read_lat = substr(ARGV[2],3);epoch_lat = substr(ARGV[3],3); MPL = substr(ARGV[4],3); } 73 | ($2 > max){max = $2;} 74 | ($2 < min){min = $2;} 75 | {sum=sum+$2; if ($2 < read_lat*1.0) {error=read_lat -$2} else {error=$2 - read_lat}; if (error > max_error) max_error=error;} 76 | END {aver=sum/NR; if (aver < read_lat*1.0) {aver_error = (read_lat - aver)*100.0/read_lat} else {aver_error = (aver - read_lat )*100.0/read_lat}; print read_lat, min,aver,max, aver_error,max_error*100.0/read_lat;} ' $dir_name_res/results-$conf-$lat-$epoch-$numchains.txt v=$lat v=$epoch v=$numchains >> $dir_name_sum/summary-nvm-lat-accuracy-$conf-epoch-$epoch-numchains-$numchains.txt 77 | 78 | done 79 | done 80 | done 81 | done 82 | 83 | 84 | #FORMAT_summary-results: #1_nvm_lat(ns) #2_min_nvm_lat(ns) #3_aver_nvm_lat(ns) #4_max_nvm_lat(ns) #5_aver_error(%) #6_max_error(%) 85 | 86 | #parameter is nvm_lat 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /benchmark-tests/memlat-orig-lat-test-single-socket.sh: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | #Copyright 2016 Hewlett Packard Enterprise Development LP. 
3 | #This program is free software; you can redistribute it and/or modify 4 | #it under the terms of the GNU General Public License as published by 5 | #the Free Software Foundation; either version 2 of the License, or (at 6 | #your option) any later version. This program is distributed in the 7 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | #PURPOSE. See the GNU General Public License for more details. You 10 | #should have received a copy of the GNU General Public License along 11 | #with this program; if not, write to the Free Software Foundation, 12 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ################################################################# 14 | #!/bin/bash 15 | 16 | #awk '($1~/physical_nodes/) {print;}' nvmemul.ini 17 | 18 | echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor 19 | 20 | dir_name_res=ORIG-lat-test 21 | 22 | rm -f foo* 23 | rm -rf $dir_name_res 24 | mkdir $dir_name_res 25 | 26 | 27 | cp nvmemul-debug.ini nvmemul.ini 28 | ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0 29 | 30 | for time in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 31 | do 32 | ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0 > $dir_name_res/foo-hw-latency.txt 33 | grep "measuring latency: latency is" $dir_name_res/foo-hw-latency.txt > $dir_name_res/foo 34 | awk 'NR==1 {local=$7;} 35 | END {print local}' $dir_name_res/foo >> $dir_name_res/list-hw-latency.txt 36 | done 37 | 38 | echo "#FORMAT:#1_min #2_aver #3_max" > $dir_name_res/final-hw-latency.txt 39 | 40 | awk 'BEGIN {max1 = 0.0; min1 = 10000000.0; sum1 = 0.0;} 41 | ($1 > max1){max1 = $1;} 42 | ($1 < min1){min1 = $1;} 43 | {sum1=sum1+$1;sum2=sum2+$2;} 44 | END {print min1, sum1/NR, max1;}' $dir_name_res/list-hw-latency.txt >> $dir_name_res/final-hw-latency.txt 45 | 46 | rm $dir_name_res/foo* 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 
56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /benchmark-tests/memlat-orig-lat-test.sh: --------------------------------------------------------------------------------
#!/bin/bash
#################################################################
#Copyright 2016 Hewlett Packard Enterprise Development LP.
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or (at
#your option) any later version. This program is distributed in the
#hope that it will be useful, but WITHOUT ANY WARRANTY; without even
#the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#PURPOSE. See the GNU General Public License for more details. You
#should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation,
#Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#################################################################
#
# Runs the memlat benchmark 20 times and summarises the latency it reports.
#
# Results are written to the directory ORIG-lat-test:
#   list-hw-latency.txt   one line per run: <local> <remote>
#   final-hw-latency.txt  one summary line (values in ns):
#     #1_min_local #2_aver_local #3_max_local #4_min_remote #5_aver_remote #6_max_remote
#
# On a machine with a single physical socket the work is delegated to
# memlat-orig-lat-test-single-socket.sh.
#
# All paths are relative, so the script has to be started from the
# benchmark-tests directory.

memlat=../build/bench/memlat/memlat
memlat_args=(1 1 1 1000000 64 8 0 1)
dir_name_res=ORIG-lat-test
runs=20

# Count the distinct "physical id" values listed in /proc/cpuinfo.
num_sockets=$(grep "physical id" /proc/cpuinfo | sort -u | wc -l)
if [ "$num_sockets" -eq 1 ]; then
    echo "Single Socket"
    ./memlat-orig-lat-test-single-socket.sh
    # Report the helper's own exit status instead of always claiming success.
    exit $?
fi

echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

# Start from a clean slate: leftovers of earlier runs are removed.
rm -f foo*
rm -rf "$dir_name_res"
mkdir "$dir_name_res"

cp nvmemul-debug.ini nvmemul.ini

# First run: its output goes to the terminal and is not recorded.
"$memlat" "${memlat_args[@]}"

for ((run = 1; run <= runs; run++)); do
    "$memlat" "${memlat_args[@]}" > "$dir_name_res/foo-hw-latency.txt"
    grep "measuring latency: latency is" "$dir_name_res/foo-hw-latency.txt" > "$dir_name_res/foo"
    # Field 7 of the first matching line is recorded as the local latency,
    # field 7 of the second matching line as the remote latency.
    awk 'NR==1 {local=$7;}
         NR==2 {remote=$7;}
         END {print local , remote}' "$dir_name_res/foo" >> "$dir_name_res/list-hw-latency.txt"
done

echo "#FORMAT:#1_min_local #2_aver_local #3_max_local #4_min_remote #5_aver_remote #6_max_remote" > "$dir_name_res/final-hw-latency.txt"

# Column 1 holds the local samples, column 2 the remote samples.
awk 'BEGIN {max1 = 0.0; min1 = 10000000.0; max2 = 0.0; min2 = 10000000.0; sum1 = 0.0; sum2 = 0.0;}
     ($1 > max1){max1 = $1;}
     ($1 < min1){min1 = $1;}
     ($2 > max2){max2 = $2;}
     ($2 < min2){min2 = $2;}
     {sum1=sum1+$1;sum2=sum2+$2;}
     END {print min1, sum1/NR, max1, min2, sum2/NR, max2 ;}' "$dir_name_res/list-hw-latency.txt" >> "$dir_name_res/final-hw-latency.txt"

# Remove the per-run scratch files; only the list and the summary remain.
rm "$dir_name_res"/foo*
-------------------------------------------------------------------------------- /benchmark-tests/nvmemul-bandwidth.ini: -------------------------------------------------------------------------------- 1 | # Configuration file 2 | 3 | latency: 4 | { 5 | enable = true; 6 | inject_delay = true; 7 | read = 1000; 8 | write = 1000; 9 | max_epoch_duration_us = 10000; 10 | min_epoch_duration_us = 10000; 11 | calibration = false; 12 | }; 13 | 14 | bandwidth: 15 | { 16 | enable = true; 17 | model = "/tmp/bandwidth_model"; 18 | read = 2000; 19 | write = 2000; 20 | }; 21 | 22 | topology: 23 | { 24 | mc_pci = "/tmp/mc_pci_bus"; 25 | physical_nodes = "0"; 26 | hyperthreading = true; # do not use multiple hardware threads per core 27 | }; 28 | 29 | statistics: 30 | { 31 | enable = true; 32 | #file = "/tmp/statistics"; 33 | }; 34 | 35 | debug: 36 | { 37 | # debugging level 38 | level = 5; 39 | verbose = 0; 40 | 41 | # modules set to True produce debugging output 42 | module: 43 | { 44 | all = False; 45 | }; 46 | }; 47 | -------------------------------------------------------------------------------- /benchmark-tests/nvmemul-debug.ini: -------------------------------------------------------------------------------- 1 | # Configuration file 2 | 3 | latency: 4 | { 5 | enable = true; 6 | inject_delay = true; 7 | read = 1000 ; 8 | write = 1000; 9 | max_epoch_duration_us = 10000 ; 10 | min_epoch_duration_us = 10000 ; 11 | calibration = false; 12 | }; 13 | 14 | bandwidth: 15 | { 16 | enable = false; 17 | model = "/tmp/bandwidth_model"; 18 | read = 2000; 19 | write = 2000; 20 | }; 21 | 22 | topology: 23 | { 24 | mc_pci = "/tmp/mc_pci_bus"; 25 | physical_nodes = "0,1"; 26 | hyperthreading = true; # do not use multiple hardware threads per core 27 | }; 28 | 29 | statistics: 30 | { 31 | enable = true; 32 | #file = "/tmp/statistics"; 33 | }; 34 | 35 | debug: 36 | { 37 | # debugging level 38 | level = 5; 39 | verbose = 0; 40 | 41 | # modules set to True produce debugging output 42 | module: 
43 | { 44 | all = False; 45 | }; 46 | }; 47 | -------------------------------------------------------------------------------- /benchmark-tests/nvmemul-orig.ini: -------------------------------------------------------------------------------- 1 | # Configuration file 2 | 3 | latency: 4 | { 5 | enable = true; 6 | inject_delay = true; 7 | read = 1000 ; 8 | write = 1000; 9 | max_epoch_duration_us = 10000 ; 10 | min_epoch_duration_us = 10000 ; 11 | calibration = false; 12 | }; 13 | 14 | bandwidth: 15 | { 16 | enable = false; 17 | model = "/tmp/bandwidth_model"; 18 | read = 2000; 19 | write = 2000; 20 | }; 21 | 22 | topology: 23 | { 24 | mc_pci = "/tmp/mc_pci_bus"; 25 | physical_nodes = "0,1"; 26 | hyperthreading = true; # do not use multiple hardware threads per core 27 | }; 28 | 29 | statistics: 30 | { 31 | enable = true; 32 | #file = "/tmp/statistics"; 33 | }; 34 | 35 | debug: 36 | { 37 | # debugging level 38 | level = 3; 39 | verbose = 0; 40 | 41 | # modules set to True produce debugging output 42 | module: 43 | { 44 | all = False; 45 | }; 46 | }; 47 | -------------------------------------------------------------------------------- /benchmark-tests/nvmemul.ini: -------------------------------------------------------------------------------- 1 | # Configuration file 2 | 3 | latency: 4 | { 5 | enable = true; 6 | inject_delay = true; 7 | read = 300 ; 8 | write = 200; 9 | max_epoch_duration_us = 10000 ; 10 | min_epoch_duration_us = 10000 ; 11 | calibration = false; 12 | }; 13 | 14 | bandwidth: 15 | { 16 | enable = false; 17 | model = "/tmp/bandwidth_model"; 18 | read = 2000; 19 | write = 2000; 20 | }; 21 | 22 | topology: 23 | { 24 | mc_pci = "/tmp/mc_pci_bus"; 25 | physical_nodes = "0,1"; 26 | hyperthreading = true; # do not use multiple hardware threads per core 27 | }; 28 | 29 | statistics: 30 | { 31 | enable = true; 32 | #file = "/tmp/statistics"; 33 | }; 34 | 35 | debug: 36 | { 37 | # debugging level 38 | level = 5; 39 | verbose = 0; 40 | 41 | # modules set to True 
produce debugging output 42 | module: 43 | { 44 | all = False; 45 | }; 46 | }; 47 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | 15 | 16 | -------------------------------------------------------------------------------- /nvmemul-orig.ini: -------------------------------------------------------------------------------- 1 | # Configuration file 2 | 3 | latency: 4 | { 5 | enable = true; 6 | inject_delay = true; 7 | read = 1000 ; 8 | write = 1000; 9 | max_epoch_duration_us = 10000 ; 10 | min_epoch_duration_us = 10000 ; 11 | calibration = false; 12 | }; 13 | 14 | bandwidth: 15 | { 16 | enable = false; 17 | model = "/tmp/bandwidth_model"; 18 | read = 2000; 19 | write = 2000; 20 | }; 21 | 22 | topology: 23 | { 24 | mc_pci = "/tmp/mc_pci_bus"; 25 | physical_nodes = "0,1"; 26 | hyperthreading = true; # do not use multiple hardware threads per core 27 | }; 28 | 29 | statistics: 30 | { 31 | enable = true; 32 | #file = "/tmp/statistics"; 33 | }; 34 | 35 | debug: 36 | { 37 | # debugging level 38 | level = 3; 39 | verbose = 0; 40 | 41 | # modules set to True produce debugging output 42 | module: 43 | { 44 | all = False; 45 | }; 46 | }; 47 | -------------------------------------------------------------------------------- /nvmemul.dox: -------------------------------------------------------------------------------- 1 | /** 2 | 3 | @mainpage Quartz: A Lightweight Performance Emulator for Persistent Memory Software. 4 | 5 | 6 | \section section-intro Introduction 7 | 8 | Quartz: A DRAM-based performance emulation platform that leverages features 9 | available in commodity hardware to emulate different latency and bandwidth 10 | characteristics of future byte-addressable NVM technologies. 
11 | 12 | */ 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /nvmemul.ini: -------------------------------------------------------------------------------- 1 | # Configuration file 2 | 3 | latency: 4 | { 5 | enable = true; 6 | inject_delay = true; 7 | read = 1000 ; 8 | write = 1000; 9 | max_epoch_duration_us = 10000 ; 10 | min_epoch_duration_us = 10000 ; 11 | calibration = false; 12 | }; 13 | 14 | bandwidth: 15 | { 16 | enable = false; 17 | model = "/tmp/bandwidth_model"; 18 | read = 500; 19 | write = 500; 20 | }; 21 | 22 | topology: 23 | { 24 | mc_pci = "/tmp/mc_pci_bus"; 25 | physical_nodes = "0,1"; 26 | hyperthreading = true; # do not use multiple hardware threads per core 27 | }; 28 | 29 | statistics: 30 | { 31 | enable = true; 32 | #file = "/tmp/statistics"; 33 | }; 34 | 35 | debug: 36 | { 37 | # debugging level 38 | level = 1; 39 | verbose = 0; 40 | 41 | # modules set to True produce debugging output 42 | module: 43 | { 44 | all = False; 45 | }; 46 | }; 47 | -------------------------------------------------------------------------------- /scripts/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################# 3 | #Copyright 2016 Hewlett Packard Enterprise Development LP. 4 | #This program is free software; you can redistribute it and/or modify 5 | #it under the terms of the GNU General Public License as published by 6 | #the Free Software Foundation; either version 2 of the License, or (at 7 | #your option) any later version. This program is distributed in the 8 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even 9 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 10 | #PURPOSE. See the GNU General Public License for more details. 
You 11 | #should have received a copy of the GNU General Public License along 12 | #with this program; if not, write to the Free Software Foundation, 13 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 | #################################################################
#
# Installs the packages needed to build the emulator and then verifies that
# the installed CMake is recent enough. Must be run as root.

# Exact PAPI version accepted by check_supported_papi.
PAPI_MAJOR=5
PAPI_MINOR=1
PAPI_RELEASE=1

# Minimum CMake version accepted by check_supported_cmake.
CMAKE_MAJOR=2
CMAKE_MINOR=8

# Install the build dependencies with yum; exits the script on failure.
function install_deps_rpm() {
    if ! yum install -q -y numactl-devel libconfig libconfig-devel cmake \
            "kernel-devel-$(uname -r)" msr-tools uthash-devel; then
        echo "Dependencies installation failed"
        exit -1
    fi
}

# Install the build dependencies with apt-get; exits the script on failure.
function install_deps_deb() {
    if ! apt-get install -y libnuma-dev libconfig-dev cmake msr-tools uthash-dev; then
        echo "Dependencies installation failed"
        exit -1
    fi
}

# Exit unless papi_version reports exactly PAPI_MAJOR.PAPI_MINOR.PAPI_RELEASE.
function check_supported_papi() {
    local version major minor release

    # Third space-separated word of the papi_version output, up to the first '-'.
    version=$(papi_version | cut -d ' ' -f3 | cut -d '-' -f1)
    major=$(echo "${version}" | cut -d '.' -f1)
    minor=$(echo "${version}" | cut -d '.' -f2)
    release=$(echo "${version}" | cut -d '.' -f3)

    # A version that cannot be parsed must not slip through the numeric tests.
    if ! [[ ${major} =~ ^[0-9]+$ && ${minor} =~ ^[0-9]+$ && ${release} =~ ^[0-9]+$ ]]; then
        echo "Unable to determine PAPI version"
        exit -1
    fi

    if [ "${major}" -ne "${PAPI_MAJOR}" ] ||
       [ "${minor}" -ne "${PAPI_MINOR}" ] ||
       [ "${release}" -ne "${PAPI_RELEASE}" ]; then
        echo "PAPI version (${major}.${minor}.${release}) not supported (=${PAPI_MAJOR}.${PAPI_MINOR}.${PAPI_RELEASE})"
        exit -1
    fi
}

# Exit unless the installed CMake is at least CMAKE_MAJOR.CMAKE_MINOR.
function check_supported_cmake() {
    local version major minor

    # Third space-separated word of the first output line, up to the first '-'.
    version=$(cmake -version | head -1 | cut -d ' ' -f3 | cut -d '-' -f1)
    major=$(echo "${version}" | cut -d '.' -f1)
    minor=$(echo "${version}" | cut -d '.' -f2)

    # A version that cannot be parsed must not slip through the numeric tests.
    if ! [[ ${major} =~ ^[0-9]+$ && ${minor} =~ ^[0-9]+$ ]]; then
        echo "Unable to determine CMake version"
        exit -1
    fi

    if [ "${major}" -lt "${CMAKE_MAJOR}" ] ||
       { [ "${major}" -eq "${CMAKE_MAJOR}" ] && [ "${minor}" -lt "${CMAKE_MINOR}" ]; }; then
        echo "CMake version (${major}.${minor}) not supported (>=${CMAKE_MAJOR}.${CMAKE_MINOR})"
        exit -1
    fi
}

# Run the version checks that are currently enabled (the PAPI check is not).
function check_supported_versions() {
    check_supported_cmake
    # check_supported_papi
}


#################### MAIN ####################

if [ "$(id -u)" -ne 0 ]; then
    echo "You must be root to execute this script"
    exit -1
fi

# Pick the package manager from the distribution marker files.
if [ -f /etc/redhat-release ] || [ -f /etc/centos-release ]; then
    install_deps_rpm
elif [ -f /etc/debian_version ] || [ -f /etc/debian-release ]; then
    install_deps_deb
else
    echo "Linux distribution not supported"
    exit -1
fi

check_supported_versions

-------------------------------------------------------------------------------- /scripts/runenv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################# 3 | #Copyright 2016 Hewlett Packard Enterprise Development LP. 4 | #This program is free software; you can redistribute it and/or modify 5 | #it under the terms of the GNU General Public License as published by 6 | #the Free Software Foundation; either version 2 of the License, or (at 7 | #your option) any later version. This program is distributed in the 8 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even 9 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 10 | #PURPOSE. See the GNU General Public License for more details. 
You 11 | #should have received a copy of the GNU General Public License along 12 | #with this program; if not, write to the Free Software Foundation, 13 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 | #################################################################
#
# Usage: runenv.sh [cmd to run]
#
# Prepares the machine (performance governor, Turbo Boost off, rdpmc knob)
# and then runs the given command with libnvmemul.so preloaded and
# NVMEMUL_INI pointing at the nvmemul.ini in the repository root.

NVM_EMUL_PATH="$(dirname "$0")/.."


if [ -z "$1" ]; then
    echo "runenv.sh [cmd to run]"
    exit 1
fi

rootdir="$NVM_EMUL_PATH"
bindir="$rootdir/build"

# If cpu0 is not using the "performance" governor, switch every CPU to it.
if [ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then
    current_scaling=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)

    if [ "${current_scaling}" != "performance" ]; then
        for cpu_file in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
            echo "performance" | sudo tee "${cpu_file}" > /dev/null
        done
    fi
fi

"$rootdir/scripts/turboboost.sh" disable

# From kernel major version 4 on, write 2 to the perf rdpmc sysfs knob.
# NOTE(review): 2 presumably allows rdpmc from user space unconditionally --
# confirm against the kernel's sysfs ABI documentation.
kernel_major=$(uname -r | cut -d '.' -f1)
if [ "$kernel_major" -ge 4 ]; then
    echo "2" | sudo tee /sys/bus/event_source/devices/cpu/rdpmc
fi

# Exported only now, after the helper commands above have run.
export LD_PRELOAD="$bindir/src/lib/libnvmemul.so"
export NVMEMUL_INI="$rootdir/nvmemul.ini"

if [ ! -f "${LD_PRELOAD}" ]; then
    echo "Library not found. Compile the emulator's library first."
    exit -1
fi

echo "$LD_PRELOAD"
echo "$NVMEMUL_INI"

# execute the command passed as argument; quoting keeps each argument intact
"$@"

-------------------------------------------------------------------------------- /scripts/setupdev.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################# 3 | #Copyright 2016 Hewlett Packard Enterprise Development LP. 
4 | #This program is free software; you can redistribute it and/or modify 5 | #it under the terms of the GNU General Public License as published by 6 | #the Free Software Foundation; either version 2 of the License, or (at 7 | #your option) any later version. This program is distributed in the 8 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even 9 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 10 | #PURPOSE. See the GNU General Public License for more details. You 11 | #should have received a copy of the GNU General Public License along 12 | #with this program; if not, write to the Free Software Foundation, 13 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 | ################################################################# 15 | 16 | NVM_EMUL_PATH="`dirname $0`/.." 17 | 18 | device_name="nvmemul" 19 | device_module_name=${device_name}".ko" 20 | device_path="/dev/${device_name}" 21 | device_module_path=`find ${NVM_EMUL_PATH}/build -name ${device_module_name}` 22 | 23 | 24 | function loaddev { 25 | if [ -z "${device_module_path}" ]; then 26 | echo "Module not found. Compile the emulator's source code first." 27 | exit -1 28 | fi 29 | 30 | /sbin/insmod ${device_module_path} 2> /dev/null 31 | 32 | if [ $? -ne 0 ]; then 33 | lsmod | grep ${device_name} > /dev/null 34 | if [ $? -eq 0 ]; then 35 | echo "Kernel module already loaded, please reload it." 36 | exit 1 37 | fi 38 | echo "Kernel module loading failed" 39 | exit 1 40 | fi 41 | 42 | device_major=`grep ${device_name} /proc/devices | awk '{ print $1 }'` 43 | if [ $? -ne 0 -o -z "${device_major}" ]; then 44 | echo "Failed to detect module major" 45 | exit 1 46 | fi 47 | 48 | rm -f ${device_path} 49 | if [ $? -ne 0 ]; then 50 | echo "Failed to delete kernel module device file" 51 | exit 1 52 | fi 53 | 54 | mknod ${device_path} c ${device_major} 0 55 | chmod a+wr ${device_path} 56 | 57 | lsmod | grep ${device_name} > /dev/null 58 | 59 | if [ $? 
-eq 0 ]; then 60 | echo "Kernel module loaded successfully" 61 | else 62 | echo "kernel module loading failed" 63 | exit 1 64 | fi 65 | } 66 | 67 | function unloaddev { 68 | /sbin/rmmod ${device_name} 2> /dev/null 69 | rm -f ${device_path} 70 | if [ $? -eq 0 ]; then 71 | echo "Kernel module unloaded successfully" 72 | else 73 | echo "Failed to delete kernel module device file" 74 | exit 1 75 | fi 76 | } 77 | 78 | function help() { 79 | echo "$0 " 80 | } 81 | 82 | ### MAIN ### 83 | 84 | if [ $(id -u) -ne 0 ]; then 85 | echo "You mut be root to execute this script" 86 | exit -1 87 | fi 88 | 89 | if [ $# -eq 0 ]; then 90 | help 91 | exit 1 92 | fi 93 | 94 | if [ "$1" = "load" ] || [ "$1" = "l" ]; then 95 | loaddev 96 | elif [ "$1" = "unload" ] || [ "$1" = "u" ]; then 97 | unloaddev 98 | elif [ "$1" = "reload" ] || [ "$1" = "r" ]; then 99 | unloaddev 100 | loaddev 101 | else 102 | help 103 | exit 1 104 | fi 105 | 106 | exit 0 107 | -------------------------------------------------------------------------------- /scripts/turboboost.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################# 3 | #Copyright 2016 Hewlett Packard Enterprise Development LP. 4 | #This program is free software; you can redistribute it and/or modify 5 | #it under the terms of the GNU General Public License as published by 6 | #the Free Software Foundation; either version 2 of the License, or (at 7 | #your option) any later version. This program is distributed in the 8 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even 9 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 10 | #PURPOSE. See the GNU General Public License for more details. 
You 11 | #should have received a copy of the GNU General Public License along 12 | #with this program; if not, write to the Free Software Foundation, 13 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 | ################################################################# 15 | 16 | function usage() 17 | { 18 | echo "$0 [target CPU id]" 19 | echo -e "\tfunctions:" 20 | echo -e "\t\t check: verifies if a given CPU id has Turbo Boost enabled" 21 | echo -e "\t\t disable: disables a given CPU id or all CPUs if not specified" 22 | echo -e "\t\t enabled: enables a given CPU id or all CPUs if not specified" 23 | } 24 | 25 | function verify_cpu_id() 26 | { 27 | re='^[0-9]+$' 28 | if ! [[ $1 =~ $re ]]; then 29 | echo "CPU id is not a number" 30 | exit 1 31 | fi 32 | } 33 | 34 | function check_msr_module() 35 | { 36 | lsmod | grep msr > /dev/null 37 | if [ $? -ne 0 ]; then 38 | # some systems need this, others don't 39 | sudo modprobe msr &> /dev/null 40 | #if [ $? -ne 0 ]; then 41 | # echo "Failed to load MSR module" 42 | # exit 1 43 | #fi 44 | fi 45 | } 46 | 47 | function check() 48 | { 49 | cpu=$1 50 | 51 | if [ -z "${cpu}" ]; then 52 | usage 53 | exit 1 54 | fi 55 | 56 | cpus=$(lscpu | sed -n 4p | awk '{ print $2 }') 57 | 58 | if [ ${cpu} -ge ${cpus} ]; then 59 | echo "CPU id out of range" 60 | exit 1 61 | fi 62 | 63 | disabled=$(sudo rdmsr -p${cpu} 0x1a0 -f 38:38) 64 | 65 | if [ "${disabled}" == "1" ]; then 66 | echo "Turbo Boost for processor ${cpu} is disabled" 67 | else 68 | echo "Turbo Boost for processor ${cpu} is enabled" 69 | fi 70 | } 71 | 72 | function enable() 73 | { 74 | cpu=$1 75 | 76 | cpus=$(lscpu | sed -n 4p | awk '{ print $2 }') 77 | 78 | if [ -z "${cpu}" ]; then 79 | for (( i=0; i<${cpus}; i++ )); do 80 | sudo wrmsr -p$i 0x1a0 0x850089 81 | done 82 | echo "Turbo Boost enabled for all CPUs" 83 | else 84 | if [ ${cpu} -ge ${cpus} ]; then 85 | echo "CPU id out of range" 86 | exit 1 87 | fi 88 | sudo wrmsr -p${cpu} 0x1a0 0x850089 89 | 
echo "Turbo Boost enabled for CPU ${cpu}" 90 | fi 91 | } 92 | 93 | function disable() 94 | { 95 | cpu=$1 96 | 97 | cpus=$(lscpu | sed -n 4p | awk '{ print $2 }') 98 | 99 | if [ -z "${cpu}" ]; then 100 | for (( i=0; i<${cpus}; i++ )); do 101 | sudo wrmsr -p$i 0x1a0 0x4000850089; 102 | done 103 | echo "Turbo Boost disabled for all CPUs" 104 | else 105 | if [ ${cpu} -ge ${cpus} ]; then 106 | echo "CPU id out of range" 107 | exit 1 108 | fi 109 | sudo wrmsr -p${cpu} 0x1a0 0x4000850089; 110 | echo "Turbo Boost disabled for CPU ${cpu}" 111 | fi 112 | } 113 | 114 | 115 | 116 | ### MAIN ### 117 | 118 | if [ $# -eq 0 ]; then 119 | usage 120 | exit 1 121 | fi 122 | 123 | funct=$1 124 | target_cpu=$2 125 | 126 | check_msr_module 127 | 128 | if [ ! -z "${target_cpu}" ]; then 129 | verify_cpu_id ${target_cpu} 130 | fi 131 | 132 | case ${funct} in 133 | "enable") 134 | enable ${target_cpu} 135 | ;; 136 | "disable") 137 | disable ${target_cpu} 138 | ;; 139 | "check") 140 | check ${target_cpu} 141 | ;; 142 | *) 143 | usage 144 | exit 1 145 | esac 146 | 147 | exit 0 148 | 149 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(lib) 2 | add_subdirectory(dev) 3 | -------------------------------------------------------------------------------- /src/dev/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Build NVM Emulation device driver (using Kbuild Makefile) 2 | 3 | set(DEV_DIR "${CMAKE_CURRENT_SOURCE_DIR}") 4 | set(DEV_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}") 5 | set(DEV_KERNEL_MODULE "${DEV_BIN_DIR}/nvmemul.ko") 6 | mark_as_advanced(DEV_DIR DEV_BIN_DIR) 7 | 8 | # We invoke make in build folder to keep the glog's source folder clean. 
9 | file(MAKE_DIRECTORY ${DEV_BIN_DIR}) 10 | add_custom_command(OUTPUT ${DEV_KERNEL_MODULE} 11 | COMMAND ${CMAKE_COMMAND} -E copy_directory ${DEV_DIR} ${DEV_BIN_DIR} 12 | COMMAND ${CMAKE_MAKE_PROGRAM} -j 13 | COMMENT [Build-NVM Emulation Device] 14 | WORKING_DIRECTORY "${DEV_BIN_DIR}" 15 | DEPENDS ${DEV_DIR}/pmc.c # just to see if it has been overwritten 16 | ) 17 | 18 | # we use add_custom_command for the build itself because otherwise we have to build it 19 | # every time. the following add_custom_target gives a name for the output. 20 | add_custom_target(dev_build ALL DEPENDS ${DEV_KERNEL_MODULE}) 21 | -------------------------------------------------------------------------------- /src/dev/Makefile: --------------------------------------------------------------------------------
# build modules: nvmemul.ko is built from the single object pmc.o
obj-m = nvmemul.o
nvmemul-objs = pmc.o

# use the kernel build system of the running kernel
KERNEL_VERSION := $(shell uname -r)
KERNEL_SOURCE := /lib/modules/$(KERNEL_VERSION)/build

# The backticks are expanded by the shell that runs the recipes below,
# i.e. both resolve to the directory make was started in.
SRCDIR=`pwd`
OBJDIR=`pwd`

# $(MAKE) rather than a bare "make" marks the recipes as recursive, so the
# options of the outer make (it is started with -j) are passed on properly.
all:
	$(MAKE) -C $(KERNEL_SOURCE) M=$(OBJDIR) modules

clean:
	$(MAKE) -C $(KERNEL_SOURCE) M=$(OBJDIR) clean
-------------------------------------------------------------------------------- /src/dev/ioctl_query.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. 
You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __IOCTL_QUERY_H 15 | #define __IOCTL_QUERY_H 16 | 17 | #include /* NOTE(review): the header name after #include is missing in this copy -- presumably <linux/ioctl.h>, which provides _IOR/_IOWR; confirm against the repository */ 18 | 19 | #define MYDEV_MAGIC (0xAA) /* first ("type") argument of every request code defined below */ 20 | 21 | typedef struct { /* argument type named by IOCTL_SETCOUNTER */ 22 | unsigned int counter_id; /* index of the performance counter; the driver (pmc.c) rejects values above 3 */ 23 | unsigned int event_id; /* value the driver writes to the event-select MSR of that counter */ 24 | } ioctl_query_setcounter_t; 25 | 26 | typedef struct { /* argument type named by IOCTL_SETPCI and IOCTL_GETPCI */ 27 | unsigned int bus_id; /* NOTE(review): presumably the PCI bus number -- the handler is not visible here; confirm */ 28 | unsigned int device_id; /* NOTE(review): presumably the PCI device number; confirm */ 29 | unsigned int function_id; /* NOTE(review): presumably the PCI function number; confirm */ 30 | unsigned int offset; /* NOTE(review): presumably the register offset to access; confirm */ 31 | unsigned int val; /* NOTE(review): presumably the value to write (SETPCI) or the value read back (GETPCI); confirm */ 32 | } ioctl_query_setgetpci_t; 33 | 34 | #define IOCTL_SETCOUNTER _IOR(MYDEV_MAGIC, 0, ioctl_query_setcounter_t *) /* request number 0 */ 35 | #define IOCTL_SETPCI _IOR(MYDEV_MAGIC, 1, ioctl_query_setgetpci_t *) /* request number 1 */ 36 | #define IOCTL_GETPCI _IOWR(MYDEV_MAGIC, 2, ioctl_query_setgetpci_t *) /* request number 2; the only code built with _IOWR */ 37 | 38 | 39 | #endif /* __IOCTL_QUERY_H */ 40 | -------------------------------------------------------------------------------- /src/dev/pmc.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. 
You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | #include 31 | 32 | #include "ioctl_query.h"

static long pmc_ioctl(struct file *f, unsigned int cmd, unsigned long arg);

/*
 * Use the native CR4 accessors when read_cr4/write_cr4 are not already
 * available as macros.
 */
#ifndef read_cr4
#define read_cr4 native_read_cr4
#endif
#ifndef write_cr4
#define write_cr4 native_write_cr4
#endif

/* Both the regular and the compat ioctl entry points go through pmc_ioctl. */
struct file_operations pmc_fops = {
    .unlocked_ioctl = pmc_ioctl,
    .compat_ioctl = pmc_ioctl,
};

static const char* module_name = "nvmemul";
/* Major number returned by register_chrdev(); needed again on unload. */
static int mod_major = 0;
/* Major number requested from register_chrdev(). */
static const int NVMEMUL_MAJOR = 0;
/*
 * MSR numbers of counter 0 and of its event-select register; the registers
 * of counter N are addressed as base + N.
 */
static const int PERFCTR0 = 0xc1;
static const int PERFEVENTSEL0 = 0x186;


/*
 * Set the PCE bit in CR4 of the CPU this function runs on.
 *
 * The parameter is unused; it exists so that the function has the
 * void (*)(void *) shape expected of a cross-CPU callback.
 */
void pmc_set_pce_bit(void* arg)
{
    unsigned long cr4reg = read_cr4();

    cr4reg |= 0x100; /* PCE is bit 8 of CR4: 1 << 8 == 0x100 == 256 */
    write_cr4(cr4reg);
}

/*
 * Register the character device and set CR4.PCE on every CPU.
 *
 * Returns 0 on success and -EIO when no major number could be obtained.
 */
int pmc_init_module(void)
{
    printk(KERN_INFO "%s: Loading. Initializing...\n", module_name);

    /* Any non-positive result (including -EBUSY) means there is no usable major. */
    mod_major = register_chrdev(NVMEMUL_MAJOR, module_name, &pmc_fops);
    if (mod_major <= 0) {
        printk(KERN_INFO "%s: Unable to get major for %s device\n", module_name, module_name);
        return -EIO;
    }

    printk(KERN_INFO "%s: major is %d\n", module_name, mod_major);

    /*
     * In order to use the rdpmc instruction in user mode, the PCE bit of
     * CR4 has to be set on every CPU. on_each_cpu() covers the calling CPU
     * and all others in a single step.
     */
    on_each_cpu(pmc_set_pce_bit, NULL, 1);

    return 0;
}

/* Release the major number obtained in pmc_init_module(). */
void pmc_exit_module(void) {
    printk(KERN_INFO "%s: Unloading. Cleaning up...\n", module_name);
    /* Freeing the major number */
    unregister_chrdev(mod_major, module_name);
}

/* Argument block handed to the per-CPU callbacks below. */
struct counter_s {
    int counter_id;    /* which counter to act on */
    unsigned long val; /* value for the event-select register (set_counter only) */
};


/*
 * Clear the counter selected by counter_id on the current CPU:
 *   counter_id = 0 => perfctr0
 *   counter_id = 1 => perfctr1
 * WRMSR is used to write zero (edx:eax = 0) to MSR PERFCTR0 + counter_id.
 */
static void __pmc_clear(int counter_id) {
    int counterRegister = PERFCTR0 + counter_id;

    __asm__ __volatile__("mov %0, %%ecx\n\t"
                         "xor %%edx, %%edx\n\t"
                         "xor %%eax, %%eax\n\t"
                         "wrmsr\n\t"
                         : /* no outputs */
                         : "m" (counterRegister)
                         : "eax", "ecx", "edx" /* all clobbered */);
}

/* Cross-CPU callback: arg points to a struct counter_s; clears that counter. */
static void pmc_clear(void* arg) {
    struct counter_s* counter = (struct counter_s*) arg;

    __pmc_clear(counter->counter_id);
}

/* Clear the given counter on every CPU and wait until all have finished. */
void pmc_clear_all_cpu(int counter_id)
{
    struct counter_s counter = { counter_id, 0};

    /* wait == 1, so the on-stack argument stays valid for every callback */
    on_each_cpu(pmc_clear, (void*) &counter, 1);
}

/*
 * Clear the counter selected by counter_id on the current CPU, then write
 * val to its event-select register (MSR PERFEVENTSEL0 + counter_id).
 * eax is loaded from val with a 32-bit move and edx is zeroed, so only the
 * low 32 bits of val reach the MSR.
 */
static void __set_counter(int counter_id, unsigned long val)
{
    int selectionRegister = PERFEVENTSEL0 + counter_id;

    __pmc_clear(counter_id);

    /* set the value */
    __asm__ __volatile__("mov %0, %%ecx\n\t" /* ecx contains the number of the MSR to set */
                         "xor %%edx, %%edx\n\t"/* edx contains the high bits to set the MSR to */
                         "mov %1, %%eax\n\t" /* eax contains the low bits to set the MSR to */
                         "wrmsr\n\t"
                         : /* no outputs */
                         : "m" (selectionRegister), "m" (val)
                         : "eax", "ecx", "edx" /* clobbered */);
}

/* Cross-CPU callback: arg points to a struct counter_s; programs that counter. */
void set_counter(void* arg)
{
    struct counter_s* counter = (struct counter_s*) arg;

    __set_counter(counter->counter_id, counter->val);
}

/* Program the given counter with arg on every CPU and wait for completion. */
void set_counter_all_cpu(int counter_id, unsigned long arg)
{
    struct counter_s counter = { counter_id, arg};

    /* wait == 1, so the on-stack argument stays valid for every callback */
    on_each_cpu(set_counter, (void*) &counter, 1);
}

/*
 * Handle the "set counter" request: arg is the user-space address of an
 * ioctl_query_setcounter_t.
 *
 * Returns 0 on success, -EFAULT when the argument cannot be copied in and
 * -ENXIO when counter_id is larger than 3.
 */
static long pmc_ioctl_setcounter(struct file* f, unsigned int cmd, unsigned long arg)
{
    ioctl_query_setcounter_t q;

    if (copy_from_user(&q, (const void __user *) arg, sizeof(q))) {
        return -EFAULT;
    }

    /* counter_id is unsigned, so only the upper bound can be violated */
    if (q.counter_id > 3) {
        printk(KERN_INFO "%s: set_counter illegal value 0x%x for counter\n", module_name, q.counter_id);
        return -ENXIO;
    }
    /* disable counter */
    set_counter_all_cpu(q.counter_id, 0);
    pmc_clear_all_cpu(q.counter_id);
    /* set counter */
    set_counter_all_cpu(q.counter_id, q.event_id);
    printk(KERN_INFO "%s: setcounter counter_id: 0x%x event_id=0x%x\n", module_name, q.counter_id, q.event_id);
    return 0;
}

static long pmc_ioctl_setpci(struct file* f, unsigned int cmd, unsigned long
arg) 192 | { 193 | ioctl_query_setgetpci_t q; 194 | struct pci_bus *bus = NULL; 195 | 196 | if (copy_from_user(&q, (ioctl_query_setgetpci_t*) arg, sizeof(ioctl_query_setgetpci_t))) { 197 | return -EFAULT; 198 | } 199 | 200 | while ((bus = pci_find_next_bus(bus))) { 201 | if (q.bus_id == bus->number) { 202 | pci_bus_write_config_word(bus, PCI_DEVFN(q.device_id, q.function_id), q.offset, (u16) q.val); 203 | printk(KERN_INFO "%s: setpci bus_id=0x%x device_id=0x%x, function_id=0x%x, val=0x%x\n", 204 | module_name, q.bus_id, q.device_id, q.function_id, q.val); 205 | return 0; 206 | } 207 | } 208 | return -ENXIO; 209 | } 210 | 211 | static long pmc_ioctl_getpci(struct file* f, unsigned int cmd, unsigned long arg) 212 | { 213 | ioctl_query_setgetpci_t q; 214 | struct pci_bus *bus = NULL; 215 | 216 | if (copy_from_user(&q, (ioctl_query_setgetpci_t*) arg, sizeof(ioctl_query_setgetpci_t))) { 217 | return -EFAULT; 218 | } 219 | 220 | while ((bus = pci_find_next_bus(bus))) { 221 | if (q.bus_id == bus->number) { 222 | unsigned int val = 0; 223 | pci_bus_read_config_word(bus, PCI_DEVFN(q.device_id, q.function_id), q.offset, (u16*) &val); 224 | printk(KERN_INFO "%s: getpci bus_id 0x%x device_id 0x%x, function_id 0x%x, offset 0x%x, val 0x%x\n", 225 | module_name, q.bus_id, q.device_id, q.function_id, q.offset, val); 226 | q.val = val; 227 | if (copy_to_user((ioctl_query_setgetpci_t*) arg, &q, sizeof(ioctl_query_setgetpci_t))) { 228 | return -EFAULT; 229 | } 230 | return 0; 231 | } 232 | } 233 | return -ENXIO; 234 | } 235 | 236 | static long pmc_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 237 | { 238 | int ret = -1; 239 | 240 | printk(KERN_INFO "%s: ioctl command: 0x%x\n", module_name, cmd); 241 | switch (cmd) { 242 | case IOCTL_SETCOUNTER: 243 | ret = pmc_ioctl_setcounter(f, cmd, arg); 244 | break; 245 | case IOCTL_SETPCI: 246 | ret = pmc_ioctl_setpci(f, cmd, arg); 247 | break; 248 | case IOCTL_GETPCI: 249 | ret = pmc_ioctl_getpci(f, cmd, arg); 250 | break; 251 | 
default: 252 | printk(KERN_INFO "%s: ioctl illegal command: 0x%x\n", module_name, cmd); 253 | break; 254 | } 255 | return ret; 256 | } 257 | 258 | 259 | /* Declaration of the init and exit functions */ 260 | module_init(pmc_init_module); 261 | module_exit(pmc_exit_module); 262 | 263 | MODULE_LICENSE("GPL"); 264 | MODULE_AUTHOR("HPLabs"); 265 | -------------------------------------------------------------------------------- /src/lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(nvmemul) 2 | 3 | option(STATISTICS "Enable statistics report" ON) 4 | 5 | if(STATISTICS) 6 | message(STATUS "WITH STATISTICS") 7 | add_definitions(-DUSE_STATISTICS) 8 | else() 9 | message(STATUS "WITHOUT STATISTICS") 10 | endif() 11 | 12 | set(nvmemul_src 13 | config.c 14 | debug.c 15 | dev.c 16 | init.c 17 | interpose.c 18 | measure_bw.c 19 | measure_lat.c 20 | misc.c 21 | monotonic_timer.c 22 | model_bw.c 23 | model_lat.c 24 | pflush.c 25 | pmalloc.c 26 | stat.c 27 | thread.c 28 | topology.c 29 | process_rank.c 30 | ) 31 | 32 | include_directories(${CMAKE_SOURCE_DIR}/third_party) 33 | include_directories(${CMAKE_SOURCE_DIR}/src) 34 | include_directories(${CMAKE_SOURCE_DIR}/src/lib) 35 | add_definitions(-g) 36 | add_definitions(-O2) 37 | add_definitions(-fPIC) 38 | add_definitions(-Wall) 39 | add_definitions(-march=native) 40 | add_definitions(-fopenmp) 41 | add_definitions(-std=gnu89) 42 | #add_definitions(-DNDEBUG) 43 | #add_definitions(-std=c99) 44 | add_definitions(-msse4) 45 | add_subdirectory(cpu) 46 | add_library(nvmemul SHARED ${nvmemul_src} $) 47 | target_link_libraries(nvmemul dl) 48 | target_link_libraries(nvmemul config) 49 | target_link_libraries(nvmemul numa) 50 | target_link_libraries(nvmemul rt) 51 | target_link_libraries(nvmemul m) 52 | target_link_libraries(nvmemul gomp) 53 | -------------------------------------------------------------------------------- /src/lib/config.c: 
-------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #include "config.h" 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #define ENVVAR_MAX_LEN 128 22 | 23 | static char* __getenv(const char* prefix, const char* name) 24 | { 25 | char normalized_name[ENVVAR_MAX_LEN]; 26 | 27 | if ((strlen(name) + strlen(prefix) + 1) > ENVVAR_MAX_LEN) { 28 | return NULL; 29 | } 30 | 31 | strcpy(normalized_name, prefix); 32 | strcat(normalized_name, "_"); 33 | strcat(normalized_name, name); 34 | 35 | return getenv(normalized_name); 36 | } 37 | 38 | static inline int 39 | env_setting_lookup(const char *name, char **value_str) 40 | { 41 | char *val; 42 | char normalized_name[ENVVAR_MAX_LEN]; 43 | int i; 44 | 45 | if ((strlen(name)) > ENVVAR_MAX_LEN) { 46 | return CONFIG_FALSE; 47 | } 48 | 49 | for (i=0; name[i]; i++) { 50 | if (name[i] == '.') { 51 | normalized_name[i] = '_'; 52 | } else { 53 | normalized_name[i] = toupper(name[i]); 54 | } 55 | } 56 | normalized_name[i] = '\0'; 57 | 58 | val = 
__getenv(ENVVAR_PREFIX, normalized_name); 59 | if (val) { 60 | *value_str = val; 61 | return CONFIG_TRUE; 62 | } else { 63 | return CONFIG_FALSE; 64 | } 65 | } 66 | 67 | 68 | static inline int 69 | env_setting_lookup_int(const char *name, int *value) 70 | { 71 | char *value_str; 72 | 73 | if (env_setting_lookup(name, &value_str) == CONFIG_FALSE) { 74 | return CONFIG_FALSE; 75 | } 76 | 77 | if (value_str) { 78 | *value = atoi(value_str); 79 | return CONFIG_TRUE; 80 | } else { 81 | return CONFIG_FALSE; 82 | } 83 | } 84 | 85 | 86 | static inline int 87 | env_setting_lookup_bool(const char *name, int *value) 88 | { 89 | return env_setting_lookup_int(name, value); 90 | } 91 | 92 | 93 | static inline int 94 | env_setting_lookup_string(const char *name, char **value) 95 | { 96 | return env_setting_lookup(name, value); 97 | } 98 | 99 | 100 | int 101 | __cconfig_lookup_bool(config_t *cfg, const char *name, int *value) 102 | { 103 | int val; 104 | int found_val = 0; 105 | 106 | if (env_setting_lookup_bool(name, &val) == CONFIG_TRUE) { 107 | found_val = 1; 108 | } else { 109 | if (config_lookup_bool(cfg, name, &val) == CONFIG_TRUE) { 110 | found_val = 1; 111 | } 112 | } 113 | 114 | if (found_val) { 115 | *value = val; 116 | return CONFIG_TRUE; 117 | } 118 | return CONFIG_FALSE; 119 | } 120 | 121 | 122 | int 123 | __cconfig_lookup_valid_bool(config_t *cfg, 124 | const char *name, 125 | int *value, 126 | int validity_check, ...) 
127 | { 128 | return __cconfig_lookup_bool(cfg, name, value); 129 | } 130 | 131 | 132 | int 133 | __cconfig_lookup_int(config_t *cfg, const char *name, int *value) 134 | { 135 | int val; 136 | int found_val = 0; 137 | 138 | if (env_setting_lookup_int(name, &val) == CONFIG_TRUE) { 139 | found_val = 1; 140 | } else { 141 | // third parameter changed from libconfig 1.3 to 1.4, it was 'long' and now it is 'int' 142 | if (config_lookup_int(cfg, name, &val) == CONFIG_TRUE) { 143 | found_val = 1; 144 | } 145 | } 146 | 147 | if (found_val) { 148 | *value = val; 149 | return CONFIG_TRUE; 150 | } 151 | return CONFIG_FALSE; 152 | } 153 | 154 | 155 | int 156 | __cconfig_lookup_valid_int(config_t *cfg, 157 | const char *name, 158 | int *value, 159 | int validity_check, ...) 160 | { 161 | int min; 162 | int max; 163 | int list_length; 164 | int i; 165 | int val; 166 | int listval; 167 | va_list ap; 168 | 169 | if (__cconfig_lookup_int(cfg, name, &val) == CONFIG_TRUE) { 170 | switch (validity_check) { 171 | case CONFIG_NO_CHECK: 172 | *value = val; 173 | return CONFIG_TRUE; 174 | case CONFIG_RANGE_CHECK: 175 | va_start(ap, validity_check); 176 | min = va_arg(ap, int); 177 | max = va_arg(ap, int); 178 | va_end(ap); 179 | if (*value >= min && *value <= max) { 180 | *value = val; 181 | return CONFIG_TRUE; 182 | } 183 | break; 184 | case CONFIG_LIST_CHECK: 185 | va_start(ap, validity_check); 186 | list_length = va_arg(ap, int); 187 | for (i=0; i 25 | #include 26 | 27 | #define ENVVAR_PREFIX "NVMEMUL" 28 | 29 | #ifdef __cplusplus 30 | extern "C" { 31 | #endif 32 | 33 | /* Make sure we don't redefine a macro already defined in libconfig.h */ 34 | 35 | #ifdef CONFIG_NO_CHECK 36 | # error "ERROR: Redefining previously defined CONFIG_NO_CHECK" 37 | #else 38 | # define CONFIG_NO_CHECK 0 39 | #endif 40 | 41 | #ifdef CONFIG_RANGE_CHECK 42 | # error "ERROR: Redefining previously defined CONFIG_RANGE_CHECK" 43 | #else 44 | # define CONFIG_RANGE_CHECK 1 45 | #endif 46 | 47 | #ifdef 
CONFIG_LIST_CHECK 48 | # error "ERROR: Redefining previously defined CONFIG_LIST_CHECK" 49 | #else 50 | # define CONFIG_LIST_CHECK 2 51 | #endif 52 | 53 | 54 | 55 | /** 56 | * The lookup functions return the value of a configuration variable based on 57 | * the following order: 58 | * 1) value of environment variable 59 | * 2) value in configuration file variable 60 | * 61 | * If the variable is not found then a lookup function does not set the value. 62 | */ 63 | 64 | int __cconfig_lookup_bool(config_t *cfg, const char *name, int *value); 65 | int __cconfig_lookup_int(config_t *cfg, const char *name, int *value); 66 | int __cconfig_lookup_string(config_t *cfg, const char *name, char **value); 67 | int __cconfig_lookup_valid_bool(config_t *cfg, const char *name, int *value, int validity_check, ...); 68 | int __cconfig_lookup_valid_int(config_t *cfg, const char *name, int *value, int validity_check, ...); 69 | int __cconfig_lookup_valid_string(config_t *cfg, const char *name, char **value, int validity_check, ...); 70 | int __cconfig_init(config_t *cfg, const char *config_file); 71 | 72 | #ifdef __cplusplus 73 | } 74 | #endif 75 | 76 | #endif /* __CONFIG_H */ 77 | -------------------------------------------------------------------------------- /src/lib/cpu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(nvmemul_cpu_src 2 | cpu.c 3 | pmc.c 4 | ) 5 | 6 | add_library(cpu OBJECT ${nvmemul_cpu_src}) 7 | -------------------------------------------------------------------------------- /src/lib/cpu/cpu.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 
3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "cpu.h" 19 | #include "dev.h" 20 | #include "error.h" 21 | #include "misc.h" 22 | #include "known_cpus.h" 23 | #include "xeon-ex.h" 24 | #include 25 | 26 | // Mainline architectures and processors available here: 27 | // https://software.intel.com/en-us/articles/intel-architecture-and-processor-identification-with-cpuid-model-and-family-numbers 28 | // 29 | // It turns out that CPUID is not an accurate approach to identifying a 30 | // processor as different processors may have the same CPUID. 
31 | // So instead we rely on the brand string returned by /proc/cpuinfo:model_name 32 | 33 | #define MASK(msb, lsb) (~((~0) << (msb + 1)) & ((~0) << lsb)) 34 | #define EXTRACT(val, msb, lsb) ((MASK(msb, lsb) & val) >> lsb) 35 | #define MODEL(eax) EXTRACT(eax, 7, 4) 36 | #define EXTENDED_MODEL(eax) EXTRACT(eax, 19, 16) 37 | #define MODEL_NUMBER(eax) ((EXTENDED_MODEL(eax) << 4) | MODEL(eax)) 38 | #define FAMILY(eax) EXTRACT(eax, 11, 8) 39 | #define Extended_Family(eax) EXTRACT(eax, 27, 20) 40 | #define Family_Number(eax) (FAMILY(eax) + Extended_Family(eax)) 41 | 42 | void cpuid(unsigned int info, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) 43 | { 44 | __asm__( 45 | "cpuid;" 46 | : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) 47 | : "a"(info)); 48 | } 49 | 50 | void get_family_model(int *family, int *model) 51 | { 52 | unsigned int eax, ebx, ecx, edx; 53 | int success = __get_cpuid(1, &eax, &ebx, &ecx, &edx); 54 | if (family != NULL) 55 | { 56 | *family = success ? Family_Number(eax) : 0; 57 | } 58 | 59 | if (model != NULL) 60 | { 61 | *model = success ? 
MODEL_NUMBER(eax) : 0; 62 | } 63 | } 64 | 65 | // caller is responsible for freeing memory allocated by this function 66 | char *cpuinfo(char *valname) 67 | { 68 | FILE *fp; 69 | char *line = NULL; 70 | size_t len = 0; 71 | ssize_t read; 72 | 73 | fp = fopen("/proc/cpuinfo", "r"); 74 | if (fp == NULL) 75 | { 76 | return NULL; 77 | } 78 | 79 | while ((read = getline(&line, &len, fp)) != -1) 80 | { 81 | if (strstr(line, valname)) 82 | { 83 | char *colon = strchr(line, ':'); 84 | int len = colon - line; 85 | char *buf = malloc(strlen(line) - len); 86 | strcpy(buf, &line[len + 2]); 87 | free(line); 88 | fclose(fp); 89 | return buf; 90 | } 91 | } 92 | 93 | free(line); 94 | fclose(fp); 95 | return NULL; 96 | } 97 | 98 | // reads current cpu frequency through the /proc/cpuinfo file 99 | // avoid calling this function often 100 | int cpu_speed_mhz() 101 | { 102 | size_t val; 103 | char *str = cpuinfo("cpu MHz"); 104 | val = string_to_size(str); 105 | free(str); 106 | return val; 107 | } 108 | 109 | // reads cpu LLC cache size through the /proc/cpuinfo file 110 | // avoid calling this function often 111 | size_t cpu_llc_size_bytes() 112 | { 113 | size_t val; 114 | char *str = cpuinfo("cache size"); 115 | val = string_to_size(str); 116 | free(str); 117 | return val; 118 | } 119 | 120 | // caller is responsible for freeing memory allocated by this function 121 | char *cpu_model_name() 122 | { 123 | return cpuinfo("model name"); 124 | } 125 | 126 | int match(const char *to_match, const char *regex_text) 127 | { 128 | int ret; 129 | const char *p = to_match; 130 | regex_t regex; 131 | regmatch_t m[1]; 132 | 133 | if ((ret = regcomp(®ex, regex_text, REG_EXTENDED | REG_NEWLINE)) != 0) 134 | { 135 | return E_ERROR; 136 | } 137 | if ((ret = regexec(®ex, p, 1, m, 0))) 138 | { 139 | regfree(®ex); 140 | return E_ERROR; // no match 141 | } 142 | regfree(®ex); 143 | return E_SUCCESS; 144 | } 145 | 146 | int is_Xeon() 147 | { 148 | char *model_name; 149 | if ((model_name = 
cpu_model_name()) == NULL) 150 | { 151 | return 0; 152 | } 153 | 154 | if (match(model_name, "Xeon") == E_SUCCESS) 155 | { 156 | free(model_name); 157 | return 1; 158 | } 159 | else 160 | { 161 | free(model_name); 162 | return 0; 163 | } 164 | } 165 | 166 | int is_Intel() 167 | { 168 | char *model_name; 169 | if ((model_name = cpu_model_name()) == NULL) 170 | { 171 | return 0; 172 | } 173 | 174 | if (match(model_name, "Intel") == E_SUCCESS) 175 | { 176 | free(model_name); 177 | return 1; 178 | } 179 | else 180 | { 181 | free(model_name); 182 | return 0; 183 | } 184 | } 185 | 186 | cpu_model_t *cpu_model() 187 | { 188 | int i, family, model; 189 | cpu_model_t *cpu_model = NULL; 190 | 191 | if (!is_Intel()) 192 | return NULL; 193 | 194 | get_family_model(&family, &model); 195 | 196 | int isXeon = is_Xeon(); 197 | 198 | for (i = 0; known_cpus[i].microarch != Invalid; i++) 199 | { 200 | microarch_ID_t c = known_cpus[i]; 201 | 202 | if (c.family == family && c.model == model) 203 | { 204 | switch (c.microarch) 205 | { 206 | case SandyBridge: 207 | cpu_model = &cpu_model_intel_xeon_ex; 208 | break; 209 | case IvyBridge: 210 | cpu_model = &cpu_model_intel_xeon_ex_v2; 211 | break; 212 | case Haswell: 213 | cpu_model = &cpu_model_intel_xeon_ex_v3; 214 | break; 215 | default: 216 | return NULL; 217 | } 218 | 219 | if (!isXeon) 220 | cpu_model->microarch = (microarch_t)(cpu_model->microarch - 1); 221 | 222 | DBG_LOG(INFO, "Detected CPU model '%s'\n", microarch_strings[cpu_model->microarch]); 223 | break; 224 | } 225 | } 226 | 227 | if (!cpu_model) 228 | { 229 | return NULL; 230 | } 231 | 232 | // complete the model with some runtime information 233 | cpu_model->llc_size_bytes = cpu_llc_size_bytes(); 234 | // cpu_model->speed_mhz = cpu_speed_mhz(); 235 | 236 | return cpu_model; 237 | } 238 | -------------------------------------------------------------------------------- /src/lib/cpu/cpu.h: -------------------------------------------------------------------------------- 1 | 
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#ifndef __CPU_H
#define __CPU_H

#include
#include
#include "dev.h"

// presumably the largest value accepted by a throttle register -- TODO confirm against callers
#define MAX_THROTTLE_VALUE 1023

int set_throttle_register(int node, uint64_t val);
// LLC size as reported by the "cache size" field of /proc/cpuinfo (see cpu.c)
size_t cpu_llc_size_bytes();

struct pmc_set_s;

// Selects which throttle register the set/get_throttle_register callbacks of
// cpu_model_t operate on.
typedef enum {
    THROTTLE_DDR_ACT = 0,
    THROTTLE_DDR_READ,
    THROTTLE_DDR_WRITE
} throttle_type_t;

// order matters. see cpu_model()
// cpu_model() subtracts 1 from a descriptor's microarch when the brand string
// does not contain "Xeon", so every non-Xeon enumerator must directly precede
// its Xeon counterpart.
typedef enum {
    Invalid,
    SandyBridge,
    SandyBridgeXeon,
    IvyBridge,
    IvyBridgeXeon,
    Haswell,
    HaswellXeon
} microarch_t;

// One entry of the known_cpus table: CPUID family/model numbers and the
// microarchitecture they identify.
typedef struct
{
    int family;
    int model;
    microarch_t microarch;
} microarch_ID_t;

/**
 * CPU object that encapsulates processor-specific methods for accessing
 * performance counters and memory controller PCI registers
 */
typedef struct cpu_model_s {
    microarch_t microarch; // processor description
    size_t llc_size_bytes; // last level cache size
    // int speed_mhz; // cpu clock frequency
    struct pmc_events_s* pmc_events; // performance monitoring events supported by the processor
    int (*set_throttle_register)(pci_regs_t *regs, throttle_type_t throttle_type, uint16_t val);
    int (*get_throttle_register)(pci_regs_t *regs, throttle_type_t throttle_type, uint16_t* val);
} cpu_model_t;

// Detects the running processor; returns NULL for CPUs that are not supported.
cpu_model_t* cpu_model();
// Current clock frequency as reported by the "cpu MHz" field of /proc/cpuinfo.
int cpu_speed_mhz();

#endif /* __CPU_H */
--------------------------------------------------------------------------------
/src/lib/cpu/haswell-papi.h:
--------------------------------------------------------------------------------
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
// NOTE(review): haswell.h uses the same include guard name, so only one of the
// two headers takes effect in a translation unit -- confirm this is intended.
#ifndef __CPU_HASWELL_H
#define __CPU_HASWELL_H

#include
#include "debug.h"

// Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
// applications to list all available performance events with their architecture specific
// detailed description and translate them to their respective event code. 'showevtinfo' application can
// be used to list all available performance event names with detailed description and 'check_events' application
// can be used to translate the performance event to the corresponding event code.

// These events will be initialized and started.
// Every event reading will return an array with the values for all these events.
28 | // The array index is the same index used to define the event in the *_native_events array below 29 | const char *haswell_native_events[MAX_NUM_EVENTS] = { 30 | "CYCLE_ACTIVITY:STALLS_L2_PENDING", 31 | "MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE", 32 | "MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM", 33 | "MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM" 34 | }; 35 | 36 | uint64_t haswell_read_stall_events_local() { 37 | long long values[MAX_NUM_EVENTS]; 38 | uint64_t events = 0; 39 | 40 | if (pmc_events_read_local_thread(values) == PAPI_OK) { 41 | uint64_t l2_pending = values[0]; 42 | uint64_t llc_hit = values[1]; 43 | uint64_t remote_dram = values[2]; 44 | uint64_t local_dram = values[3]; 45 | 46 | DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n", 47 | l2_pending, llc_hit, remote_dram, local_dram); 48 | 49 | double num = remote_dram + local_dram; 50 | double den = num + llc_hit; 51 | if (den == 0) return 0; 52 | 53 | events = (uint64_t)((double)l2_pending * ((double)num / den)); 54 | } else { 55 | DBG_LOG(ERROR, "read stall cycles failed\n"); 56 | } 57 | 58 | return events; 59 | } 60 | 61 | uint64_t haswell_read_stall_events_remote() { 62 | long long values[MAX_NUM_EVENTS]; 63 | uint64_t events = 0; 64 | 65 | if (pmc_events_read_local_thread(values) == PAPI_OK) { 66 | uint64_t l2_pending = values[0]; 67 | uint64_t llc_hit = values[1]; 68 | uint64_t remote_dram = values[2]; 69 | uint64_t local_dram = values[3]; 70 | 71 | DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n", 72 | l2_pending, llc_hit, remote_dram, local_dram); 73 | 74 | // calculate stalls based on l2 stalls and LLC miss/hit 75 | double num = remote_dram + local_dram; 76 | double den = num + llc_hit; 77 | if (den == 0) return 0; 78 | double stalls = (double)l2_pending * ((double)num / den); 79 | 80 | // calculate remote dram stalls based on total stalls and local/remote dram accesses 81 | den = remote_dram + local_dram; 82 | if (den 
== 0) return 0; 83 | events = (uint64_t) (stalls * ((double)remote_dram / den)); 84 | } else { 85 | DBG_LOG(ERROR, "read stall cycles failed\n"); 86 | } 87 | 88 | return events; 89 | } 90 | 91 | #endif /* __CPU_HASWELL_H */ 92 | -------------------------------------------------------------------------------- /src/lib/cpu/haswell.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __CPU_HASWELL_H 15 | #define __CPU_HASWELL_H 16 | 17 | #include 18 | #include "thread.h" 19 | #include "cpu/pmc.h" 20 | #include "debug.h" 21 | 22 | // Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with 23 | // applications to list all available performance events with their architecture specific 24 | // detailed description and translate them to their respective event code. 
// 'showevtinfo' application can
// be used to list all available performance event names with detailed description and 'check_events' application
// can be used to translate the performance event to the corresponding event code.

// Per-thread hardware latencies used below to weight remote against local
// DRAM accesses; defined outside this header.
extern __thread int tls_hw_local_latency;
extern __thread int tls_hw_remote_latency;
#ifdef MEMLAT_SUPPORT
// Per-thread running totals of DRAM accesses, updated by the read handlers below.
extern __thread uint64_t tls_global_remote_dram;
extern __thread uint64_t tls_global_local_dram;
#endif

// Hardware events used on Haswell: event name, a second argument that is
// always NULL here, and the raw event encoding.
#undef FOREACH_PMC_HW_EVENT
#define FOREACH_PMC_HW_EVENT(ACTION) \
    ACTION("CYCLE_ACTIVITY:STALLS_L2_PENDING", NULL, 0x55305a3) \
    ACTION("MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE", NULL, 0x5308d2) \
    ACTION("MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM", NULL, 0x530cd3) \
    ACTION("MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM", NULL, 0x5303d3)

// Derived events exposed for Haswell; each one has an enable, clear and read
// handler below.
#undef FOREACH_PMC_EVENT
#define FOREACH_PMC_EVENT(ACTION, prefix) \
    ACTION(ldm_stall_cycles, prefix) \
    ACTION(remote_dram, prefix)

// Weight applied to the DRAM access count relative to the LLC hit count in
// the stall estimates below. NOTE(review): the origin of 7.0 is not shown here.
#define L3_FACTOR 7.0

// Binds the four hardware events to slots 0..3 of the ldm_stall_cycles event.
DECLARE_ENABLE_PMC(haswell, ldm_stall_cycles)
{
    ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE", 1);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM", 2);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM", 3);

    return E_SUCCESS;
}

// Nothing to clear for this event.
DECLARE_CLEAR_PMC(haswell, ldm_stall_cycles)
{
}

// Estimates the stall cycles caused by DRAM accesses: the L2-pending stall
// cycle difference scaled by weighted DRAM accesses over weighted DRAM
// accesses plus LLC hits. Returns 0 when no DRAM access was counted.
DECLARE_READ_PMC(haswell, ldm_stall_cycles)
{
    uint64_t l2_pending_diff = READ_MY_HW_EVENT_DIFF(0);
    uint64_t llc_hit_diff = READ_MY_HW_EVENT_DIFF(1);
    uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2);
    uint64_t local_dram_diff = READ_MY_HW_EVENT_DIFF(3);

    DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n",
            l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff);

    if ((remote_dram_diff == 0) && (local_dram_diff == 0)) return 0;
#ifdef MEMLAT_SUPPORT
    // only the local count is accumulated here; the remote_dram read handler
    // accumulates the remote count
    tls_global_local_dram += local_dram_diff;
#endif

    // calculate stalls based on L2 stalls and LLC miss/hit
    double num = L3_FACTOR * (remote_dram_diff + local_dram_diff);
    double den = num + llc_hit_diff;
    if (den == 0) return 0;
    return (uint64_t) ((double)l2_pending_diff * (num / den));
}


// Binds the same four hardware events to slots 0..3 of the remote_dram event.
DECLARE_ENABLE_PMC(haswell, remote_dram)
{
    ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE", 1);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM", 2);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM", 3);

    return E_SUCCESS;
}

// Nothing to clear for this event.
DECLARE_CLEAR_PMC(haswell, remote_dram)
{
}

// Estimates the stall cycles caused by remote DRAM accesses: the DRAM stall
// estimate computed as in ldm_stall_cycles, scaled by the latency-weighted
// share of remote accesses. Returns 0 when no DRAM access was counted or the
// weighted total is 0.
DECLARE_READ_PMC(haswell, remote_dram)
{
    uint64_t l2_pending_diff = READ_MY_HW_EVENT_DIFF(0);
    uint64_t llc_hit_diff = READ_MY_HW_EVENT_DIFF(1);
    uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2);
    uint64_t local_dram_diff = READ_MY_HW_EVENT_DIFF(3);

    DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n",
            l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff);

    if ((remote_dram_diff == 0) && (local_dram_diff == 0)) return 0;
#ifdef MEMLAT_SUPPORT
    tls_global_remote_dram += remote_dram_diff;
#endif

    // calculate stalls based on L2 stalls and LLC miss/hit
    double num = L3_FACTOR * (remote_dram_diff + local_dram_diff);
    double den = num + llc_hit_diff;
    if (den == 0) return 0;
    double stalls = (double)l2_pending_diff * (num / den);

    // calculate remote dram stalls based on total stalls and local/remote dram accesses
    // also consider the weight of remote memory access against local memory access
    den = (remote_dram_diff * tls_hw_remote_latency) + (local_dram_diff * tls_hw_local_latency);
    if (den == 0) return 0;
    return (uint64_t) (stalls * ((double)(remote_dram_diff * tls_hw_remote_latency) / den));
}


// Emits the Haswell event table; the second argument matches the number of
// hardware events listed in FOREACH_PMC_HW_EVENT.
PMC_EVENTS(haswell, 4)
#endif /* __CPU_HASWELL_H */
--------------------------------------------------------------------------------
/src/lib/cpu/ivybridge-papi.h:
--------------------------------------------------------------------------------
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#ifndef __CPU_IVYBRIDGE_H
#define __CPU_IVYBRIDGE_H

#include
#include "debug.h"

// Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
// applications to list all available performance events with their architecture specific
// detailed description and translate them to their respective event code.
'showevtinfo' application can 23 | // be used to list all available performance event names with detailed description and 'check_events' application 24 | // can be used to translate the performance event to the corresponding event code. 25 | 26 | // These events will be initialized and started. 27 | // Every event reading will return an array with the values for all these events. 28 | // The array index is the same index used to define the event in the *_native_events array below 29 | const char *ivybridge_native_events[MAX_NUM_EVENTS] = { 30 | "CYCLE_ACTIVITY:STALLS_L2_PENDING", 31 | "MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE", 32 | "MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM", 33 | "MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM" 34 | }; 35 | 36 | uint64_t ivybridge_read_stall_events_local() { 37 | long long values[MAX_NUM_EVENTS]; 38 | uint64_t events = 0; 39 | 40 | if (pmc_events_read_local_thread(values) == PAPI_OK) { 41 | uint64_t l2_pending = values[0]; 42 | uint64_t llc_hit = values[1]; 43 | uint64_t remote_dram = values[2]; 44 | uint64_t local_dram = values[3]; 45 | 46 | DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n", 47 | l2_pending, llc_hit, remote_dram, local_dram); 48 | 49 | double num = remote_dram + local_dram; 50 | double den = num + llc_hit; 51 | if (den == 0) return 0; 52 | 53 | events = (uint64_t)((double)l2_pending * ((double)num / den)); 54 | } else { 55 | DBG_LOG(ERROR, "read stall cycles failed\n"); 56 | } 57 | 58 | return events; 59 | } 60 | 61 | uint64_t ivybridge_read_stall_events_remote() { 62 | long long values[MAX_NUM_EVENTS]; 63 | uint64_t events = 0; 64 | 65 | if (pmc_events_read_local_thread(values) == PAPI_OK) { 66 | uint64_t l2_pending = values[0]; 67 | uint64_t llc_hit = values[1]; 68 | uint64_t remote_dram = values[2]; 69 | uint64_t local_dram = values[3]; 70 | 71 | DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n", 72 | l2_pending, llc_hit, remote_dram, 
local_dram); 73 | 74 | // calculate stalls based on l2 stalls and LLC miss/hit 75 | double num = remote_dram + local_dram; 76 | double den = num + llc_hit; 77 | if (den == 0) return 0; 78 | double stalls = (double)l2_pending * ((double)num / den); 79 | 80 | // calculate remote dram stalls based on total stalls and local/remote dram accesses 81 | den = remote_dram + local_dram; 82 | if (den == 0) return 0; 83 | events = (uint64_t) (stalls * ((double)remote_dram / den)); 84 | } else { 85 | DBG_LOG(ERROR, "read stall cycles failed\n"); 86 | } 87 | 88 | return events; 89 | } 90 | 91 | #endif /* __CPU_IVYBRIDGE_H */ 92 | -------------------------------------------------------------------------------- /src/lib/cpu/ivybridge.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #ifndef __CPU_IVYBRIDGE_H 15 | #define __CPU_IVYBRIDGE_H 16 | 17 | #include 18 | #include "thread.h" 19 | #include "cpu/pmc.h" 20 | #include "debug.h" 21 | 22 | // Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with 23 | // applications to list all available performance events with their architecture specific 24 | // detailed description and translate them to their respective event code. 'showevtinfo' application can 25 | // be used to list all available performance event names with detailed description and 'check_events' application 26 | // can be used to translate the performance event to the corresponding event code. 27 | 28 | extern __thread int tls_hw_local_latency; 29 | extern __thread int tls_hw_remote_latency; 30 | #ifdef MEMLAT_SUPPORT 31 | extern __thread uint64_t tls_global_remote_dram; 32 | extern __thread uint64_t tls_global_local_dram; 33 | #endif 34 | 35 | #undef FOREACH_PMC_HW_EVENT 36 | #define FOREACH_PMC_HW_EVENT(ACTION) \ 37 | ACTION("CYCLE_ACTIVITY:STALLS_L2_PENDING", NULL, 0x55305a3) \ 38 | ACTION("MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE", NULL, 0x5308d2) \ 39 | ACTION("MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM", NULL, 0x530cd3) \ 40 | ACTION("MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM", NULL, 0x5303d3) 41 | 42 | #undef FOREACH_PMC_EVENT 43 | #define FOREACH_PMC_EVENT(ACTION, prefix) \ 44 | ACTION(ldm_stall_cycles, prefix) \ 45 | ACTION(remote_dram, prefix) 46 | 47 | 48 | #define L3_FACTOR 7.0 49 | 50 | DECLARE_ENABLE_PMC(ivybridge, ldm_stall_cycles) 51 | { 52 | ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0); 53 | ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE", 1); 54 | ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM", 2); 55 | ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM", 3); 56 | 57 | return E_SUCCESS; 58 | } 59 | 60 | 
DECLARE_CLEAR_PMC(ivybridge, ldm_stall_cycles) 61 | { 62 | } 63 | 64 | DECLARE_READ_PMC(ivybridge, ldm_stall_cycles) 65 | { 66 | uint64_t l2_pending_diff = READ_MY_HW_EVENT_DIFF(0); 67 | uint64_t llc_hit_diff = READ_MY_HW_EVENT_DIFF(1); 68 | uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2); 69 | uint64_t local_dram_diff = READ_MY_HW_EVENT_DIFF(3); 70 | 71 | DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n", 72 | l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff); 73 | 74 | if ((remote_dram_diff == 0) && (local_dram_diff == 0)) return 0; 75 | #ifdef MEMLAT_SUPPORT 76 | tls_global_local_dram += local_dram_diff; 77 | #endif 78 | 79 | // calculate stalls based on L2 stalls and LLC miss/hit 80 | double num = L3_FACTOR * (remote_dram_diff + local_dram_diff); 81 | double den = num + llc_hit_diff; 82 | if (den == 0) return 0; 83 | return (uint64_t) ((double)l2_pending_diff * (num / den)); 84 | } 85 | 86 | 87 | DECLARE_ENABLE_PMC(ivybridge, remote_dram) 88 | { 89 | ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0); 90 | ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE", 1); 91 | ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM", 2); 92 | ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM", 3); 93 | 94 | return E_SUCCESS; 95 | } 96 | 97 | DECLARE_CLEAR_PMC(ivybridge, remote_dram) 98 | { 99 | } 100 | 101 | DECLARE_READ_PMC(ivybridge, remote_dram) 102 | { 103 | uint64_t l2_pending_diff = READ_MY_HW_EVENT_DIFF(0); 104 | uint64_t llc_hit_diff = READ_MY_HW_EVENT_DIFF(1); 105 | uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2); 106 | uint64_t local_dram_diff = READ_MY_HW_EVENT_DIFF(3); 107 | 108 | DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n", 109 | l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff); 110 | 111 | if ((remote_dram_diff == 0) && 
(local_dram_diff == 0)) return 0; 112 | #ifdef MEMLAT_SUPPORT 113 | tls_global_remote_dram += remote_dram_diff; 114 | #endif 115 | 116 | // calculate stalls based on L2 stalls and LLC miss/hit 117 | double num = L3_FACTOR * (remote_dram_diff + local_dram_diff); 118 | double den = num + llc_hit_diff; 119 | if (den == 0) return 0; 120 | double stalls = (double)l2_pending_diff * (num / den); 121 | 122 | // calculate remote dram stalls based on total stalls and local/remote dram accesses 123 | // also consider the weight of remote memory access against local memory access 124 | den = (remote_dram_diff * tls_hw_remote_latency) + (local_dram_diff * tls_hw_local_latency); 125 | if (den == 0) return 0; 126 | return (uint64_t) (stalls * ((double)(remote_dram_diff * tls_hw_remote_latency) / den)); 127 | } 128 | 129 | 130 | PMC_EVENTS(ivybridge, 4) 131 | #endif /* __CPU_IVYBRIDGE_H */ 132 | -------------------------------------------------------------------------------- /src/lib/cpu/known_cpus.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #ifndef __KNOWN_CPUS_H 15 | #define __KNOWN_CPUS_H 16 | 17 | #include "cpu.h" 18 | 19 | // later, cpu_model_name() is used to distinguish between 20 | // Xeon and non-Xeon processors. It's much easier here 21 | // to consider all processors non-Xeon. 22 | // references: 23 | // 1- http://a4lg.com/tech/x86/database/x86-families-and-models.en.html 24 | // 2- Intel® Xeon® Processor E7-8800/4800 v3 Product Family Specification 25 | // 3- https://software.intel.com/en-us/articles/intel-architecture-and-processor-identification-with-cpuid-model-and-family-numbers 26 | microarch_ID_t known_cpus[] = 27 | { 28 | // known (family, model) pairs and the microarchitecture each maps to; order does not matter 29 | {.family = 0x06, .model = 0x2A, .microarch = SandyBridge}, 30 | {.family = 0x06, .model = 0x2D, .microarch = SandyBridge}, 31 | 32 | {.family = 0x06, .model = 0x3A, .microarch = IvyBridge}, 33 | {.family = 0x06, .model = 0x3E, .microarch = IvyBridge}, 34 | 35 | {.family = 0x06, .model = 0x3C, .microarch = Haswell}, 36 | {.family = 0x06, .model = 0x3F, .microarch = Haswell}, 37 | {.family = 0x06, .model = 0x45, .microarch = Haswell}, 38 | {.family = 0x06, .model = 0x46, .microarch = Haswell}, 39 | 40 | // sentinel entry (microarch = Invalid); must be the last element 41 | {.family = 0x0, .model = 0x0, .microarch = Invalid}}; 42 | 43 | // name strings, one per microarch_t value; order must correspond to microarch_t 44 | char *microarch_strings[] = 45 | { 46 | "Invalid", 47 | "Sandy Bridge", 48 | "Sandy Bridge Xeon", 49 | "Ivy Bridge", 50 | "Ivy Bridge Xeon", 51 | "Haswell", 52 | "Haswell Xeon"}; 53 | 54 | #endif /* __KNOWN_CPUS_H */ 55 | -------------------------------------------------------------------------------- /src/lib/cpu/pmc-papi.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 
3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #include 15 | #include 16 | #include 17 | #include "cpu/pmc-papi.h" 18 | #include "debug.h" 19 | 20 | __thread int tls_event_set = PAPI_NULL; 21 | 22 | #define STR_MAX_SIZE 256 23 | 24 | static void log_papi_critical(int ret_val, const char *msg) { 25 | //char papi_str[STR_MAX_SIZE]; 26 | //PAPI_perror(ret_val, (char *)papi_str, sizeof(papi_str)); 27 | DBG_LOG(CRITICAL, "%s (%s)\n", msg, PAPI_strerror(ret_val)); 28 | } 29 | 30 | int pmc_init() { 31 | int ret_val; 32 | 33 | if ((ret_val = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) { 34 | log_papi_critical(ret_val, "PMC library init error"); 35 | return -1; 36 | } 37 | 38 | if ((ret_val = PAPI_thread_init(pthread_self)) != PAPI_OK) { 39 | log_papi_critical(ret_val, "PMC thread support init error"); 40 | return -1; 41 | } 42 | 43 | // if ((ret_val = PAPI_set_domain(PAPI_DOM_ALL)) != PAPI_OK) { 44 | // log_papi_critical(ret_val, "PMC set domain error"); 45 | // return -1; 46 | // } 47 | 48 | return 0; 49 | } 50 | 51 | void pmc_shutdown() { 52 | PAPI_shutdown(); 53 | } 54 | 55 | int pmc_create_event_set_local_thread() { 56 | int ret_val; 57 | 58 | if ((ret_val = PAPI_create_eventset(&tls_event_set)) != PAPI_OK) { 
59 | log_papi_critical(ret_val, "PMC event set init error"); 60 | return -1; 61 | } 62 | 63 | // if ((ret_val = PAPI_set_granularity(PAPI_GRN_SYS)) != PAPI_OK) { 64 | // log_papi_critical(ret_val, "PMC set granularity error"); 65 | // return -1; 66 | // } 67 | 68 | return 0; 69 | } 70 | 71 | void pmc_destroy_event_set_local_thread() { 72 | PAPI_cleanup_eventset(tls_event_set); 73 | PAPI_destroy_eventset(&tls_event_set); 74 | } 75 | 76 | int pmc_register_thread() { 77 | return PAPI_register_thread(); 78 | } 79 | 80 | int pmc_unregister_thread() { 81 | return PAPI_unregister_thread(); 82 | } 83 | 84 | int pmc_register_event_local_thread(const char *event_name) { 85 | int ret_val; 86 | char msg[STR_MAX_SIZE]; 87 | 88 | // The pthread scope for each thread should be set to PTHREAD_SCOPE_SYSTEM. 89 | // On linux, pthread supports only PTHREAD_SCOPE_SYSTEM. 90 | 91 | assert(tls_event_set != PAPI_NULL); 92 | assert(event_name); 93 | 94 | if ((ret_val = PAPI_add_named_event(tls_event_set, (char *)event_name)) != PAPI_OK) { 95 | snprintf(msg, sizeof(msg), "PMC event (%s) register error", event_name); 96 | log_papi_critical(ret_val, msg); 97 | return -1; 98 | } 99 | 100 | return 0; 101 | } 102 | 103 | int pmc_events_start_local_thread() { 104 | int ret_val; 105 | 106 | assert(tls_event_set != PAPI_NULL); 107 | 108 | if ((ret_val = PAPI_start(tls_event_set)) != PAPI_OK) { 109 | log_papi_critical(ret_val, "PMC events start error"); 110 | return -1; 111 | } 112 | 113 | return 0; 114 | } 115 | 116 | void pmc_events_stop_local_thread() { 117 | long long values[MAX_NUM_EVENTS]; 118 | 119 | assert(tls_event_set != PAPI_NULL); 120 | 121 | PAPI_stop(tls_event_set, values); 122 | } 123 | 124 | int pmc_events_read_local_thread(long long *values) { 125 | int ret_val; 126 | // int status = 0; 127 | 128 | assert(values); 129 | 130 | // PAPI_state(event_set, &status); 131 | // if (status != PAPI_RUNNING) { 132 | // DBG_LOG(CRITICAL, "PMC event set not in running state"); 133 | // return 
-1; 134 | // } 135 | 136 | if ((ret_val = PAPI_read(tls_event_set, values)) != PAPI_OK) { 137 | log_papi_critical(ret_val, "PMC events read error"); 138 | return -1; 139 | } 140 | 141 | if ((ret_val = PAPI_reset(tls_event_set)) != PAPI_OK) { 142 | log_papi_critical(ret_val, "PMC events reset error"); 143 | return -1; 144 | } 145 | 146 | return 0; 147 | } 148 | -------------------------------------------------------------------------------- /src/lib/cpu/pmc-papi.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #ifndef __CPU_PMC_H 15 | #define __CPU_PMC_H 16 | 17 | #include 18 | 19 | 20 | // Usually the architectures support up to 4 counters enabled at the same 21 | // time per core when HT is enabled 22 | #define MAX_NUM_EVENTS 4 23 | 24 | /* callback that takes no arguments and returns a 64-bit stall count */ typedef uint64_t (*read_stalls_t)(void); 25 | 26 | /* PAPI description of one CPU model: event names plus the local and remote stall readers */ typedef struct { 27 | const char **native_events; 28 | read_stalls_t read_stalls_events_local; 29 | read_stalls_t read_stalls_events_remote; 30 | } pmc_event_t; 31 | 32 | /* the int-returning functions below that are implemented in pmc-papi.c return 0 on success and -1 on failure, except the two thread (un)register calls, which return PAPI's own result code */ int pmc_init(); 33 | void pmc_shutdown(); 34 | int pmc_create_event_set_local_thread(); 35 | void pmc_destroy_event_set_local_thread(); 36 | int pmc_register_event_local_thread(const char *event_name); 37 | int pmc_events_start_local_thread(); 38 | void pmc_events_stop_local_thread(); 39 | /* fills values with the current counters, then resets them */ int pmc_events_read_local_thread(long long *values); 40 | 41 | int pmc_register_thread(); 42 | int pmc_unregister_thread(); 43 | 44 | #endif /* __CPU_PMC_H */ 45 | -------------------------------------------------------------------------------- /src/lib/cpu/pmc.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. 
You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __CPU_PMC_H 15 | #define __CPU_PMC_H 16 | 17 | #include "cpu/cpu.h" 18 | 19 | #define DECLARE_ENABLE_PMC(prefix, name) int prefix##_create_pmc_##name(struct pmc_events_s* events, struct pmc_event_s* event) 20 | #define DECLARE_CLEAR_PMC(prefix, name) void prefix##_clear_pmc_##name(struct pmc_event_s* event) 21 | #define DECLARE_READ_PMC(prefix, name) uint64_t prefix##_read_pmc_##name(struct pmc_event_s* event) 22 | #define ENABLE_PMC_FNAME(prefix, name) prefix##_create_pmc_##name 23 | #define CLEAR_PMC_FNAME(prefix, name) prefix##_clear_pmc_##name 24 | #define READ_PMC_FNAME(prefix, name) prefix##_read_pmc_##name 25 | 26 | #define PMC_HW_EVENT(name, os_name, encoding) { name, os_name, encoding, 0, 0}, 27 | #define PMC_EVENT(name, prefix) { #name, NULL, 0, 0, ENABLE_PMC_FNAME(prefix, name), CLEAR_PMC_FNAME(prefix, name), READ_PMC_FNAME(prefix, name)}, 28 | 29 | #define PMC_EVENTS_PTR(prefix) &prefix##_pmc_events 30 | 31 | #define PMC_EVENTS(prefix, num_hw_cntrs) \ 32 | pmc_hw_event_t prefix##_known_hw_event[] = { \ 33 | FOREACH_PMC_HW_EVENT(PMC_HW_EVENT) \ 34 | {NULL, NULL, 0, 0, 0} \ 35 | }; \ 36 | pmc_event_t prefix##_known_event[] = { \ 37 | FOREACH_PMC_EVENT(PMC_EVENT, prefix) \ 38 | {NULL, NULL, 0, 0, NULL, NULL, NULL} \ 39 | }; \ 40 | pmc_events_t prefix##_pmc_events = { \ 41 | num_hw_cntrs, \ 42 | prefix##_known_hw_event, \ 43 | prefix##_known_event \ 44 | }; 45 | 46 | #define ASSIGN_PMC_HW_EVENT_TO_ME(name, local_id) \ 47 | if (assign_pmc_hw_event_to_event(events, name, event, local_id) != E_SUCCESS) { \ 48 | release_all_pmc_hw_events_of_event(event); \ 49 | } 50 | 51 | #define READ_MY_HW_EVENT_DIFF(local_id) 
read_pmc_hw_event_diff(event->hw_events[local_id]) 52 | #define READ_MY_HW_EVENT_CUR(local_id) read_pmc_hw_event_cur(event->hw_events[local_id]) 53 | 54 | typedef struct { 55 | char* name; 56 | char* os_name; // perf name if known 57 | uint64_t encoding; 58 | int active; 59 | int hw_cntr_id; 60 | uint64_t* last_val; // array holding the last read values per processor (useful to calculate the diff since the last read) 61 | } pmc_hw_event_t; 62 | 63 | typedef struct pmc_event_s { 64 | const char* name; 65 | pmc_hw_event_t** hw_events; 66 | int num_hw_events; 67 | int active; 68 | int (*enable)(struct pmc_events_s* events, struct pmc_event_s* event); 69 | void (*clear)(struct pmc_event_s* event); 70 | uint64_t (*read)(struct pmc_event_s* event); 71 | } pmc_event_t; 72 | 73 | typedef struct pmc_events_s { 74 | int num_avail_hw_cntrs; 75 | pmc_hw_event_t* known_hw_events; 76 | pmc_event_t* known_events; 77 | } pmc_events_t; 78 | 79 | pmc_hw_event_t* enable_pmc_hw_event(pmc_events_t* events, const char* name); 80 | void disable_pmc_hw_event(pmc_events_t* events, const char* name); 81 | void clear_pmc_hw_event(pmc_hw_event_t* event); 82 | uint64_t read_pmc_hw_event_cur(pmc_hw_event_t* event); 83 | uint64_t read_pmc_hw_event_diff(pmc_hw_event_t* event); 84 | int assign_pmc_hw_event_to_event(pmc_events_t* events, const char* name, pmc_event_t* event, int local_id); 85 | void release_all_pmc_hw_events_of_event(pmc_event_t* event); 86 | 87 | pmc_event_t* enable_pmc_event(cpu_model_t* cpu, const char* name); 88 | void disable_pmc_event(cpu_model_t* cpu, const char* name); 89 | 90 | static inline void clear_pmc_event(pmc_event_t* event) 91 | { 92 | event->clear(event); 93 | } 94 | 95 | //#include "debug.h" 96 | 97 | static inline uint64_t read_pmc_event(pmc_event_t* event) 98 | { 99 | uint64_t ret; 100 | ret = event->read(event); 101 | return ret; 102 | } 103 | 104 | #endif /* __CPU_PMC_H */ 105 | -------------------------------------------------------------------------------- 
/src/lib/cpu/sandybridge-papi.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __CPU_SANDYBRIDGE_H 15 | #define __CPU_SANDYBRIDGE_H 16 | 17 | #include 18 | #include 19 | #include "debug.h" 20 | 21 | // Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with 22 | // applications to list all available performance events with their architecture specific 23 | // detailed description and translate them to their respective event code. showevtinfo application can 24 | // be used to list all available performance event names with detailed description and check_events application 25 | // can be used to translate the performance event to the corresponding event code. 26 | 27 | // These events will be initialized and started. 28 | // Every event reading will return an array with the values for all these events. 
29 | // The array index is the same index used to define the event in the *_native_events array below 30 | const char *sandybridge_native_events[MAX_NUM_EVENTS] = { 31 | "CYCLE_ACTIVITY:STALLS_L2_PENDING", 32 | "MEM_LOAD_UOPS_MISC_RETIRED:LLC_MISS", 33 | "MEM_LOAD_UOPS_RETIRED:L3_HIT", 34 | NULL 35 | }; 36 | 37 | 38 | void sandybridge_latency_calibration_local(int *hw_latency, int target_latency) { 39 | if ((*hw_latency + 10) < target_latency) 40 | *hw_latency += 10; 41 | } 42 | 43 | void sandybridge_latency_calibration_remote(int *hw_latency, int target_latency) { 44 | if ((*hw_latency + 30) < target_latency) 45 | *hw_latency += 30; 46 | } 47 | 48 | uint64_t sandybridge_read_stall_events_local() { 49 | long long values[MAX_NUM_EVENTS]; 50 | uint64_t events = 0; 51 | 52 | if (pmc_events_read_local_thread(values) == PAPI_OK) { 53 | uint64_t cycle_activity_stalls_l2_pending_diff = values[0]; 54 | uint64_t mem_load_uops_misc_retired_llc_miss_diff = values[1]; 55 | uint64_t mem_load_uops_retired_l3_hit_diff = values[2]; 56 | 57 | DBG_LOG(DEBUG, "read stall L2 cycles %lu, LLC miss %lu, L3 hit %lu\n", 58 | cycle_activity_stalls_l2_pending_diff, mem_load_uops_misc_retired_llc_miss_diff, 59 | mem_load_uops_retired_l3_hit_diff); 60 | 61 | uint64_t uden = 7.0 * mem_load_uops_misc_retired_llc_miss_diff + mem_load_uops_retired_l3_hit_diff; 62 | if (uden == 0) { 63 | return 0; 64 | } 65 | double den = uden; 66 | double num = 7.0 * mem_load_uops_misc_retired_llc_miss_diff; 67 | 68 | events = (uint64_t) floorl(cycle_activity_stalls_l2_pending_diff*num/den); 69 | } else { 70 | DBG_LOG(DEBUG, "read stall cycles failed\n"); 71 | } 72 | 73 | return events; 74 | } 75 | 76 | #endif /* __CPU_SANDYBRIDGE_H */ 77 | -------------------------------------------------------------------------------- /src/lib/cpu/sandybridge.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | 
Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __CPU_SANDYBRIDGE_H 15 | #define __CPU_SANDYBRIDGE_H 16 | 17 | #include 18 | #include "thread.h" 19 | #include "cpu/pmc.h" 20 | #include "debug.h" 21 | 22 | // Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with 23 | // applications to list all available performance events with their architecture specific 24 | // detailed description and translate them to their respective event code. showevtinfo application can 25 | // be used to list all available performance event names with detailed description and check_events application 26 | // can be used to translate the performance event to the corresponding event code. 
27 | 28 | #undef FOREACH_PMC_HW_EVENT 29 | #define FOREACH_PMC_HW_EVENT(ACTION) \ 30 | ACTION("CYCLE_ACTIVITY:STALLS_L2_PENDING", NULL, 0x55305a3) \ 31 | ACTION("MEM_LOAD_UOPS_MISC_RETIRED:LLC_MISS", NULL, 0x5302d4) \ 32 | ACTION("MEM_LOAD_UOPS_RETIRED:L3_HIT", NULL, 0x5304d1) \ 33 | ACTION("INSTRUCTION_RETIRED", NULL, 0x5300c0) 34 | 35 | #undef FOREACH_PMC_EVENT 36 | #define FOREACH_PMC_EVENT(ACTION, prefix) \ 37 | ACTION(ldm_stall_cycles, prefix) 38 | 39 | 40 | DECLARE_ENABLE_PMC(sandybridge, ldm_stall_cycles) 41 | { 42 | ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0); 43 | ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_MISC_RETIRED:LLC_MISS", 1); 44 | //ASSIGN_PMC_HW_EVENT_TO_ME("INSTRUCTION_RETIRED", 2); 45 | ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_RETIRED:L3_HIT", 2); 46 | 47 | return E_SUCCESS; 48 | } 49 | 50 | DECLARE_CLEAR_PMC(sandybridge, ldm_stall_cycles) 51 | { 52 | } 53 | 54 | DECLARE_READ_PMC(sandybridge, ldm_stall_cycles) 55 | { 56 | //return 0; 57 | uint64_t cycle_activity_stalls_l2_pending_diff = READ_MY_HW_EVENT_DIFF(0); 58 | uint64_t mem_load_uops_misc_retired_llc_miss_diff = READ_MY_HW_EVENT_DIFF(1); 59 | uint64_t mem_load_uops_retired_l3_hit_diff = READ_MY_HW_EVENT_DIFF(2); 60 | 61 | //return floor(cycle_activity_stalls_l2_pending_diff * (((double) (7*mem_load_uops_misc_retired_llc_miss_diff))/((double)(7*mem_load_uops_misc_retired_llc_miss_diff + mem_load_uops_retired_l3_hit_diff)))); 62 | uint64_t uden = 7.0 * mem_load_uops_misc_retired_llc_miss_diff + mem_load_uops_retired_l3_hit_diff; 63 | if (uden == 0) { 64 | return 0; 65 | } 66 | double den = uden; 67 | double num = 7.0 * mem_load_uops_misc_retired_llc_miss_diff; 68 | 69 | return (uint64_t) floorl(cycle_activity_stalls_l2_pending_diff*num/den); 70 | } 71 | 72 | 73 | PMC_EVENTS(sandybridge, 4) 74 | #endif /* __CPU_SANDYBRIDGE_H */ 75 | -------------------------------------------------------------------------------- /src/lib/cpu/xeon-ex.h: 
-------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #include "dev.h" 15 | 16 | #ifdef PAPI_SUPPORT 17 | #include "sandybridge-papi.h" 18 | #include "ivybridge-papi.h" 19 | #include "haswell-papi.h" 20 | #else 21 | #include "sandybridge.h" 22 | #include "ivybridge.h" 23 | #include "haswell.h" 24 | #endif 25 | 26 | int intel_xeon_ex_set_throttle_register(pci_regs_t *regs, throttle_type_t throttle_type, uint16_t val) 27 | { 28 | int offset; 29 | int i; 30 | 31 | switch(throttle_type) { 32 | case THROTTLE_DDR_ACT: 33 | offset = 0x190; break; 34 | case THROTTLE_DDR_READ: 35 | offset = 0x192; break; 36 | case THROTTLE_DDR_WRITE: 37 | offset = 0x194; break; 38 | default: 39 | offset = 0x190; 40 | } 41 | 42 | // write to all 4 channels 43 | 44 | // first Activate throttling 45 | /*set_pci(bus_id, 0x10, 0x0, 0x190, (uint16_t) val); 46 | set_pci(bus_id, 0x10, 0x1, 0x190, (uint16_t) val); 47 | set_pci(bus_id, 0x10, 0x4, 0x190, (uint16_t) val); 48 | set_pci(bus_id, 0x10, 0x5, 0x190, (uint16_t) val);*/ 49 | 50 | // 
then the Read or Write throttling 51 | for (i=0; i < regs->channels; ++i) { 52 | set_pci(regs->addr[i].bus_id, regs->addr[i].dev_id, regs->addr[i].funct, offset, (uint16_t) val); 53 | } 54 | 55 | return 0; 56 | } 57 | 58 | int intel_xeon_ex_get_throttle_register(pci_regs_t *regs, throttle_type_t throttle_type, uint16_t* val) 59 | { 60 | int offset; 61 | 62 | switch(throttle_type) { 63 | case THROTTLE_DDR_ACT: 64 | offset = 0x190; break; 65 | case THROTTLE_DDR_READ: 66 | offset = 0x192; break; 67 | case THROTTLE_DDR_WRITE: 68 | offset = 0x194; break; 69 | default: 70 | offset = 0x190; 71 | } 72 | 73 | // read just channel 1 74 | get_pci(regs->addr[0].bus_id, regs->addr[0].dev_id, regs->addr[0].funct, offset, val); 75 | return 0; 76 | } 77 | 78 | 79 | // desc is fixed in cpu_model() if not Xeon 80 | 81 | cpu_model_t cpu_model_intel_xeon_ex = { 82 | .microarch = SandyBridgeXeon, 83 | #ifdef PAPI_SUPPORT 84 | .pmc_events = {sandybridge_native_events, sandybridge_read_stall_events_local, NULL}, 85 | #else 86 | .pmc_events = PMC_EVENTS_PTR(sandybridge), 87 | #endif 88 | .set_throttle_register = intel_xeon_ex_set_throttle_register, 89 | .get_throttle_register = intel_xeon_ex_get_throttle_register 90 | }; 91 | 92 | cpu_model_t cpu_model_intel_xeon_ex_v2 = { 93 | .microarch = IvyBridgeXeon, 94 | #ifdef PAPI_SUPPORT 95 | .pmc_events = {ivybridge_native_events, ivybridge_read_stall_events_local, ivybridge_read_stall_events_remote}, 96 | #else 97 | .pmc_events = PMC_EVENTS_PTR(ivybridge), 98 | #endif 99 | .set_throttle_register = intel_xeon_ex_set_throttle_register, 100 | .get_throttle_register = intel_xeon_ex_get_throttle_register 101 | }; 102 | 103 | cpu_model_t cpu_model_intel_xeon_ex_v3 = { 104 | .microarch = HaswellXeon, 105 | #ifdef PAPI_SUPPORT 106 | .pmc_events = {haswell_native_events, haswell_read_stall_events_local, haswell_read_stall_events_remote}, 107 | #else 108 | .pmc_events = PMC_EVENTS_PTR(haswell), 109 | #endif 110 | .set_throttle_register = 
intel_xeon_ex_set_throttle_register, 111 | .get_throttle_register = intel_xeon_ex_get_throttle_register 112 | }; 113 | -------------------------------------------------------------------------------- /src/lib/debug.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #include "debug.h" 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "config.h" 20 | 21 | 22 | int dbg_modules[dbg_module_count]; 23 | int dbg_level = 0; 24 | int dbg_verbose = 0; 25 | const char* dbg_identifier = ""; 26 | static char dbg_identifier_buf[128]; 27 | 28 | static int 29 | strrep(char *target, char *source, char oldc, char newc) 30 | { 31 | int i; 32 | 33 | for (i=0; source[i]; i++) { 34 | if (source[i] == oldc) { 35 | target[i] = newc; 36 | } else { 37 | target[i] = source[i]; 38 | } 39 | } 40 | target[i] = '\0'; 41 | return 0; 42 | } 43 | 44 | 45 | void 46 | dbg_set_level(int level) 47 | { 48 | dbg_level = level; 49 | } 50 | 51 | 52 | int 53 | dbg_init(config_t* dbg_cfg, int level, const char* identifier) 54 | { 55 | // if user hasn't provided a debugging level then get it from the 56 | // configuration env/file 57 | if (level < 0) { 58 | __cconfig_lookup_int(dbg_cfg, "debug.level", &dbg_level); 59 | } else { 60 | dbg_level = level; 61 | } 62 | 63 | __cconfig_lookup_int(dbg_cfg, "debug.verbose", &dbg_verbose); 64 | 65 | // if user hasn't provide an identifier then check whether the environment 66 | // provides one, othewise create one based on process' pid 67 | if (!identifier) { 68 | dbg_identifier = getenv("DEBUG_IDENTIFIER"); 69 | if (!dbg_identifier) { 70 | sprintf(dbg_identifier_buf, "%d", getpid()); 71 | dbg_identifier = dbg_identifier_buf; 72 | } 73 | } else { 74 | dbg_identifier = identifier; 75 | } 76 | 77 | 78 | // read per module debugging flags 79 | #define STR(name) #name 80 | #define ACTION(name) \ 81 | do { \ 82 | char dotstr[128]; \ 83 | strrep(dotstr, STR(debug_module_##name), '_', '.'); \ 84 | __cconfig_lookup_bool(dbg_cfg, dotstr, \ 85 | &dbg_modules[dbg_module_##name]); \ 86 | } while (0); 87 | 88 | FOREACH_DEBUG_MODULE(ACTION) 89 | #undef ACTION 90 | DBG_LOG(DEBUG, ""); // prevent compiler warning 91 | return 0; 92 | } 93 | 
94 | 95 | void 96 | dbg_backtrace (void) 97 | { 98 | void *array[10]; 99 | size_t size; 100 | char **strings; 101 | size_t i; 102 | 103 | size = backtrace (array, 10); 104 | strings = backtrace_symbols (array, size); 105 | 106 | printf ("Obtained %zd stack frames.\n", size); 107 | 108 | for (i = 0; i < size; i++) 109 | printf ("%s\n", strings[i]); 110 | free (strings); 111 | } 112 | -------------------------------------------------------------------------------- /src/lib/debug.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #ifndef __DEBUG_H 15 | #define __DEBUG_H 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "config.h" 22 | 23 | #define FOREACH_DEBUG_MODULE(ACTION) \ 24 | ACTION(all) /* special name that covers all modules */ 25 | 26 | 27 | #define ACTION(name) \ 28 | dbg_module_##name, 29 | 30 | enum { 31 | FOREACH_DEBUG_MODULE(ACTION) 32 | dbg_module_count 33 | }; 34 | #undef ACTION 35 | 36 | #ifndef NDEBUG 37 | #define DBG_CODE(code) DBG_##code 38 | 39 | enum dbg_code { 40 | DBG_OFF = 0, 41 | DBG_CODE(CRITICAL) = 1, // Critical 42 | DBG_CODE(ERROR) = 2, // Error 43 | DBG_CODE(WARNING) = 3, // Warning 44 | DBG_CODE(INFO) = 4, // Info 45 | DBG_CODE(DEBUG) = 5, // Debugging 46 | }; 47 | 48 | static const char* dbg_code2str[] = { 49 | (char*) "OFF", 50 | (char*) "CRITICAL", 51 | (char*) "ERROR", 52 | (char*) "WARNING", 53 | (char*) "INFO", 54 | (char*) "DEBUG", 55 | }; 56 | 57 | static const int dbg_terminate_level = DBG_ERROR; 58 | static const int dbg_stderr_level = DBG_WARNING; 59 | 60 | extern int dbg_modules[]; 61 | extern int dbg_level; 62 | extern int dbg_verbose; 63 | extern const char* dbg_identifier; 64 | 65 | #define DBG_MODULE(name) dbg_module_##name 66 | 67 | #define DBG_LOG(level, format, ...) 
\ 68 | do { \ 69 | FILE* ferr = stdout; \ 70 | time_t ctime; \ 71 | if (DBG_CODE(level) && (DBG_CODE(level) <= dbg_level || \ 72 | DBG_CODE(level) <= dbg_terminate_level)) \ 73 | { \ 74 | if (DBG_CODE(level) <= dbg_stderr_level) { \ 75 | ferr=stderr; \ 76 | } \ 77 | if (dbg_verbose) { \ 78 | ctime = time(NULL); \ 79 | fprintf(ferr, "[%s] [%lu] %s in %s <%s,%d>: " format, \ 80 | dbg_identifier, \ 81 | ctime, \ 82 | dbg_code2str[DBG_CODE(level)], \ 83 | __FUNCTION__, __FILE__, __LINE__, ##__VA_ARGS__); \ 84 | } else { \ 85 | fprintf(ferr, "[%s] %s: " format, \ 86 | dbg_identifier, \ 87 | dbg_code2str[DBG_CODE(level)], \ 88 | ##__VA_ARGS__); \ 89 | } \ 90 | if (DBG_CODE(level) <= dbg_terminate_level) { \ 91 | exit(-1); \ 92 | } \ 93 | } \ 94 | } while(0); 95 | 96 | 97 | #define DBG_LOG2(level, module, format, ...) \ 98 | do { \ 99 | FILE* ferr = stdout; \ 100 | if (DBG_CODE(level) && \ 101 | (dbg_modules[module] || dbg_modules[dbg_module_all] || \ 102 | DBG_CODE(level) <= dbg_terminate_level) && \ 103 | (DBG_CODE(level) <= dbg_level || \ 104 | DBG_CODE(level) <= dbg_terminate_level)) \ 105 | { \ 106 | if (DBG_CODE(level) <= dbg_stderr_level) { \ 107 | ferr=stderr; \ 108 | } \ 109 | fprintf(ferr, "[%s] %s in %s <%s,%d>: " format, \ 110 | dbg_identifier, \ 111 | dbg_code2str[DBG_CODE(level)], \ 112 | __FUNCTION__, __FILE__, __LINE__, ##__VA_ARGS__); \ 113 | if (DBG_CODE(level) <= dbg_terminate_level) { \ 114 | exit(-1); \ 115 | } \ 116 | } \ 117 | } while(0); 118 | 119 | #else /* NDEBUG */ 120 | 121 | #define DBG_LOG(level, format, ...) 122 | #define DBG_LOG2(level, module, format, ...) 
123 | 124 | #endif /* NDEBUG */ 125 | 126 | 127 | #define VERIFY(condition) \ 128 | do { \ 129 | if (!(condition)) { \ 130 | fprintf(stderr, "Assumption \"%s\"\nFailed in file %s: at line:%i\n", \ 131 | #condition,__FILE__,__LINE__); \ 132 | DBG_LOG (DBG_CRITICAL, #condition);} \ 133 | fflush(stderr); \ 134 | } while (0); 135 | 136 | 137 | int dbg_init(config_t* dbg_cfg, int level, const char* identifier); 138 | void dbg_backtrace (void); 139 | void dbg_set_level(int level); 140 | 141 | #endif // __DEBUG_H 142 | -------------------------------------------------------------------------------- /src/lib/dev.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "dev/ioctl_query.h" 22 | #include "error.h" 23 | #include "dev.h" 24 | 25 | // TODO: get this value from the config file 26 | #define DEV_PATH "/dev/nvmemul" 27 | 28 | int set_counter(unsigned int counter_id, unsigned int event_id) 29 | { 30 | int fd; 31 | int ret; 32 | 33 | ioctl_query_setcounter_t q; 34 | fd = open(DEV_PATH, O_RDONLY); 35 | if (fd < 0) { 36 | DBG_LOG(ERROR, "Can't open %s - Is the NVM emulator device driver installed?\n", DEV_PATH); 37 | return E_ERROR; 38 | } 39 | q.counter_id = counter_id; 40 | q.event_id = event_id; 41 | if ((ret = ioctl(fd, IOCTL_SETCOUNTER, &q)) < 0) { 42 | close(fd); 43 | return E_ERROR; 44 | } 45 | close(fd); 46 | return E_SUCCESS; 47 | } 48 | 49 | 50 | int set_pci(unsigned int bus_id, unsigned int device_id, unsigned int function_id, unsigned int offset, uint16_t val) 51 | { 52 | int fd; 53 | int ret; 54 | 55 | ioctl_query_setgetpci_t q; 56 | fd = open(DEV_PATH, O_RDONLY); 57 | if (fd < 0) { 58 | DBG_LOG(ERROR, "Can't open %s - Is the NVM emulator device driver installed?\n", DEV_PATH); 59 | return E_ERROR; 60 | } 61 | q.bus_id = bus_id; 62 | q.device_id = device_id; 63 | q.function_id = function_id; 64 | q.offset = offset; 65 | q.val = val; 66 | if ((ret = ioctl(fd, IOCTL_SETPCI, &q)) < 0) { 67 | close(fd); 68 | return E_ERROR; 69 | } 70 | close(fd); 71 | return E_SUCCESS; 72 | } 73 | 74 | int get_pci(unsigned int bus_id, unsigned int device_id, unsigned int function_id, unsigned int offset, uint16_t* val) 75 | { 76 | int fd; 77 | int ret; 78 | 79 | ioctl_query_setgetpci_t q; 80 | fd = open(DEV_PATH, O_RDWR); 81 | if (fd < 0) { 82 | DBG_LOG(ERROR, "Can't open %s - Is the NVM emulator device driver installed?\n", DEV_PATH); 83 | return E_ERROR; 84 | } 85 | q.bus_id = bus_id; 86 | q.device_id = device_id; 87 | q.function_id = 
function_id; 88 | q.offset = offset; 89 | q.val = 0; 90 | if ((ret = ioctl(fd, IOCTL_GETPCI, &q)) < 0) { 91 | close(fd); 92 | return E_ERROR; 93 | } 94 | *val = q.val; 95 | close(fd); 96 | return E_SUCCESS; 97 | } 98 | 99 | 100 | -------------------------------------------------------------------------------- /src/lib/dev.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #ifndef __DEVICE_DRIVER_API_H 15 | #define __DEVICE_DRIVER_API_H 16 | 17 | #include 18 | 19 | #define MAX_NUM_MC_PCI_BUS 16 20 | #define MAX_NUM_MC_CHANNELS 16 21 | 22 | typedef struct { 23 | unsigned int bus_id; 24 | unsigned int dev_id; 25 | unsigned int funct; 26 | } pci_addr; 27 | 28 | typedef struct { 29 | pci_addr addr[MAX_NUM_MC_CHANNELS]; 30 | unsigned int channels; 31 | } pci_regs_t; 32 | 33 | int set_counter(unsigned int counter_id, unsigned int event_id); 34 | int set_pci(unsigned bus_id, unsigned int device_id, unsigned int function_id, unsigned int offset, uint16_t val); 35 | int get_pci(unsigned bus_id, unsigned int device_id, unsigned int function_id, unsigned int offset, uint16_t* val); 36 | 37 | #endif /* __DEVICE_DRIVER_API_H */ 38 | -------------------------------------------------------------------------------- /src/lib/errno.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
***************************************************************************/
#ifndef __ERRNO_H
#define __ERRNO_H

#ifdef __DEFINE_ERRNO
# error "__DEFINE_ERRNO previously defined"
#endif

/*
 * Define error codes and error messages here.
 *
 * Each ACTION(name, message) entry yields one enum constant below; the
 * constants are numbered in list order, so E_SUCCESS is 0.
 */
#define __DEFINE_ERRNO(ACTION) \
    ACTION(E_SUCCESS, "Success") \
    ACTION(E_ERROR, "Generic error") \
    ACTION(E_NOMEM, "No memory") \
    ACTION(E_EXIST, "Name already exists") \
    ACTION(E_NOENT, "Name does not exist") \
    ACTION(E_INVAL, "Invalid argument") \
    ACTION(E_BUSY, "Resource busy") \
    ACTION(E_NOTEMPTY, "Not empty") \
    ACTION(E_ERRNO, "Standard C library error; check errno for details")


#ifdef __ENUM_MEMBER
# error "__ENUM_MEMBER previously defined"
#endif

/* Expands one list entry to its enum constant, dropping the message. */
#define __ENUM_MEMBER(name, str) name,

/* Error codes; E_MAXERRNO is one past the last valid code. */
enum {
    __DEFINE_ERRNO(__ENUM_MEMBER)
    E_MAXERRNO
};

#undef __ENUM_MEMBER /* don't pollute the macro namespace */

#ifdef __ERRNO_STRING
# error "__ERRNO_STRING previously defined"
#endif

/* Expands one list entry to its message string, dropping the name. */
#define __ERRNO_STRING(name, str) str,

/*
TODO: not used for now
static const char*
ErrorToString(int err) {
    static const char* errstr[] = {
        __DEFINE_ERRNO(__ERRNO_STRING)
        "Unknown error code"
    };
    if (err >= 0 && err < E_MAXERRNO) {
        return errstr[err];
    }
    return errstr[E_MAXERRNO];
}
*/
#undef __ERRNO_STRING /* don't pollute the macro namespace */
#undef __DEFINE_ERRNO /* don't pollute the macro namespace */

#endif /* __ERRNO_H */

--------------------------------------------------------------------------------
/src/lib/error.h:
--------------------------------------------------------------------------------
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.
3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __ERROR_H 15 | #define __ERROR_H 16 | 17 | #include "errno.h" 18 | #include "debug.h" 19 | 20 | #endif /* __ERROR_H */ 21 | -------------------------------------------------------------------------------- /src/lib/init.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #include 15 | #include "cpu/cpu.h" 16 | #include "config.h" 17 | #include "error.h" 18 | #include "model.h" 19 | #include "measure.h" 20 | #include "thread.h" 21 | #include "topology.h" 22 | #include "interpose.h" 23 | #include "monotonic_timer.h" 24 | #include "pflush.h" 25 | #include "stat.h" 26 | 27 | static void init() __attribute__((constructor)); 28 | static void finalize() __attribute__((destructor)); 29 | 30 | int set_process_local_rank(); 31 | int unset_process_local_rank(); 32 | int partition_cpus(virtual_topology_t* virtual_topology); 33 | 34 | static virtual_topology_t* virtual_topology = NULL; 35 | 36 | void finalize() { 37 | int i; 38 | if (latency_model.enabled) { 39 | unregister_self(); 40 | } 41 | 42 | if (read_bw_model.enabled) { 43 | for (i=0; i < virtual_topology->num_virtual_nodes; i++) { 44 | // FIXME: currently we keep a single bandwidth model and not per-node BW model 45 | physical_node_t* phys_node = virtual_topology->virtual_nodes[i].nvram_node; 46 | pci_regs_t *regs = phys_node->mc_pci_regs; 47 | 48 | // reset throttling 49 | phys_node->cpu_model->set_throttle_register(regs, THROTTLE_DDR_ACT, 0x8FFF); 50 | } 51 | } 52 | #ifdef USE_STATISTICS 53 | stats_report(); 54 | #endif 55 | // finalize libraries and release resources 56 | #ifdef PAPI_SUPPORT 57 | pmc_shutdown(); 58 | #endif 59 | 60 | unset_process_local_rank(); 61 | 62 | //__cconfig_destroy(&cfg); 63 | } 64 | 65 | void init() 66 | { 67 | config_t cfg; 68 | cpu_model_t* cpu; 69 | char* ld_preload_path; 70 | double start_time, end_time; 71 | #ifdef CALIBRATION_SUPPORT 72 | int i; 73 | #endif 74 | 75 | // FIXME: do we need to register the main thread with our system? 76 | // YES: for sure for single-threaded apps 77 | 78 | start_time = monotonic_time_us(); 79 | 80 | // we reset LD_PRELOAD to ensure we don't get into recursive preloads when 81 | // calling popen during initialization. 
before exiting we reactivate LD_PRELOAD 82 | // to allow LD_PRELOADS on children 83 | ld_preload_path = getenv("LD_PRELOAD"); 84 | unsetenv("LD_PRELOAD"); 85 | 86 | if (__cconfig_init(&cfg, "nvmemul.ini") == CONFIG_FALSE) { 87 | goto error; 88 | } 89 | 90 | __cconfig_lookup_bool(&cfg, "latency.enable", &latency_model.enabled); 91 | __cconfig_lookup_bool(&cfg, "bandwidth.enable", &read_bw_model.enabled); 92 | 93 | if (dbg_init(&cfg, -1, NULL) != E_SUCCESS) { 94 | goto error; 95 | } 96 | 97 | if (init_interposition() != E_SUCCESS) { 98 | goto error; 99 | } 100 | 101 | if ((cpu = cpu_model()) == NULL) { 102 | DBG_LOG(ERROR, "No supported processor found\n"); 103 | goto error; 104 | } 105 | 106 | init_virtual_topology(&cfg, cpu, &virtual_topology); 107 | 108 | if (init_bandwidth_model(&cfg, virtual_topology) != E_SUCCESS) { 109 | goto error; 110 | } 111 | 112 | if (latency_model.enabled) { 113 | if (init_latency_model(&cfg, cpu, virtual_topology) != E_SUCCESS) { 114 | goto error; 115 | } 116 | 117 | init_thread_manager(&cfg, virtual_topology); 118 | 119 | #ifdef USE_STATISTICS 120 | // statistics makes use of the thread manager and is used by the register_self() 121 | stats_enable(&cfg); 122 | #endif 123 | 124 | set_process_local_rank(); 125 | 126 | // thread manager must be initialized and local rank set 127 | // CPU partitioning must be made before the first thread is registered 128 | if (partition_cpus(virtual_topology) != E_SUCCESS) { 129 | goto error; 130 | } 131 | 132 | if (register_self() != E_SUCCESS) { 133 | goto error; 134 | } 135 | 136 | #ifdef CALIBRATION_SUPPORT 137 | // main thread is now tracked by the latency emulator 138 | // first, calibrate the latency emulation 139 | if (latency_model.calibration) { 140 | for (i = 0; i < virtual_topology->num_virtual_nodes; ++i) { 141 | latency_calibration(&virtual_topology->virtual_nodes[i]); 142 | } 143 | } 144 | #endif 145 | int write_latency; 146 | __cconfig_lookup_int(&cfg, "latency.write", &write_latency); 147 
| init_pflush(cpu_speed_mhz(), write_latency); 148 | } 149 | 150 | end_time = monotonic_time_us(); 151 | 152 | #ifdef USE_STATISTICS 153 | if (latency_model.enabled) { 154 | stats_set_init_time(end_time - start_time); 155 | } 156 | #endif 157 | 158 | if (ld_preload_path) 159 | setenv("LD_PRELOAD", ld_preload_path, 1); 160 | 161 | return; 162 | 163 | error: 164 | /* Cannot initialize library -- catastrophic error */ 165 | if (ld_preload_path) 166 | setenv("LD_PRELOAD", ld_preload_path, 1); 167 | 168 | fprintf(stderr, "ERROR: nvmemul: Initialization failed. Running without non-volatile memory emulation.\n"); 169 | } 170 | -------------------------------------------------------------------------------- /src/lib/interpose.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #define _GNU_SOURCE 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "error.h" 21 | #include "model.h" 22 | #include "thread.h" 23 | #include "cpu/cpu.h" 24 | #ifdef PAPI_SUPPORT 25 | #include "cpu/pmc-papi.h" 26 | #else 27 | #include "cpu/pmc.h" 28 | #endif 29 | 30 | 31 | // WARNING: Our library MUST directly use the functions we interpose on by 32 | // calling __lib_X to avoid interposition on ourselves. 33 | 34 | 35 | int (*__lib_pthread_create)(pthread_t *thread, const pthread_attr_t *attr, 36 | void *(*start_routine) (void *), void *arg); 37 | int (*__lib_pthread_mutex_lock)(pthread_mutex_t *mutex); 38 | int (*__lib_pthread_mutex_trylock)(pthread_mutex_t *mutex); 39 | int (*__lib_pthread_mutex_unlock)(pthread_mutex_t *mutex); 40 | int (*__lib_pthread_detach)(pthread_t thread); 41 | 42 | extern inline hrtime_t hrtime_cycles(void); 43 | extern inline int cycles_to_us(cpu_model_t* cpu, hrtime_t cycles); 44 | 45 | 46 | int init_interposition() 47 | { 48 | char *error; 49 | // if no symbol is returned then no interposition needed 50 | __lib_pthread_create = dlsym(RTLD_NEXT, "pthread_create"); 51 | __lib_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock"); 52 | __lib_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock"); 53 | __lib_pthread_mutex_unlock = dlsym(RTLD_NEXT, "pthread_mutex_unlock"); 54 | __lib_pthread_detach = dlsym(RTLD_NEXT, "pthread_detach"); 55 | 56 | if (__lib_pthread_mutex_lock == NULL || __lib_pthread_mutex_unlock == NULL || 57 | __lib_pthread_create == NULL || __lib_pthread_mutex_trylock == NULL || 58 | __lib_pthread_detach == NULL) { 59 | error = dlerror(); 60 | DBG_LOG(ERROR, "Interposition failed: %s\n", error != NULL ? 
error : "unknown reason"); 61 | return E_ERROR; 62 | } 63 | 64 | return E_SUCCESS; 65 | } 66 | 67 | 68 | // Interposing on pthread_create requires interposing on the thread created as we 69 | // require the TID of that thread which we can only get by executing the gettid() 70 | // system call from that thread. So we interpose on the start_routine which is 71 | // called by the new thread 72 | typedef struct { 73 | void *(*start_routine) (void *); 74 | void *arg; 75 | } pthread_create_functor_t; 76 | 77 | void* __interposed_start_routine(void* args) 78 | { 79 | void* ret; 80 | pthread_create_functor_t* f = (pthread_create_functor_t*) args; 81 | if (register_self() != E_SUCCESS) { 82 | free(args); 83 | return NULL; 84 | } 85 | ret = f->start_routine(f->arg); 86 | // FIXME: directly calling unregister may miss cases where the 87 | // thread terminates prematurely (such as pthread_exit or cancel) 88 | // consider using a key destructor function instead 89 | //fprintf(stderr, "stall cycles: %lu\n", thread_self()->stall_cycles); 90 | //fprintf(stderr, "signals_sent: %lu signals_recv: %lu\n", thread_self()->signals_sent, thread_self()->signals_recv); 91 | unregister_self(); 92 | free(args); 93 | return ret; 94 | } 95 | 96 | int pthread_create(pthread_t *thread, const pthread_attr_t *attr, 97 | void *(*start_routine) (void *), void *arg) 98 | { 99 | int ret; 100 | 101 | //DBG_LOG(DEBUG, "interposing pthread_create\n"); 102 | 103 | //assert(__lib_pthread_create); 104 | if (__lib_pthread_create == NULL) 105 | init_interposition(); 106 | 107 | if (latency_model.enabled) { 108 | pthread_create_functor_t *functor = malloc(sizeof(pthread_create_functor_t)); 109 | functor->arg = arg; 110 | functor->start_routine = start_routine; 111 | 112 | if ((ret = __lib_pthread_create(thread, attr, __interposed_start_routine, (void*) functor)) != 0) { 113 | DBG_LOG(ERROR, "call to __lib_pthread_create failed\n"); 114 | return ret; 115 | } 116 | } else { 117 | ret = 
__lib_pthread_create(thread, attr, start_routine, arg); 118 | } 119 | 120 | return ret; 121 | } 122 | 123 | int pthread_mutex_lock(pthread_mutex_t *mutex) 124 | { 125 | int err; 126 | 127 | if (latency_model.enabled) { 128 | if(reached_min_epoch_duration(thread_self())) { 129 | // create new epoch here in order to propagate only the critical session delay to other threads 130 | // the thread monitor will keep trying to create new epoch, unless the min duration has not been reached 131 | create_latency_epoch(); 132 | } 133 | } 134 | 135 | //DBG_LOG(DEBUG, "interposing pthread_mutex_lock\n"); 136 | 137 | //assert(__lib_pthread_mutex_lock); 138 | if (__lib_pthread_mutex_lock == NULL) 139 | init_interposition(); 140 | err = __lib_pthread_mutex_lock(mutex); 141 | 142 | return err; 143 | } 144 | 145 | int pthread_mutex_trylock(pthread_mutex_t *mutex) 146 | { 147 | int err; 148 | 149 | if (latency_model.enabled) { 150 | if(reached_min_epoch_duration(thread_self())) { 151 | create_latency_epoch(); 152 | } 153 | } 154 | 155 | //DBG_LOG(DEBUG, "interposing pthread_mutex_trylock\n"); 156 | 157 | //assert(__lib_pthread_mutex_trylock); 158 | if (__lib_pthread_mutex_trylock == NULL) 159 | init_interposition(); 160 | err = __lib_pthread_mutex_trylock(mutex); 161 | 162 | return err; 163 | } 164 | 165 | int pthread_mutex_unlock(pthread_mutex_t *mutex) 166 | { 167 | int err; 168 | 169 | if (latency_model.enabled) { 170 | if (reached_min_epoch_duration(thread_self())) { 171 | create_latency_epoch(); 172 | } 173 | } 174 | 175 | //DBG_LOG(DEBUG, "interposing pthread_mutex_unlock\n"); 176 | 177 | //assert(__lib_pthread_mutex_unlock); 178 | if (__lib_pthread_mutex_unlock == NULL) 179 | init_interposition(); 180 | err = __lib_pthread_mutex_unlock(mutex); 181 | 182 | return err; 183 | } 184 | -------------------------------------------------------------------------------- /src/lib/interpose.h: -------------------------------------------------------------------------------- 1 | 
/*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __INTERPOSE_H 15 | #define __INTERPOSE_H 16 | 17 | 18 | /** 19 | * 20 | * \page library_interposition Library interposition 21 | * 22 | * The emulator intercepts several events of interest. It achieves this 23 | * by interposing on corresponding functions. 24 | * Currently this includes thread creation and POSIX synchronization mechanisms. 
25 | */ 26 | 27 | extern int (*__lib_pthread_create)(pthread_t *thread, const pthread_attr_t *attr, 28 | void *(*start_routine) (void *), void *arg); 29 | extern int (*__lib_pthread_mutex_lock)(pthread_mutex_t *mutex); 30 | extern int (*__lib_pthread_mutex_trylock)(pthread_mutex_t *mutex); 31 | extern int (*__lib_pthread_mutex_unlock)(pthread_mutex_t *mutex); 32 | extern int (*__lib_pthread_detach)(pthread_t thread); 33 | 34 | int init_interposition(); 35 | 36 | #endif /* __INTERPOSE_H */ 37 | -------------------------------------------------------------------------------- /src/lib/measure.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __MEASURE_H 15 | #define __MEASURE_H 16 | 17 | /** 18 | * \file 19 | * 20 | * Memory latency and bandwidth measurements 21 | */ 22 | 23 | /** 24 | * \brief Measure memory read bandwidth 25 | * 26 | * Measures memory read bandwidth from a local socket (cpu_node) 27 | * to the memory of a remote socket (mem_node). 
It does this 28 | * by firing a bunch of threads issuing streaming instructions 29 | * to saturate memory bandwidth. 30 | */ 31 | double measure_read_bw(int cpu_node, int mem_node); 32 | 33 | /** 34 | * \brief Measure memory write bandwidth 35 | * 36 | * Measures memory write bandwidth from a local socket (cpu_node) 37 | * to the memory of a remote socket (mem_node). 38 | * See measure_read_bw for how this is done. 39 | */ 40 | double measure_write_bw(int cpu_node, int mem_node); 41 | 42 | 43 | /** 44 | * \brief Measure memory latency 45 | * 46 | * Measures memory read latency from one local socket to the memory of a 47 | * remote socket. It does this using a pointer-chasing microbenchmark. 48 | * The microbenchmark sets up an array where each element determines the 49 | * element to be read next. 50 | */ 51 | int measure_latency(cpu_model_t* cpu, int from_node_id, int to_node_id); 52 | 53 | /** 54 | * \brief Calibrate memory latency 55 | * 56 | * Automatically tweaks the memory latency based on the hardware latency 57 | * detected on the target system. 58 | */ 59 | void latency_calibration(); 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /src/lib/misc.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details.
You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | 20 | #include 21 | size_t string_to_size(char* str) 22 | { 23 | size_t factor = 1; 24 | size_t size; 25 | long val; 26 | char* endptr = 0; 27 | 28 | val = strtoull(str, &endptr, 10); 29 | while(endptr && (endptr - str) < strlen(str) && !isalpha(*endptr)) {endptr++;} 30 | 31 | switch (endptr[0]) { 32 | case 'K': case 'k': 33 | factor = 1024LLU; 34 | break; 35 | case 'M': case 'm': 36 | factor = 1024LLU*1024LLU; 37 | break; 38 | case 'G': case 'g': 39 | factor = 1024LLU*1024LLU*1024LLU; 40 | break; 41 | default: 42 | factor = 1; 43 | } 44 | size = factor * val; 45 | return size; 46 | } 47 | -------------------------------------------------------------------------------- /src/lib/misc.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. 
You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __MISC_H 15 | #define __MISC_H 16 | 17 | size_t string_to_size(char* str); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/lib/model.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #ifndef __MODEL_H 15 | #define __MODEL_H 16 | 17 | #include "config.h" 18 | #include "cpu/cpu.h" 19 | #include "thread.h" 20 | #ifdef PAPI_SUPPORT /* the performance-counter header is selected at build time */ 21 | #include "cpu/pmc-papi.h" 22 | #else 23 | #include "cpu/pmc.h" 24 | #endif 25 | 26 | #define MAX_EPOCH_DURATION_US 1000000 /* microseconds; presumably the upper bound on an epoch's length -- confirm against the latency model */ 27 | #define MIN_EPOCH_DURATION_US 1 /* microseconds; presumably the lower bound on an epoch's length -- confirm against the latency model */ 28 | 29 | typedef struct { /* state of the latency model; the single instance is declared below as latency_model */ 30 | int enabled; /* tested by the pthread_mutex_* interposers before any epoch work */ 31 | int read_latency; /* NOTE(review): unit not visible here, presumably ns -- confirm */ 32 | int write_latency; /* NOTE(review): unit not visible here, presumably ns -- confirm */ 33 | int inject_delay; /* when zero, stats_report() prints "delay injection is disabled" */ 34 | #ifdef CALIBRATION_SUPPORT 35 | int calibration; 36 | #endif 37 | #ifdef PAPI_SUPPORT /* the counter fields differ between the PAPI and non-PAPI builds */ 38 | read_stalls_t pmc_stall_local; 39 | read_stalls_t pmc_stall_remote; 40 | #else 41 | pmc_event_t* pmc_stall_cycles; 42 | pmc_event_t* pmc_remote_dram; /* non-NULL is one of the conditions stat.c checks before using the remote hardware latency */ 43 | int process_local_rank; /* written by set_process_local_rank() in process_rank.c */ 44 | int max_local_processe_ranks; /* parsed from the EMUL_LOCAL_PROCESSES environment variable in process_rank.c */ 45 | #endif 46 | 47 | double stalls_calibration_factor; 48 | } latency_model_t; 49 | 50 | extern latency_model_t latency_model; 51 | 52 | typedef struct { /* bandwidth model: two parallel arrays with MAX_THROTTLE_VALUE slots each */ 53 | unsigned int throttle_reg_val[MAX_THROTTLE_VALUE]; 54 | double bandwidth[MAX_THROTTLE_VALUE]; 55 | int npoints; /* presumably the number of valid entries in the two arrays -- confirm in model_bw.c */ 56 | int enabled; 57 | } bw_model_t; 58 | 59 | extern bw_model_t read_bw_model; 60 | extern bw_model_t write_bw_model; 61 | 62 | int init_bandwidth_model(config_t* cfg, struct virtual_topology_s* topology); 63 | int init_latency_model(config_t* cfg, cpu_model_t* cpu, struct virtual_topology_s* virtual_topology); 64 | void init_thread_latency_model(thread_t *thread); 65 | 66 | void create_latency_epoch(); 67 | 68 | #endif /* __MODEL_H */ 69 | -------------------------------------------------------------------------------- /src/lib/monotonic_timer.c: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Alex Reece. 2 | // 3 | // A cross platform monotonic timer. 4 | 5 | #include 6 | #include "monotonic_timer.h" 7 | 8 | #if _POSIX_TIMERS > 0 && defined(_POSIX_MONOTONIC_CLOCK) 9 | // If we have it, use clock_gettime and CLOCK_MONOTONIC.
10 | 11 | #include 12 | 13 | double monotonic_time() { 14 | struct timespec time; 15 | // Note: Make sure to link with -lrt to define clock_gettime. 16 | clock_gettime(CLOCK_MONOTONIC, &time); 17 | return ((double) time.tv_sec) + ((double) time.tv_nsec / (NANOS_PER_SECF)); 18 | } 19 | 20 | double monotonic_time_us() { 21 | struct timespec time; 22 | // Note: Make sure to link with -lrt to define clock_gettime. 23 | clock_gettime(CLOCK_MONOTONIC, &time); 24 | return ((double) (time.tv_sec * USECS_PER_SEC)) + ((double) time.tv_nsec / NANOS_PER_USECF); 25 | } 26 | 27 | #else 28 | // Fall back to rdtsc. The reason we don't use clock() is this scary message 29 | // from the man page: 30 | // "On several other implementations, the value returned by clock() also 31 | // includes the times of any children whose status has been collected via 32 | // wait(2) (or another wait-type call)." 33 | // 34 | // Also, clock() only has microsecond accuracy. 35 | // 36 | // This whitepaper offered excellent advice on how to use rdtscp for 37 | // profiling: http://download.intel.com/embedded/software/IA/324264.pdf 38 | // 39 | // Unfortunately, we can't follow its advice exactly with our semantics, 40 | // so we're just going to use rdtscp with cpuid. 41 | // 42 | // Note that rdtscp will only be available on new processors. 
43 | 44 | #include 45 | 46 | static inline uint64_t rdtsc() { 47 | uint32_t hi, lo; 48 | asm volatile("rdtscp\n" 49 | "movl %%edx, %0\n" 50 | "movl %%eax, %1\n" 51 | "cpuid" 52 | : "=r" (hi), "=r" (lo) : : "%rax", "%rbx", "%rcx", "%rdx"); 53 | return (((uint64_t)hi) << 32) | (uint64_t)lo; 54 | } 55 | 56 | static uint64_t rdtsc_per_sec = 0; 57 | static uint64_t rdtsc_per_usec = 0; 58 | static void __attribute__((constructor)) init_rdtsc_per_sec() { 59 | uint64_t before, after; 60 | 61 | before = rdtsc(); 62 | usleep(USECS_PER_SEC); 63 | after = rdtsc(); 64 | 65 | rdtsc_per_sec = after - before; 66 | 67 | before = rdtsc(); 68 | usleep(1); 69 | after = rdtsc(); 70 | 71 | rdtsc_per_usec = after - before; 72 | } 73 | 74 | double monotonic_time() { 75 | return (double) rdtsc() / (double) rdtsc_per_sec; 76 | } 77 | 78 | // TODO: not tested, it is core specific and callers must be aware 79 | double monotonic_time_us() { 80 | return ((double) rdtsc() / (double) rdtsc_per_usec); 81 | } 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /src/lib/monotonic_timer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Alex Reece. 2 | // 3 | // A cross platform monotonic timer. 4 | 5 | #ifndef MONOTONIC_TIMER_H_ 6 | #define MONOTONIC_TIMER_H_ 7 | 8 | #define NANOS_PER_SECF 1000000000.0 9 | #define NANOS_PER_USECF 1000.0 10 | #define NANOS_PER_USEC 1000 11 | #define USECS_PER_SEC 1000000 12 | 13 | // Returns seconds since some unspecified start time (guaranteed to be 14 | // monotonically increasing). 
15 | double monotonic_time(); 16 | double monotonic_time_us(); 17 | 18 | #endif // MONOTONIC_TIMER_H_ 19 | -------------------------------------------------------------------------------- /src/lib/pflush.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #include "pflush.h" 15 | 16 | #include 17 | 18 | typedef uint64_t hrtime_t; /* 64-bit count; used in this file for both TSC cycle counts and nanosecond values */ 19 | 20 | #if defined(__i386__) 21 | 22 | static inline unsigned long long asm_rdtsc(void) /* read the time-stamp counter (32-bit x86 build) */ 23 | { 24 | unsigned long long int x; 25 | __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); /* 0x0f 0x31 is the RDTSC opcode; "=A" delivers the edx:eax pair as one 64-bit value */ 26 | return x; 27 | } 28 | 29 | static inline unsigned long long asm_rdtscp(void) /* read the time-stamp counter with RDTSCP (32-bit x86 build) */ 30 | { 31 | unsigned hi, lo; 32 | __asm__ __volatile__ ("rdtscp" : "=a"(lo), "=d"(hi)::"ecx"); /* eax -> lo, edx -> hi; ecx is listed as clobbered because RDTSCP also writes it */ 33 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); /* join the two 32-bit halves */ 34 | 35 | } 36 | #elif defined(__x86_64__) 37 | 38 | static inline unsigned long long asm_rdtsc(void) /* read the time-stamp counter (x86-64 build) */ 39 | { 40 | unsigned hi, lo; 41 | __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); /* eax -> lo, edx -> hi */ 42 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); /* join the two 32-bit halves */ 43 | } 44 | 45 | static inline unsigned long long asm_rdtscp(void) /* read the time-stamp counter with RDTSCP (x86-64 build) */ 46 | { 47 | unsigned hi, lo; 48 | __asm__ __volatile__ ("rdtscp" : "=a"(lo), "=d"(hi)::"rcx"); /* eax -> lo, edx -> hi; rcx is listed as clobbered because RDTSCP also writes it */ 49 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); /* join the two 32-bit halves */ 50 | } 51 | #else 52 | #error "What architecture is this???"
53 | #endif 54 | 55 | /* Flush cacheline */ 56 | #define asm_clflush(addr) \ 57 | ({ \ 58 | __asm__ __volatile__ ("clflush %0" : : "m"(*addr)); \ 59 | }) 60 | 61 | /* Memory fence */ 62 | #define asm_mfence() \ 63 | ({ \ 64 | PM_FENCE(); \ 65 | __asm__ __volatile__ ("mfence"); \ 66 | }) 67 | 68 | static int global_cpu_speed_mhz = 0; 69 | static int global_write_latency_ns = 0; 70 | 71 | void init_pflush(int cpu_speed_mhz, int write_latency_ns) 72 | { 73 | global_cpu_speed_mhz = cpu_speed_mhz; 74 | global_write_latency_ns = write_latency_ns; 75 | } 76 | 77 | inline hrtime_t cycles_to_ns(int cpu_speed_mhz, hrtime_t cycles) 78 | { 79 | return (cycles*1000/cpu_speed_mhz); 80 | } 81 | 82 | inline hrtime_t ns_to_cycles(int cpu_speed_mhz, hrtime_t ns) 83 | { 84 | return (ns*cpu_speed_mhz/1000); 85 | } 86 | 87 | static inline 88 | void 89 | emulate_latency_ns(int ns) 90 | { 91 | hrtime_t cycles; 92 | hrtime_t start; 93 | hrtime_t stop; 94 | 95 | start = asm_rdtsc(); 96 | cycles = ns_to_cycles(global_cpu_speed_mhz, ns); 97 | 98 | do { 99 | /* RDTSC doesn't necessarily wait for previous instructions to complete 100 | * so a serializing instruction is usually used to ensure previous 101 | * instructions have completed. However, in our case this is a desirable 102 | * property since we want to overlap the latency we emulate with the 103 | * actual latency of the emulated instruction. 
104 | */ 105 | stop = asm_rdtsc(); 106 | } while (stop - start < cycles); 107 | } 108 | 109 | void 110 | pflush(uint64_t *addr) 111 | { 112 | if (global_write_latency_ns == 0) { 113 | return; 114 | } 115 | 116 | /* Measure the latency of a clflush and add an additional delay to 117 | * meet the latency to write to NVM */ 118 | hrtime_t start; 119 | hrtime_t stop; 120 | start = asm_rdtscp(); 121 | asm_clflush(addr); 122 | stop = asm_rdtscp(); 123 | int to_insert_ns = global_write_latency_ns - cycles_to_ns(global_cpu_speed_mhz, stop-start); 124 | if (to_insert_ns <= 0) { 125 | return; 126 | } 127 | emulate_latency_ns(to_insert_ns); 128 | } 129 | -------------------------------------------------------------------------------- /src/lib/pflush.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __PFLUSH_H 15 | #define __PFLUSH_H 16 | 17 | /** 18 | * \file 19 | * 20 | * \page pflush_api Persistent Memory API 21 | * 22 | * Method to be used by client to inject a write latency. 
23 | */ 24 | 25 | #include 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | void init_pflush(int cpu_speed_mhz, int write_latency_ns); 32 | 33 | /** 34 | * \brief Flush the cacheline containing address addr. 35 | */ 36 | void pflush(uint64_t *addr); 37 | 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | 42 | #endif /* __PFLUSH_H */ 43 | -------------------------------------------------------------------------------- /src/lib/pmalloc.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #include 15 | #include "topology.h" 16 | #include "pmalloc.h" 17 | #include "thread.h" 18 | #include "debug.h" 19 | 20 | // pmalloc should be implemented as a separate library 21 | 22 | // FIXME: pmalloc currently uses numa_alloc_onnode() which is slower than regular malloc. 
23 | // Consider layering another malloc on top of a emulated nvram 24 | 25 | 26 | void* pmalloc(size_t size) 27 | { 28 | thread_t* thread = thread_self(); 29 | 30 | if (thread == NULL) { 31 | // FIXME: JVM for instance create threads using a mechanism not traced by this emulator 32 | // for now we make sure the current thread is registered right when it makes the 33 | // first explicit NVM allocation. A better solution is to trace the thread creation 34 | // done by JVM. 35 | register_self(); 36 | thread = thread_self(); 37 | } 38 | 39 | if (thread) { 40 | return numa_alloc_onnode(size, thread->virtual_node->nvram_node->node_id); 41 | } else { 42 | DBG_LOG(ERROR, "pmalloc called with NULL thread\n"); 43 | } 44 | 45 | return NULL; 46 | } 47 | 48 | void *prealloc(void *old_addr, size_t old_size, size_t new_size) 49 | { 50 | return numa_realloc(old_addr, old_size, new_size); 51 | } 52 | 53 | void pfree(void* start, size_t size) 54 | { 55 | numa_free(start, size); 56 | } 57 | -------------------------------------------------------------------------------- /src/lib/pmalloc.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. 
You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __PMALLOC_H 15 | #define __PMALLOC_H 16 | 17 | /** 18 | * \file 19 | * 20 | * \page pmalloc_api Persistent Memory API 21 | * 22 | * Methods to be used by clients to allocate and free emulated NVRAM. 23 | */ 24 | 25 | #include 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | void *pmalloc(size_t size); 32 | void *prealloc(void *old_addr, size_t old_size, size_t new_size); 33 | void pfree(void *start, size_t size); 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | 39 | #endif /* __PMALLOC_H */ 40 | -------------------------------------------------------------------------------- /src/lib/process_rank.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | /* 15 | * process_rank.c 16 | * 17 | * Created on: Jun 16, 2015 18 | * Author: root 19 | */ 20 | 21 | 22 | #include 23 | #include "model.h" 24 | #include "error.h" 25 | 26 | #define EMUL_LOCAL_PROCESSES_VAR "EMUL_LOCAL_PROCESSES" 27 | 28 | #define EMUL_LOCK_FILE "/tmp/emul_lock_file" 29 | #define EMUL_PROCESS_LOCAL_RANK_FILE "/tmp/emul_process_local_rank" 30 | #define LOCKED_WAIT_US 1000 31 | #define MAX_LOCKED_RETRIES 50 32 | 33 | extern latency_model_t latency_model; 34 | 35 | int set_process_local_rank() 36 | { 37 | FILE *flock = NULL; 38 | FILE *fcounter = NULL; 39 | int expired = 0; 40 | int process_id = 0; 41 | char *processes; 42 | int ret = E_SUCCESS; 43 | #ifndef NDEBUG 44 | char hname[64]; 45 | #endif 46 | 47 | processes = getenv(EMUL_LOCAL_PROCESSES_VAR); 48 | 49 | if (!processes) { 50 | DBG_LOG(WARNING, "No %s variable set, skipping rank setting\n", EMUL_LOCAL_PROCESSES_VAR); 51 | return E_SUCCESS; 52 | } else { 53 | if (sscanf(processes, "%d", &latency_model.max_local_processe_ranks) != 1) { 54 | DBG_LOG(WARNING, "Ignoring EMUL_PROCESSES_PER_SYSTEM variable with invalid value '%s'\n", processes); 55 | return E_SUCCESS; 56 | } 57 | } 58 | 59 | if (latency_model.max_local_processe_ranks < 2) { 60 | DBG_LOG(WARNING, "EMUL_PROCESSES_PER_SYSTEM value is %d, skipping rank setting\n", 61 | latency_model.max_local_processe_ranks); 62 | return E_SUCCESS; 63 | } 64 | 65 | DBG_LOG(DEBUG, "setting process local rank for %d local processes\n", 66 | latency_model.max_local_processe_ranks); 67 | 68 | while (expired < MAX_LOCKED_RETRIES) { 69 | // open lock file on exclusive mode 70 | flock = fopen(EMUL_LOCK_FILE, "wx"); 71 | 72 | if (flock == NULL) { 73 | // DBG_LOG(DEBUG, "failed to create lock file\n"); 74 | usleep(LOCKED_WAIT_US); 75 | expired++; 76 | } 77 | if (flock) break; 78 | } 79 | if (expired >= MAX_LOCKED_RETRIES) { 80 | DBG_LOG(ERROR, "failed to set process local 
rank\n"); 81 | return E_ERROR; 82 | } 83 | 84 | // lock acquired, read process counter file 85 | if (access(EMUL_PROCESS_LOCAL_RANK_FILE, R_OK | W_OK) < 0) { 86 | // rank file does not exist, create it and write "1" for next process 87 | // this process rank id is 1 88 | process_id = 1; 89 | fcounter = fopen(EMUL_PROCESS_LOCAL_RANK_FILE, "w"); 90 | fwrite(&process_id, sizeof(int), 1, fcounter); 91 | fclose(fcounter); 92 | } else { 93 | // rank file exists, read the current rank max value and use it as this process local 94 | // rank id and increment the value in the rank file for the next process 95 | fcounter = fopen(EMUL_PROCESS_LOCAL_RANK_FILE, "r+"); 96 | if (fread(&process_id, sizeof(int), 1, fcounter) == 0) { 97 | abort(); 98 | } 99 | DBG_LOG(DEBUG, "read from file current max rank %d\n", process_id); 100 | latency_model.process_local_rank = process_id; 101 | process_id++; 102 | if (process_id >= latency_model.max_local_processe_ranks) { 103 | DBG_LOG(ERROR, "process rank %d exceeded limit of %d max emulated processes\n", 104 | process_id, latency_model.max_local_processe_ranks); 105 | fclose(fcounter); 106 | ret = E_ERROR; 107 | } else { 108 | DBG_LOG(DEBUG, "write to file new max rank %d\n", process_id); 109 | rewind(fcounter); 110 | fwrite(&process_id, sizeof(int), 1, fcounter); 111 | fclose(fcounter); 112 | } 113 | } 114 | 115 | // close and delete lock file 116 | fclose(flock); 117 | remove(EMUL_LOCK_FILE); 118 | 119 | #ifndef NDEBUG 120 | gethostname(hname, sizeof(hname)); 121 | DBG_LOG(DEBUG, "process local rank is %d on system %s\n", latency_model.process_local_rank, hname); 122 | #endif 123 | 124 | return ret; 125 | } 126 | 127 | int unset_process_local_rank() 128 | { 129 | FILE *flock = NULL; 130 | FILE *fcounter = NULL; 131 | int expired = 0; 132 | int process_id; 133 | 134 | if (latency_model.max_local_processe_ranks < 2) { 135 | return E_SUCCESS; 136 | } 137 | 138 | DBG_LOG(DEBUG, "Unsetting process local rank\n"); 139 | 140 | while (expired < 
MAX_LOCKED_RETRIES) { 141 | // open lock file on Exclusive mode 142 | flock = fopen(EMUL_LOCK_FILE, "wx"); 143 | 144 | if (flock == NULL) { 145 | // DBG_LOG(DEBUG, "failed to create lock file\n"); 146 | usleep(LOCKED_WAIT_US); 147 | expired++; 148 | } 149 | if (flock) break; 150 | } 151 | if (expired >= MAX_LOCKED_RETRIES) { 152 | DBG_LOG(ERROR, "failed to unset process local rank\n"); 153 | return E_ERROR; 154 | } 155 | 156 | // lock acquired, read process counter file 157 | if (access(EMUL_PROCESS_LOCAL_RANK_FILE, R_OK | W_OK) == 0) { 158 | // if rank file does not exist, nothing to be done 159 | // file exists, read the current value and decrement it 160 | fcounter = fopen(EMUL_PROCESS_LOCAL_RANK_FILE, "r+"); 161 | if (fread(&process_id, sizeof(int), 1, fcounter) == 0) { 162 | abort(); 163 | } 164 | DBG_LOG(DEBUG, "Exiting process and reading current rank max %d\n", process_id); 165 | if (process_id > 0) process_id--; 166 | { 167 | char hname[64]; 168 | gethostname(hname, sizeof(hname)); 169 | DBG_LOG(DEBUG, "Exiting process and writing new rank max %d on %s\n", process_id, hname); 170 | } 171 | rewind(fcounter); 172 | fwrite(&process_id, sizeof(int), 1, fcounter); 173 | fclose(fcounter); 174 | } 175 | 176 | // close and delete lock file 177 | fclose(flock); 178 | remove(EMUL_LOCK_FILE); 179 | 180 | return E_SUCCESS; 181 | } 182 | -------------------------------------------------------------------------------- /src/lib/stat.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. 
This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "utlist.h" 20 | #include "stat.h" 21 | #include "thread.h" 22 | #include "interpose.h" 23 | #include "model.h" 24 | 25 | thread_manager_t* get_thread_manager(); 26 | hrtime_t cycles_to_us(int cpu_speed_mhz, hrtime_t cycles); 27 | 28 | #ifdef USE_STATISTICS 29 | void stats_set_init_time(double init_time_us) { 30 | thread_manager_t* thread_manager = get_thread_manager(); 31 | 32 | __lib_pthread_mutex_lock(&thread_manager->mutex); 33 | thread_manager->stats.init_time_us = init_time_us; 34 | __lib_pthread_mutex_unlock(&thread_manager->mutex); 35 | } 36 | 37 | void stats_enable(config_t *cfg) { 38 | thread_manager_t* thread_manager = get_thread_manager(); 39 | 40 | __cconfig_lookup_bool(cfg, "statistics.enable", &thread_manager->stats.enabled); 41 | if (__cconfig_lookup_string(cfg, "statistics.file", &thread_manager->stats.output_file) == CONFIG_FALSE) { 42 | __lib_pthread_mutex_lock(&thread_manager->mutex); 43 | thread_manager->stats.output_file = NULL; 44 | __lib_pthread_mutex_unlock(&thread_manager->mutex); 45 | } 46 | } 47 | 48 | static char *get_current_time() { 49 | time_t curtime; 50 | char *str_time; 51 | 52 | time(&curtime); 53 | str_time = ctime(&curtime); 54 | str_time[strlen(str_time) - 1] = 0; 55 | 56 | return str_time; 57 | } 58 | 59 | static inline hrtime_t ns_to_cycles(int cpu_speed_mhz, int ns) 60 | { 61 | return (cpu_speed_mhz * ns) / 1000; 
62 | } 63 | 64 | extern __thread int tls_hw_local_latency; 65 | extern __thread int tls_hw_remote_latency; 66 | 67 | static void show_thread_stats(thread_t *thread, FILE *out_file) { 68 | uint64_t fixed_value; 69 | uint64_t cycles; 70 | 71 | fprintf(out_file, "\tThread id [%d]\n", thread->tid); 72 | fprintf(out_file, "\t\t: cpu id: %d\n", thread->cpu_id); 73 | fprintf(out_file, "\t\t: spawn timestamp: %lu\n", thread->stats.register_timestamp); 74 | fprintf(out_file, "\t\t: termination timestamp: %lu\n", thread->stats.unregister_timestamp); 75 | fixed_value = thread->stats.unregister_timestamp > 0 ? (thread->stats.unregister_timestamp - thread->stats.register_timestamp) : 0; 76 | fprintf(out_file, "\t\t: execution time: %lu usecs\n", fixed_value); 77 | fprintf(out_file, "\t\t: stall cycles: %lu\n", thread->stats.stall_cycles); 78 | 79 | if (thread->virtual_node->dram_node != thread->virtual_node->nvram_node && 80 | latency_model.pmc_remote_dram) { 81 | cycles = ns_to_cycles(thread->cpu_speed_mhz, tls_hw_remote_latency); 82 | fixed_value = cycles ? thread->stats.stall_cycles / cycles : 0; 83 | } 84 | else { 85 | cycles = ns_to_cycles(thread->cpu_speed_mhz, tls_hw_local_latency); 86 | fixed_value = cycles ? thread->stats.stall_cycles / cycles : 0; 87 | } 88 | fprintf(out_file, "\t\t: NVM accesses: %lu\n", fixed_value); 89 | 90 | 91 | fprintf(out_file, "\t\t: latency calculation overhead cycles: %lu\n", thread->stats.overhead_cycles); 92 | fprintf(out_file, "\t\t: injected delay cycles: %lu\n", thread->stats.delay_cycles); 93 | if (thread->cpu_speed_mhz) { 94 | fprintf(out_file, "\t\t: injected delay in usec: %lu\n", cycles_to_us(thread->cpu_speed_mhz, thread->stats.delay_cycles)); 95 | } 96 | fprintf(out_file, "\t\t: longest epoch duration: %lu usec\n", thread->stats.longest_epoch_duration_us); 97 | fixed_value = (thread->stats.shortest_epoch_duration_us == UINT64_MAX) ? 
0 : thread->stats.shortest_epoch_duration_us; 98 | fprintf(out_file, "\t\t: shortest epoch duration: %lu usec\n", fixed_value); 99 | fixed_value = thread->stats.epochs ? (thread->stats.overall_epoch_duration_us / thread->stats.epochs) : 100 | thread->stats.overall_epoch_duration_us; 101 | fprintf(out_file, "\t\t: average epoch duration: %lu usec\n", fixed_value); 102 | fprintf(out_file, "\t\t: number of epochs: %lu\n", thread->stats.epochs); 103 | fprintf(out_file, "\t\t: epochs which didn't reach min duration: %lu\n", thread->stats.min_epoch_not_reached); 104 | fprintf(out_file, "\t\t: static epochs requested: %lu\n", thread->stats.signals_sent); 105 | } 106 | 107 | void stats_report() { 108 | thread_t *thread; 109 | FILE *out_file; 110 | uint64_t running_threads = 0; 111 | thread_manager_t* thread_manager = get_thread_manager(); 112 | uint64_t terminated_threads; 113 | 114 | if (!thread_manager) return; 115 | if (!thread_manager->stats.enabled) return; 116 | 117 | if (thread_manager->stats.output_file) { 118 | out_file = fopen(thread_manager->stats.output_file, "a"); 119 | if (!out_file) { 120 | fprintf(stderr, "Failed to open statistics file for writing: %s\n", thread_manager->stats.output_file); 121 | return; 122 | } 123 | } else { 124 | out_file = stdout; 125 | } 126 | 127 | __lib_pthread_mutex_lock(&thread_manager->mutex); 128 | LL_FOREACH(thread_manager->thread_list, thread) { 129 | running_threads++; 130 | } 131 | __lib_pthread_mutex_unlock(&thread_manager->mutex); 132 | 133 | fprintf(out_file, "\n\n===== STATISTICS (%s) =====\n\n", get_current_time()); 134 | if (!latency_model.inject_delay) { 135 | fprintf(out_file, "WARNING: delay injection is disabled\n"); 136 | } 137 | fprintf(out_file, "PID: %d\n", getpid()); 138 | fprintf(out_file, "Initialization duration: %lu usec\n", thread_manager->stats.init_time_us); 139 | fprintf(out_file, "Running threads: %lu\n", running_threads); 140 | terminated_threads = thread_manager->stats.n_threads > 0 ? 
(thread_manager->stats.n_threads - running_threads) : 0; 141 | fprintf(out_file, "Terminated threads: %lu\n", terminated_threads); 142 | fprintf(out_file, "\n"); 143 | 144 | fprintf(out_file, "== Running threads == \n"); 145 | 146 | __lib_pthread_mutex_lock(&thread_manager->mutex); 147 | LL_FOREACH(thread_manager->thread_list, thread) { 148 | show_thread_stats(thread, out_file); 149 | } 150 | __lib_pthread_mutex_unlock(&thread_manager->mutex); 151 | 152 | fprintf(out_file, "\n== Terminated threads == \n"); 153 | 154 | __lib_pthread_mutex_lock(&thread_manager->mutex); 155 | LL_FOREACH(thread_manager->stats.thread_list, thread) { 156 | show_thread_stats(thread, out_file); 157 | } 158 | __lib_pthread_mutex_unlock(&thread_manager->mutex); 159 | 160 | if (out_file != stdout) { 161 | fclose(out_file); 162 | } 163 | } 164 | #endif 165 | 166 | double sum(double array[], int n) 167 | { 168 | int i; 169 | double s = 0; 170 | 171 | for (i=0; i 18 | #include 19 | #include "config.h" 20 | 21 | #ifdef USE_STATISTICS 22 | struct thread_s; 23 | 24 | typedef struct { 25 | int enabled; 26 | struct thread_s* thread_list; 27 | uint64_t n_threads; 28 | uint64_t init_time_us; 29 | char *output_file; 30 | } stats_t; 31 | 32 | typedef struct { 33 | uint64_t stall_cycles; 34 | uint64_t overhead_cycles; 35 | uint64_t delay_cycles; 36 | uint64_t signals_sent; 37 | uint64_t epochs; 38 | double last_epoch_timestamp; 39 | uint64_t shortest_epoch_duration_us; 40 | uint64_t longest_epoch_duration_us; 41 | uint64_t overall_epoch_duration_us; 42 | uint64_t min_epoch_not_reached; 43 | uint64_t register_timestamp; 44 | uint64_t unregister_timestamp; 45 | } thread_stats_t; 46 | 47 | void stats_enable(config_t *cfg); 48 | void stats_set_init_time(double init_time_us); 49 | void stats_report(); 50 | #endif 51 | 52 | double sum(double array[], int n); 53 | double sumxy(double x[], double y[], int n); 54 | double avg(double array[], int n); 55 | double slope(double x[], double y[], int n); 56 | 57 | 
#endif /* __STATISTICS_H */ 58 | -------------------------------------------------------------------------------- /src/lib/thread.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #ifndef __THREAD_H 15 | #define __THREAD_H 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include "topology.h" 23 | #include "cpu/cpu.h" 24 | #include "stat.h" 25 | 26 | 27 | struct thread_manager_s; // opaque 28 | 29 | typedef uint64_t hrtime_t; 30 | 31 | // TODO: Used by memlat benchmark, should be disabled on a release version 32 | #define MEMLAT_SUPPORT 33 | 34 | typedef struct thread_s { 35 | struct virtual_node_s* virtual_node; 36 | pthread_t pthread; 37 | pid_t tid; 38 | int cpu_id; // the processor the thread is bound on 39 | int cpu_speed_mhz; 40 | struct thread_manager_s* thread_manager; 41 | struct thread_s* next; 42 | int signaled; 43 | #ifdef MEMLAT_SUPPORT 44 | uint64_t stall_cycles; 45 | #endif 46 | #ifdef USE_STATISTICS 47 | thread_stats_t stats; 48 | #else 49 | double last_epoch_timestamp; 50 | #endif 51 | } thread_t; 52 | 53 | typedef struct thread_manager_s { 54 | pthread_mutex_t mutex; 55 | thread_t* thread_list; 56 | int max_epoch_duration_us; // maximum epoch duration in microseconds 57 | int min_epoch_duration_us; // minimum epoch duration in microseconds 58 | int next_virtual_node_id; // used by the round-robin policy -- next virtual node to run on 59 | int next_cpu_id; // used by the round-robin policy -- next cpu to run on 60 | struct virtual_topology_s* virtual_topology; 61 | #ifdef USE_STATISTICS 62 | stats_t stats; 63 | #endif 64 | } thread_manager_t; 65 | 66 | int init_thread_manager(config_t* cfg, struct virtual_topology_s* virtual_topology); 67 | int register_self(); 68 | int unregister_self(); 69 | thread_t* thread_self(); 70 | int reached_min_epoch_duration(thread_t* thread); 71 | void block_new_epoch(); 72 | void unblock_new_epoch(); 73 | 74 | #endif /* __THREAD_H */ 75 | -------------------------------------------------------------------------------- /src/lib/topology.h: 
-------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 | ***************************************************************************/ 14 | #ifndef __TOPOLOGY_H 15 | #define __TOPOLOGY_H 16 | 17 | #include 18 | #include "config.h" 19 | #include "cpu/cpu.h" 20 | #include "dev.h" 21 | 22 | /* DOXYGEN Documentation : */ 23 | 24 | /** 25 | \page virtual_topology Virtual topology 26 | 27 | The emulator constructs a topology of virtual nodes out of physical nodes 28 | (i.e., NUMA sockets) that represents the arrangement of processors, DRAM, 29 | and NVRAM of the virtual machine that the emulator emulates. 30 | 31 | Currently, the emulator supports a NUMA virtual topology where essentially 32 | two physical sockets are fused into a single virtual node. Each virtual 33 | node comprises the processors from one socket only (active socket), and 34 | DRAM from both two physical sockets. The DRAM attached to the active socket 35 | is used as the virtual node's locally attached DRAM and the DRAM of the other 36 | socket (passive) is used as the virtual node's locally attached NVRAM. 
37 | This topology allows us to emulate a machine that has both DRAM and NVRAM but 38 | reduces the computation capacity of the machine to half. 39 | 40 | In the future we would like to support a topology that matches the shared NVRAM 41 | storage of The Machine. 42 | 43 | */ 44 | 45 | 46 | 47 | typedef struct { 48 | int node_id; 49 | cpu_model_t* cpu_model; 50 | pci_regs_t *mc_pci_regs; 51 | int num_cpus; // number of node's cpus 52 | struct bitmask* cpu_bitmask; // a bitmask of the node's CPUs 53 | 54 | // this is actual physical latency. the latency number though depends on 55 | // whether the node corresponds to a dram node or a nvram node. 56 | // if dram then latency is the measured local latency to dram. 57 | // if nvram then latency is the measured remote latency to the sibling nvram node 58 | int latency; 59 | } physical_node_t; 60 | 61 | typedef struct virtual_node_s { 62 | int node_id; 63 | physical_node_t* dram_node; 64 | physical_node_t* nvram_node; 65 | //cpu_model_t* cpu_model; 66 | } virtual_node_t; 67 | 68 | typedef struct virtual_topology_s { 69 | virtual_node_t* virtual_nodes; // pointer to an array of virtual nodes 70 | int num_virtual_nodes; 71 | } virtual_topology_t; 72 | 73 | int init_virtual_topology(config_t* cfg, cpu_model_t* cpu_model, virtual_topology_t** virtual_topologyp); 74 | int system_num_cpus(); 75 | int first_cpu(struct bitmask* bitmask); 76 | int next_cpu(struct bitmask* bitmask, int cpu_id); 77 | 78 | #endif /* __TOPOLOGY_H */ 79 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(${CMAKE_SOURCE_DIR}/third_party/gtest-1.7.0/include) 2 | include_directories(${CMAKE_SOURCE_DIR}/src/lib) 3 | 4 | add_definitions(-g) 5 | add_definitions(-Wall) 6 | #add_definitions(-DNDEBUG) 7 | 8 | add_executable(test_interpose ${CMAKE_CURRENT_SOURCE_DIR}/test_interpose.cc) 9 | 
target_link_libraries(test_interpose pthread gtest) 10 | 11 | add_executable(test_dev ${CMAKE_CURRENT_SOURCE_DIR}/test_dev.cc) 12 | target_link_libraries(test_dev pthread nvmemul) 13 | 14 | add_executable(test_thread ${CMAKE_CURRENT_SOURCE_DIR}/test_thread.cc) 15 | target_link_libraries(test_thread nvmemul pthread) 16 | 17 | add_executable(test_mutex ${CMAKE_CURRENT_SOURCE_DIR}/test_mutex.cc) 18 | target_link_libraries(test_mutex nvmemul pthread) 19 | 20 | add_executable(test_nvm_remote_dram ${CMAKE_CURRENT_SOURCE_DIR}/test_nvm_remote_dram.c) 21 | target_link_libraries(test_nvm_remote_dram nvmemul) 22 | 23 | add_executable(test_nvm ${CMAKE_CURRENT_SOURCE_DIR}/test_nvm.c) 24 | target_link_libraries(test_nvm nvmemul) 25 | 26 | add_executable(test_multithread ${CMAKE_CURRENT_SOURCE_DIR}/test_multithread.c) 27 | #target_link_libraries(test_multithread rt) 28 | target_link_libraries(test_multithread nvmemul pthread) 29 | 30 | add_test(NAME interpose COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_interpose) 31 | 32 | set(ENV_COMMON "LD_PRELOAD=${CMAKE_BINARY_DIR}/src/emul/libnvmemul.so") 33 | 34 | SET_PROPERTY(TEST interpose PROPERTY ENVIRONMENT ${ENV_COMMON} "ENUM_INI=emul.ini") 35 | -------------------------------------------------------------------------------- /test/test_dev.cc: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. 
/*
 * Prints the process id and the addresses returned by two malloc() calls and
 * one pmalloc() call.
 *
 * All three allocations stay alive until every address has been printed, so
 * the printed values are distinct; they are released before exit.
 */
int main(int argc, char** argv)
{
    void* first;
    void* second;
    void* persistent;

    (void) argc;
    (void) argv;

    first = malloc(8);
    second = malloc(8);
    persistent = pmalloc(8);

    printf("PID: %d\n", getpid());
    printf("malloc: %p\n", first);
    printf("malloc: %p\n", second);
    printf("pmalloc: %p\n", persistent);

    /* NOTE(review): pfree(pointer, size) is assumed from its use in test_nvm_remote_dram.c -- confirm against pmalloc.h */
    if (persistent != NULL) {
        pfree(persistent, 8);
    }
    free(second);
    free(first);

    return 0;
}
13 | ***************************************************************************/ 14 | #include 15 | #include 16 | #include 17 | #include "gtest/gtest.h" 18 | 19 | static int interpose_pthread_create_success = 0; 20 | 21 | 22 | // Ugly hack: we want to test whether interposition works. To do this we 23 | // hook on the functions that the interposition code calls by redefining these 24 | // functions. As those functions are written in C, we need to make sure we force 25 | // the C++ compiler use C linkage. 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | // this function is called when interposition of pthread_create is successful 32 | int register_thread(pthread_t thread) 33 | { 34 | interpose_pthread_create_success = 1; 35 | return 0; 36 | } 37 | 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | 42 | void* interpose_pthread_create_start_routine(void* args) 43 | { 44 | return NULL; 45 | } 46 | 47 | void interpose_pthread_create() 48 | { 49 | pthread_t thread; 50 | 51 | pthread_create (&thread, NULL, &interpose_pthread_create_start_routine, NULL); 52 | 53 | pthread_join(thread, NULL); 54 | 55 | } 56 | 57 | void interpose_pthread_mutex_lock(pthread_mutex_t* lock) 58 | { 59 | pthread_mutex_lock(lock); 60 | } 61 | 62 | void interpose_pthread_mutex_unlock(pthread_mutex_t* lock) 63 | { 64 | pthread_mutex_unlock(lock); 65 | } 66 | 67 | TEST(Interpose, pthread_create) 68 | { 69 | EXPECT_EQ(0, interpose_pthread_create_success); 70 | interpose_pthread_create(); 71 | EXPECT_EQ(1, interpose_pthread_create_success); 72 | } 73 | 74 | TEST(Interpose, pthread_mutex_lock) 75 | { 76 | //EXPECT_EQ(1, 0); 77 | } 78 | 79 | 80 | int main(int argc, char** argv) 81 | { 82 | ::testing::InitGoogleTest(&argc, argv); 83 | return RUN_ALL_TESTS(); 84 | 85 | pthread_mutex_t lock; 86 | pthread_mutex_init(&lock, NULL); 87 | interpose_pthread_mutex_lock(&lock); 88 | interpose_pthread_mutex_unlock(&lock); 89 | } 90 | 
-------------------------------------------------------------------------------- /test/test_mutex.cc: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "gtest/gtest.h" 20 | 21 | #define MAX_NUM_THREADS 128 22 | 23 | pthread_mutex_t mutex; 24 | 25 | void* worker(void* args) 26 | { 27 | // int i; 28 | // char* array = (char*) malloc(1024*1024); 29 | 30 | pthread_mutex_lock(&mutex); 31 | 32 | pthread_mutex_unlock(&mutex); 33 | return NULL; 34 | } 35 | 36 | 37 | int main(int argc, char** argv) 38 | { 39 | pthread_t thread[MAX_NUM_THREADS]; 40 | int thread_count = 4; 41 | int i; 42 | // int sum; 43 | 44 | pthread_mutex_init(&mutex, NULL); 45 | pthread_mutex_lock(&mutex); 46 | pthread_mutex_unlock(&mutex); 47 | for (i = 0; i< thread_count; i++) 48 | pthread_create(&thread[i], NULL, worker, NULL); 49 | 50 | for(i = 0 ; i < thread_count ; i++) 51 | pthread_join(thread[i], NULL); 52 | } 53 | -------------------------------------------------------------------------------- /test/test_nvm.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | 15 | #include 16 | #include 17 | 18 | 19 | #define BUF_SIZE (2048) 20 | 21 | unsigned long mem[BUF_SIZE][BUF_SIZE]; 22 | 23 | void iter() 24 | { 25 | int i; 26 | int j; 27 | unsigned long k; 28 | 29 | for (i=0; i < BUF_SIZE; ++i) { 30 | for (j=0; j < BUF_SIZE; ++j) { 31 | mem[i][j] = i * j; 32 | } 33 | } 34 | 35 | k = 0; 36 | while(1) { 37 | for (i=0; i < BUF_SIZE; ++i) { 38 | __asm__ __volatile__(""); 39 | for (j=0; j < BUF_SIZE; ++j) { 40 | k += mem[j][i] + i*j; 41 | mem[j][i] = k; 42 | } 43 | } 44 | // fprintf(stdout, "k is %lu\n", (unsigned long)k); 45 | usleep(1000); 46 | } 47 | } 48 | 49 | int main() 50 | { 51 | iter(); 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /test/test_nvm_remote_dram.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. You 10 | should have received a copy of the GNU General Public License along 11 | with this program; if not, write to the Free Software Foundation, 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
13 | ***************************************************************************/ 14 | 15 | #include 16 | #include 17 | #include "pmalloc.h" 18 | 19 | 20 | #define BUF_SIZE (4 * 1024) 21 | 22 | unsigned long **mem; 23 | 24 | void iter() 25 | { 26 | int i; 27 | int j; 28 | unsigned long k; 29 | 30 | mem = (unsigned long **) pmalloc(BUF_SIZE * sizeof(unsigned long *)); 31 | for (i=0; i < BUF_SIZE; ++i) { 32 | mem[i] = (unsigned long *) pmalloc(BUF_SIZE * sizeof(unsigned long)); 33 | for (j=0; j < BUF_SIZE; ++j) { 34 | mem[i][j] = i * j; 35 | } 36 | } 37 | 38 | k = 0; 39 | while(1) { 40 | for (i=0; i < BUF_SIZE; ++i) { 41 | __asm__ __volatile__(""); 42 | for (j=0; j < BUF_SIZE; ++j) { 43 | k += mem[j][i] + i*j; 44 | mem[j][i] = k; 45 | } 46 | } 47 | // usleep(1000); 48 | } 49 | 50 | for (i=0; i < BUF_SIZE; ++i) { 51 | pfree(mem[i], BUF_SIZE * sizeof(unsigned long)); 52 | } 53 | pfree(mem, BUF_SIZE * sizeof(unsigned long *)); 54 | } 55 | 56 | int main() 57 | { 58 | iter(); 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /test/test_thread.cc: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | Copyright 2016 Hewlett Packard Enterprise Development LP. 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or (at 6 | your option) any later version. This program is distributed in the 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | PURPOSE. See the GNU General Public License for more details. 
/*
 * Thread body: allocates a 1 MiB buffer, increments every byte once,
 * releases the buffer and prints "exiting".
 *
 * The buffer is zero-initialised so the increment never reads indeterminate
 * memory, and it is freed before returning so each thread no longer leaks
 * 1 MiB. If the allocation fails a message is written to stderr and the
 * thread ends without touching memory.
 *
 * args is unused. Always returns NULL.
 */
void* worker(void* args)
{
    const size_t len = 1024 * 1024;
    size_t i;
    char* array = (char*) calloc(len, 1);
    /* volatile view so the stores are not removed as dead writes to a buffer that is freed right after */
    volatile char* cells = array;

    (void) args;

    if (array == NULL) {
        fprintf(stderr, "worker: failed to allocate %lu bytes\n", (unsigned long) len);
        return NULL;
    }

    for (i = 0; i < len; i++) {
        cells[i] += 1;
    }

    free(array);
    printf("exiting\n");
    return NULL;
}