├── AUTHORS
├── CMakeLists.txt
├── Doxyfile
├── README-BENCHMARKS-TESTING.md
├── README.md
├── TODO.dox
├── bench
    ├── CMakeLists.txt
    ├── memlat
    │   ├── CMakeLists.txt
    │   └── memlat.c
    ├── multilat
    │   ├── CMakeLists.txt
    │   └── multilat.c
    └── new_memlat
    │   ├── CMakeLists.txt
    │   ├── memlat.c
    │   └── memlat.sh
├── benchmark-tests
    ├── bandwidth-model-building.sh
    ├── memlat-bench-test-10M-single-socket.sh
    ├── memlat-bench-test-10M.sh
    ├── memlat-orig-lat-test-single-socket.sh
    ├── memlat-orig-lat-test.sh
    ├── nvmemul-bandwidth.ini
    ├── nvmemul-debug.ini
    ├── nvmemul-orig.ini
    └── nvmemul.ini
├── license.txt
├── nvmemul-orig.ini
├── nvmemul.dox
├── nvmemul.ini
├── scripts
    ├── install.sh
    ├── runenv.sh
    ├── setupdev.sh
    └── turboboost.sh
├── src
    ├── CMakeLists.txt
    ├── dev
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── ioctl_query.h
    │   └── pmc.c
    └── lib
    │   ├── CMakeLists.txt
    │   ├── config.c
    │   ├── config.h
    │   ├── cpu
    │       ├── CMakeLists.txt
    │       ├── cpu.c
    │       ├── cpu.h
    │       ├── haswell-papi.h
    │       ├── haswell.h
    │       ├── ivybridge-papi.h
    │       ├── ivybridge.h
    │       ├── known_cpus.h
    │       ├── pmc-papi.c
    │       ├── pmc-papi.h
    │       ├── pmc.c
    │       ├── pmc.h
    │       ├── sandybridge-papi.h
    │       ├── sandybridge.h
    │       └── xeon-ex.h
    │   ├── debug.c
    │   ├── debug.h
    │   ├── dev.c
    │   ├── dev.h
    │   ├── errno.h
    │   ├── error.h
    │   ├── init.c
    │   ├── interpose.c
    │   ├── interpose.h
    │   ├── measure.h
    │   ├── measure_bw.c
    │   ├── measure_lat.c
    │   ├── misc.c
    │   ├── misc.h
    │   ├── model.h
    │   ├── model_bw.c
    │   ├── model_lat.c
    │   ├── monotonic_timer.c
    │   ├── monotonic_timer.h
    │   ├── pflush.c
    │   ├── pflush.h
    │   ├── pmalloc.c
    │   ├── pmalloc.h
    │   ├── process_rank.c
    │   ├── stat.c
    │   ├── stat.h
    │   ├── thread.c
    │   ├── thread.h
    │   ├── topology.c
    │   └── topology.h
└── test
    ├── CMakeLists.txt
    ├── test_dev.cc
    ├── test_interpose.cc
    ├── test_multithread.c
    ├── test_mutex.cc
    ├── test_nvm.c
    ├── test_nvm_remote_dram.c
    └── test_thread.cc


/AUTHORS:
--------------------------------------------------------------------------------
1 | Haris Volos           (haris.volos@hpe.com)
2 | Guilherme Magalhaes   (guilherme.magalhaes@hpe.com)
3 | Lucy Cherkasova       (lucy.cherkasova@gmail.com)
4 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.8)
2 | 
3 | #add_subdirectory(third_party)
4 | add_subdirectory(src)
5 | add_subdirectory(bench)
6 | enable_testing()
7 | #add_subdirectory(test)
8 | 


--------------------------------------------------------------------------------
/README-BENCHMARKS-TESTING.md:
--------------------------------------------------------------------------------
 1 | **For testing whether your environment is configured correctly for
 2 | running Quartz** (e.g., whether you set all the required environmental
 3 | variables, etc.) **we have created a few scripts with benchmarks, which
 4 | can be executed automatically** and which can provide you with a
 5 | feedback on Quartz performance in your environment.
 6 | 
 7 | **The directory with these scripts is called: *benchmark-tests*. There are three scripts which you can run:**
 8 | - **bandwidth-model-building.sh**
 9 | 
10 |    This script will execute for approximately **10 min** and will build a memory
11 |    bandwidth model that can be used in the experiments with memory bandwidth
12 |    throttling. The configuration file uses "debug" mode on purpose, so that
13 |    you can see on-screen messages about the progress of the memory bandwidth
14 |    model building. The resulting model can be found at */tmp/bandwidth_model*.
15 | 
16 | - **memlat-orig-lat-test.sh**
17 | 
18 |     This script will measure your server hardware *memory access latency* in nanoseconds: local
19 |     and remote (for two-socket servers). It will execute the test 20 times and write the results to the directory *ORIG-lat-test*.
20 |     You can find the summary of the results in the file *ORIG-lat-test/final-hw-latency.txt*.
21 |     It will have measurements like:
22 |     
23 |                FORMAT:  1_min_local  2_aver_local  3_max_local  4_min_remote  5_aver_remote  6_max_remote
24 |                            91             91.9           92           152        163.9           176
25 |    
26 |     First three numbers show: minimal, average and maximum measured local
27 |     memory access latency (in ns, over 20 measurements). The last three numbers
28 |     show similar measurements for the access latency of the remote memory,
29 |     i.e., in the second socket.
30 | 
31 | -  **memlat-bench-test-10M.sh**
32 | 
33 |     This script will execute memlat benchmark (pointer-chasing benchmark) with
34 |     nine emulated memory access latencies: 200 ns, 300 ns,..., 1000 ns.
35 |     It will run the benchmark with these emulated latencies in two settings:
36 |     in the local socket (i.e., emulating a higher memory access latency in the
37 |     local socket) and similarly, in the remote socket.
38 |     Each test is repeated 10 times: this is used for assessing the variability
39 |     of your environment. In some cases, we had issues with TurboBoost mode,
40 |     which impacted the quality of the emulation.
41 |     This test might take **approx. 30 min to finish** (since it executes 180 tests),
42 |     and will create two output directories:  *FULL-RESULTS-test*  and
43 |     *SUMMARY-RESULTS-test*
44 |     In the directory SUMMARY-RESULTS-test, you will find two files that
45 |     summarize the outcome of the experiments in the local and remote sockets.
46 |     The outcome should look like this:
47 |     
48 |           FORMAT: 1_emul_lat  2_min_meas_lat  3_aver_meas_lat  4_max_meas_lat  5_aver_error(%) 6_max_error(%)
49 |                    200           177            197.9             204              1.05            11.5
50 |                    300           259            289.5             300              3.5             13.6  
51 |                    400           354            382.6             395              4.3             11.5
52 |                    500           468            485.8             490              2.8             6.4
53 |                    600           554            575.3             585              4.1             7.6
54 |                    700           640            666.6             681              4.7             8.5
55 |                    800           749            766.4             776              4.2             6.3
56 |                    900           851            866.2             871              3.7             5.4
57 |                    1000          926            956.5             966              4.35            7.4
58 |     
59 |           The format is the following:
60 |           1st column:    emulated latency (in nanoseconds)
61 |           2nd column:    minimum measured  latency (across 10 tests, in ns)
62 |           3rd column:    average measured  latency (across 10 tests, in ns)
63 |           4th column:    maximum measured  latency (across 10 tests, in ns)
64 |           5th column:    average error (between emulated and measured latencies, in %)
65 |           6th column:    max error (between emulated and measured latencies, in %)
66 | 
67 | One of the goals of the designed performance emulator is to provide a
68 | framework for application sensitivity studies under different
69 | latencies and memory bandwidths. Even if you have a 15% deviation (error) from
70 | the targeted emulated latencies, but the benchmark measurements are
71 | consistent -- this is a good sign that you can perform a good
72 | sensitivity study.
73 | 


--------------------------------------------------------------------------------
/TODO.dox:
--------------------------------------------------------------------------------
 1 | /**
 2 | \file
 3 | 
 4 | \todo Improve performance counter API by making it more generic. For example, autogenerate pmc event_id using perf.
 5 | \todo Currently we may interrupt a thread to form a new epoch while it is blocked. This might cause accumulation of overhead cycles.
 6 | \todo Currently our bandwidth model cannot independently throttle read and write bandwidth as it relies on throttling DDR ACT transactions. We tried throttling DDR READ and DDR WRITE transactions but this didn't work.
 7 | \todo Extend library to interpose on other synchronization events we care about: semaphores, barriers, context switches, openMP sync primitives, etc.
 8 | \todo Currently our library does not support context switching. Extend the device driver to properly handle context switching: keep track of per-thread cpu counters, introduce proper delay at context switch points.
 9 | \todo Support uncacheable and write-through memory.
10 | \todo Signal SIGUSR1 should be dedicated to the emulator. If the application makes use of this signal, the emulator will not work. Figure out a way to fix this limitation.
11 | \todo Interpose pthread_cancel() and pthread_exit() to make sure the thread is always deregistered internally to the emulator?
12 | \todo CPU counters overflow is not currently handled.
13 | \todo Multiple processes emulation must be reviewed: log file per process, statistics report by process, process id and thread id indications in the log messages.
14 | \todo See Limitations section in the README file.
15 | */
16 | 


--------------------------------------------------------------------------------
/bench/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(memlat)
2 | add_subdirectory(new_memlat)
3 | add_subdirectory(multilat)
4 | 


--------------------------------------------------------------------------------
/bench/memlat/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | include_directories(${CMAKE_SOURCE_DIR}/src/lib)
2 | add_executable(memlat memlat.c)
3 | target_link_libraries(memlat nvmemul pthread)
4 | 


--------------------------------------------------------------------------------
/bench/memlat/memlat.c:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
#include <assert.h>
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
19 | 
20 | #define MAX_NUM_THREADS 512
21 | 
22 | uint64_t g_seed, g_nchains, g_nelems, g_from_node_id, g_to_node_id, g_element_size, g_access_size;
23 | 
24 | extern int measure_latency2(uint64_t seedin, int nchains, size_t nelems, int element_size, int access_size, int from_node_id, int to_node_id);
25 | 
/**
 * Parse a command-line argument as an unsigned base-10 integer.
 *
 * Only a non-empty string made up entirely of the digits 0-9 is accepted.
 * Unlike a bare strtoull() call, a leading sign or whitespace is rejected and
 * a value that does not fit in 64 bits is reported instead of being clamped.
 * The checks are explicit rather than assert()-based so that they stay active
 * when the program is built with NDEBUG.
 *
 * @param s  NUL-terminated string to convert.
 * @return   The parsed value. On invalid input a diagnostic is written to
 *           stderr and the process is terminated with abort().
 */
static uint64_t safe_strtoull(const char *s) {
    uint64_t value = 0;
    const char *p;

    if (NULL == s || '\0' == *s) {
        fprintf(stderr, "safe_strtoull: missing numeric argument\n");
        abort();
    }
    for (p = s; '\0' != *p; p++) {
        uint64_t digit;

        if (*p < '0' || *p > '9') {
            fprintf(stderr, "safe_strtoull: '%s' is not an unsigned decimal number\n", s);
            abort();
        }
        digit = (uint64_t)(*p - '0');
        /* Refuse to wrap around: value * 10 + digit must fit in 64 bits. */
        if (value > (UINT64_MAX - digit) / 10) {
            fprintf(stderr, "safe_strtoull: '%s' is out of range\n", s);
            abort();
        }
        value = value * 10 + digit;
    }
    return value;
}
34 | 
35 | 
36 | void* worker(void* arg) 
37 | {
38 |     int latency_ns;
39 | 
40 |     latency_ns = measure_latency2(g_seed, g_nchains, g_nelems, g_element_size, g_access_size, g_from_node_id, g_to_node_id);
41 |     printf("latency_ns: %d\n", latency_ns);
42 | 
43 |     return NULL;
44 | }
45 | int main(int argc, char *argv[]) {
46 | 	int i;
47 |     uint64_t nthreads;
48 |     pthread_t thread[MAX_NUM_THREADS];
49 | 
50 |     if (9 != argc) {
51 |         fprintf(stderr, "usage: %s PRNGseed Nthreads Nchains Nelems SZelem SZaccess from_node to_node\n", argv[0]);
52 |         return 1;
53 |     }
54 |     g_seed  = safe_strtoull(argv[1]);
55 |     nthreads = safe_strtoull(argv[2]);
56 |     g_nchains = safe_strtoull(argv[3]);
57 |     g_nelems = safe_strtoull(argv[4]);
58 |     g_element_size = safe_strtoull(argv[5]);
59 |     g_access_size = safe_strtoull(argv[6]);
60 |     g_from_node_id = safe_strtoull(argv[7]);
61 |     g_to_node_id = safe_strtoull(argv[8]);
62 | 
63 | 	for (i = 0; i< nthreads; i++) {
64 | 		pthread_create(&thread[i], NULL, worker, NULL);
65 |     }
66 | 	for(i = 0 ; i < nthreads; i++) {
67 | 		pthread_join(thread[i], NULL);
68 |     }
69 |     return 0;
70 | }
71 | 


--------------------------------------------------------------------------------
/bench/multilat/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | include_directories(${CMAKE_SOURCE_DIR}/src/lib)
2 | 
3 | add_executable(multilat multilat.c)
4 | target_link_libraries(multilat nvmemul pthread)
5 | 


--------------------------------------------------------------------------------
/bench/new_memlat/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | include_directories(${CMAKE_SOURCE_DIR}/src/lib)
2 | add_executable(new_memlat memlat.c)
3 | target_link_libraries(new_memlat nvmemul pthread)
4 | 


--------------------------------------------------------------------------------
/bench/new_memlat/memlat.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
#include <assert.h>
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "model.h"
#include "thread.h"
 21 | 
 22 | #define MAX_NUM_THREADS 512
 23 | 
 24 | uint64_t g_seed, g_nchains, g_nelems, g_from_node_id, g_to_node_id, g_element_size, g_access_size;
 25 | 
 26 | extern int measure_latency2(uint64_t seedin, int nchains, size_t nelems, int element_size, int access_size, int from_node_id, int to_node_id);
 27 | 
 28 | static uint64_t safe_strtoull(const char *s) {
 29 |     char *ep;
 30 |     uint64_t r;
 31 |     assert(NULL != s && '\0' != *s);
 32 |     r = strtoull(s, &ep, 10);
 33 |     assert('\0' == *ep);
 34 |     return r;
 35 | }
 36 | 
 37 | extern latency_model_t latency_model;
 38 | 
 39 | #ifdef MEMLAT_SUPPORT
 40 | extern __thread int tls_hw_local_latency;
 41 | extern __thread int tls_hw_remote_latency;
 42 | extern __thread uint64_t tls_global_remote_dram;
 43 | extern __thread uint64_t tls_global_local_dram;
 44 | 
 45 | static inline uint64_t ns_to_cycles(int cpu_speed_mhz, int ns)
 46 | {
 47 |     return (cpu_speed_mhz * ns) / 1000;
 48 | }
 49 | #endif
 50 | 
 51 | void* worker(void* arg) 
 52 | {
 53 |     int latency_ns;
 54 | #ifdef MEMLAT_SUPPORT
 55 |     uint64_t exp_stalls;
 56 |     uint64_t calc_nvm_accesses;
 57 |     uint64_t detected_hw_lat;
 58 |     uint64_t actual_lat = 0;
 59 |     uint64_t total_time;
 60 |     uint64_t fixed_latency_ns = 0;
 61 |     uint64_t nvm_accesses = 0;
 62 |     uint64_t nvm_hw_latency;
 63 | #endif
 64 | 
 65 |     latency_ns = measure_latency2(g_seed, g_nchains, g_nelems, g_element_size, g_access_size, g_from_node_id, g_to_node_id);
 66 |     printf("latency_ns: %d ns\n", latency_ns);
 67 | 
 68 | #ifdef MEMLAT_SUPPORT
 69 |     total_time = g_nelems * latency_ns;
 70 |     if (thread_self()->virtual_node->dram_node != thread_self()->virtual_node->nvram_node) {
 71 |         detected_hw_lat = ns_to_cycles(thread_self()->cpu_speed_mhz, tls_hw_remote_latency);
 72 |         if (tls_global_remote_dram > 0) {
 73 |     	    actual_lat = thread_self()->stall_cycles / tls_global_remote_dram;
 74 |     	    fixed_latency_ns = total_time / tls_global_remote_dram;
 75 |     	    nvm_accesses = tls_global_remote_dram;
 76 |     	}
 77 |     	nvm_hw_latency = tls_hw_remote_latency;
 78 |     } else {
 79 |         detected_hw_lat = ns_to_cycles(thread_self()->cpu_speed_mhz, tls_hw_local_latency);
 80 |         if (tls_global_local_dram > 0) {
 81 |     	    actual_lat = thread_self()->stall_cycles / tls_global_local_dram;
 82 |     	    fixed_latency_ns = total_time / tls_global_local_dram;
 83 |     	    nvm_accesses = tls_global_local_dram;
 84 |     	}
 85 |     	nvm_hw_latency = tls_hw_local_latency;
 86 |     }
 87 |     exp_stalls = g_nelems * detected_hw_lat;
 88 |     calc_nvm_accesses = thread_self()->stall_cycles / detected_hw_lat;
 89 | 
 90 |     printf("target latency: %d ns\n", latency_model.read_latency);
 91 |     printf("Error: %3.1f%%\n", (double)(abs(latency_model.read_latency - latency_ns)*100) / (double)latency_model.read_latency);
 92 |     printf("target NVM accesses: %ld\n", g_nelems);
 93 |     printf("detected HW latency: %ld ns\n", nvm_hw_latency);
 94 |     printf("detected HW latency: %ld cycles (detected_hw_lat making use of cpu_speed_mhz)\n", detected_hw_lat);
 95 |     printf("expected CPU stalls: %ld cycles (target_nvm_accesses * detected_hw_lat)\n", exp_stalls);
 96 |     printf("actual CPU stalls: %ld cycles\n", thread_self()->stall_cycles);
 97 |     printf("calculated NVM accesses: %ld (actual_cpu_stalls / detected_hw_lat)\n", calc_nvm_accesses);
 98 |     if (nvm_accesses != 0) {
 99 |         printf("actual NVM accesses: %ld\n", nvm_accesses);
100 |         printf("actual latency: %ld cyles (actual_stalls / actual_nvm_accesses)\n", actual_lat);
101 |         printf("fixed measured latency: %ld ns (total_chasing_time / actual_nvm_accesses)\n", fixed_latency_ns);
102 |         printf("fixed latency error: %3.1f%%\n", (double)(abs(latency_model.read_latency - fixed_latency_ns)*100) / (double)latency_model.read_latency);
103 |     } else {
104 |         fixed_latency_ns = total_time / calc_nvm_accesses;
105 |         printf("fixed measured latency: %ld ns (total_chasing_time / calculated_nvm_accesses)\n", fixed_latency_ns);
106 |         printf("fixed latency error: %3.1f%%\n", (double)(abs(latency_model.read_latency - fixed_latency_ns)*100) / (double)latency_model.read_latency);
107 |     }
108 | #endif
109 |     return NULL;
110 | }
111 | int main(int argc, char *argv[]) {
112 | 	int i;
113 |     uint64_t nthreads;
114 |     pthread_t thread[MAX_NUM_THREADS];
115 | 
116 |     if (9 != argc) {
117 |         fprintf(stderr, "usage: %s PRNGseed Nthreads Nchains Nelems SZelem SZaccess from_node to_node\n", argv[0]);
118 |         return 1;
119 |     }
120 |     g_seed  = safe_strtoull(argv[1]);
121 |     nthreads = safe_strtoull(argv[2]);
122 |     g_nchains = safe_strtoull(argv[3]);
123 |     g_nelems = safe_strtoull(argv[4]);
124 |     g_element_size = safe_strtoull(argv[5]);
125 |     g_access_size = safe_strtoull(argv[6]);
126 |     g_from_node_id = safe_strtoull(argv[7]);
127 |     g_to_node_id = safe_strtoull(argv[8]);
128 | 
129 | 	for (i = 0; i< nthreads; i++) {
130 | 		pthread_create(&thread[i], NULL, worker, NULL);
131 |     }
132 | 	for(i = 0 ; i < nthreads; i++) {
133 | 		pthread_join(thread[i], NULL);
134 |     }
135 |     return 0;
136 | }
137 | 


--------------------------------------------------------------------------------
/bench/new_memlat/memlat.sh:
--------------------------------------------------------------------------------
  1 | #################################################################
  2 | #Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | #This program is free software; you can redistribute it and/or modify
  4 | #it under the terms of the GNU General Public License as published by
  5 | #the Free Software Foundation; either version 2 of the License, or (at
  6 | #your option) any later version. This program is distributed in the
  7 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | #PURPOSE. See the GNU General Public License for more details. You
 10 | #should have received a copy of the GNU General Public License along
 11 | #with this program; if not, write to the Free Software Foundation,
 12 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | #################################################################
 14 | #!/bin/bash
 15 | 
 16 | # percentage of error as threshold to discard outliers, anything above this percentage will be discarded
 17 | MAX_ERROR_PERCENTAGE=10
 18 | # max number of tries to execute memlat
 19 | MAX_TRIES=10
 20 | 
 21 | 
 22 | TEMP_FILE=/tmp/tmp_memlat.out
 23 | 
 24 | 
 25 | NVM_EMUL_PATH="`dirname $0`/../.."
 26 | NELEMS=$1
 27 | TARGET_DRAM=$2
 28 | 
 29 | 
 30 | function usage()
 31 | {
 32 |     echo "$0 [number of elements] [0=local dram|1=remote dram]"
 33 |     exit 1
 34 | }
 35 | 
 36 | function validate_decimal()
 37 | {
 38 |     re='^[0-9]+$'
 39 |     if ! [[ $1 =~ $re ]] ; then
 40 |         return 1
 41 |     fi
 42 |     return 0
 43 | }
 44 | 
 45 | function check_parameters()
 46 | {
 47 |     if [ $# -ne 2 ]; then
 48 |         echo "Incorrect arguments"
 49 |         usage
 50 |     fi
 51 | 
 52 |     validate_decimal ${NELEMS}
 53 | 
 54 |     if [ $? -ne 0 ]; then
 55 |         echo "Invalid number of arguments"
 56 |         usage
 57 |     fi
 58 | 
 59 |     if [ ${TARGET_DRAM} -ne 0 -a ${TARGET_DRAM} -ne 1 ]; then
 60 |         echo "Incorret dram target"
 61 |         usage
 62 |     fi
 63 | }
 64 | 
 65 | function verify_run
 66 | {
 67 |     target=$(cat ${TEMP_FILE} | grep "target latency" | awk '{ print $3 }')
 68 |     measured=$(cat ${TEMP_FILE} | grep "measured latency" | awk '{ print $4 }')
 69 | 
 70 |     if [ ${measured} -gt ${target} ]; then
 71 |         delta=$(expr ${measured} - ${target});
 72 |     else
 73 |         delta=$(expr ${target} - ${measured});
 74 |     fi
 75 | 
 76 |     if [ ${target} -gt 0 ]; then
 77 |         error=$(expr ${delta} \* 100)
 78 |         error=$(expr ${error} \/ ${target})
 79 |     else
 80 |         error=0
 81 |     fi
 82 | 
 83 | 
 84 |     if [ ${error} -gt ${MAX_ERROR_PERCENTAGE} ]; then
 85 |         return 1
 86 |     fi
 87 | 
 88 |     return 0
 89 | }
 90 | 
 91 | ############ MAIN ######################
 92 | 
 93 | check_parameters $*
 94 | 
 95 | # execute memlat in loop until the result is within the threshold or the max tries is reached
 96 | for (( c=0; c<${MAX_TRIES}; c++ )); do
 97 |     ${NVM_EMUL_PATH}/scripts/runenv.sh ${NVM_EMUL_PATH}/build/bench/new_memlat/new_memlat 1 1 1 ${NELEMS} 64 8 0 ${TARGET_DRAM} &> ${TEMP_FILE}
 98 | 
 99 |     verify_run
100 | 
101 |     ret=$?
102 | 
103 |     if [ ${ret} -eq 0 ]; then
104 |         cat ${TEMP_FILE} | grep "measured latency"
105 |         break
106 |     fi
107 | done
108 | 
109 | if [ ${ret} -ne 0 ]; then
110 |     echo "Could not produce a valid run"
111 | fi
112 | 
113 | rm -f ${TEMP_FILE}
114 | 
115 | exit ${ret}
116 | 


--------------------------------------------------------------------------------
/benchmark-tests/bandwidth-model-building.sh:
--------------------------------------------------------------------------------
 1 | #################################################################
 2 | #Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | #This program is free software; you can redistribute it and/or modify
 4 | #it under the terms of the GNU General Public License as published by
 5 | #the Free Software Foundation; either version 2 of the License, or (at
 6 | #your option) any later version. This program is distributed in the
 7 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | #PURPOSE. See the GNU General Public License for more details. You
10 | #should have received a copy of the GNU General Public License along
11 | #with this program; if not, write to the Free Software Foundation,
12 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | #################################################################
14 | #!/bin/bash
15 | 
# Select the "performance" cpufreq governor on every CPU (requires sudo).
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

# Activate the bandwidth configuration and discard any previous model.
# -f keeps a first run, where no model exists yet, from reporting an error.
cp  nvmemul-bandwidth.ini  nvmemul.ini
rm -f /tmp/bandwidth_model
# memlat is run twice with identical arguments.
# NOTE(review): presumably the first run builds /tmp/bandwidth_model and the
# second one uses it -- confirm against the library's model-loading code.
../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0
../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0
22 | 


--------------------------------------------------------------------------------
/benchmark-tests/memlat-bench-test-10M-single-socket.sh:
--------------------------------------------------------------------------------
 1 | #################################################################
 2 | #Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | #This program is free software; you can redistribute it and/or modify
 4 | #it under the terms of the GNU General Public License as published by
 5 | #the Free Software Foundation; either version 2 of the License, or (at
 6 | #your option) any later version. This program is distributed in the
 7 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | #PURPOSE. See the GNU General Public License for more details. You
10 | #should have received a copy of the GNU General Public License along
11 | #with this program; if not, write to the Free Software Foundation,
12 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | #################################################################
14 | #!/bin/bash
15 | 
#awk '($1~/physical_nodes/) {print;}'  nvmemul.ini

# Select the "performance" cpufreq governor on every CPU (requires sudo).
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

dir_name_res=FULL-RESULTS-test
dir_name_sum=SUMMARY-RESULTS-test

# Start from empty output directories and remove leftover foo* scratch files.
rm -rf "$dir_name_sum"
mkdir  "$dir_name_sum"

rm -f foo*
rm -rf "$dir_name_res"
mkdir "$dir_name_res"

# Record the governor now in effect next to the results.
cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor >> "$dir_name_res/foo-runs-test"

# One run with the unmodified configuration; its output only goes to the
# scratch file foo.
cp nvmemul-orig.ini nvmemul.ini
../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0 >foo

for numchains in 1
do
    for epoch in 10000
    do
        summary_file="$dir_name_sum/summary-nvm-lat-accuracy-epoch-$epoch-numchains-$numchains.txt"
        echo "#FORMAT #1_emul_lat(ns) #2_min_meas_lat(ns)  #3_aver_meas_lat(ns)  #4_max_meas_lat(ns)  #5_aver_error(%) #6_max_error(%)" >  "$summary_file"

        for lat in 200 300 400 500 600 700 800 900 1000
        do
            # Derive nvmemul.ini from nvmemul-orig.ini: line 7 gets the emulated
            # latency as its third field, lines 9 and 10 get the epoch value,
            # and the physical_nodes entry is set to "0". The trailing v=...
            # operands are read back through ARGV inside BEGIN.
            awk 'BEGIN {read_lat = substr(ARGV[2],3); epoch_lat = substr(ARGV[3],3);}
(!(NR==7 || NR==9 || NR==10 || $1~/physical_nodes/)){ print;}
(NR==7){ print $1,$2, read_lat,";";}
(NR==9){ print $1,$2, epoch_lat,";";}
(NR==10){ print $1,$2, epoch_lat,";";}
($1~/physical_nodes/) {print $1,$2,"\"0\""";";}
' nvmemul-orig.ini v=$lat v=$epoch > "foo-nvmemul-$lat-$epoch.ini"
            mv "foo-nvmemul-$lat-$epoch.ini"  nvmemul.ini
            echo "lat epoch chains" $lat $epoch $numchains >>   "$dir_name_res/foo-runs"

            full_results="$dir_name_res/full_results-$lat-$epoch-$numchains.txt"
            results="$dir_name_res/results-$lat-$epoch-$numchains.txt"

            # Ten repetitions per emulated latency.
            for run in 1 2 3 4 5 6 7 8 9 10
            do
                ../build/bench/memlat/memlat 1 1 $numchains 10000000 64 8 0 0 >> "$full_results"
            done
            grep latency_ns "$full_results" > "$results"

            # Append one summary row: emulated latency, min / average / max of
            # the second field of the latency_ns lines, and the average and
            # maximum error in percent. If memlat produced no latency_ns line
            # at all, warn on stderr and emit no row instead of dividing by
            # a record count of zero.
            awk 'BEGIN {max = 0; min = 1000000; sum = 0; aver=0.0; max_error=0.0; aver_error=0.0;read_lat = substr(ARGV[2],3);epoch_lat = substr(ARGV[3],3); MPL = substr(ARGV[4],3); }
($2 > max){max = $2;}
($2 < min){min = $2;}
{sum=sum+$2; if ($2 < read_lat*1.0) {error=read_lat -$2} else {error=$2 - read_lat}; if (error > max_error) max_error=error;}
END {if (NR == 0) {print "no latency_ns samples for emulated latency " read_lat > "/dev/stderr"; exit 1}
aver=sum/NR; if (aver < read_lat*1.0) {aver_error = (read_lat - aver)*100.0/read_lat} else {aver_error = (aver - read_lat )*100.0/read_lat}; print read_lat, min,aver,max, aver_error,max_error*100.0/read_lat;} '   "$results" v=$lat v=$epoch v=$numchains >> "$summary_file"

        done
    done
done


#FORMAT_summary-results: #1_nvm_lat(ns) #2_min_nvm_lat(ns)  #3_aver_nvm_lat(ns)  #4_max_nvm_lat(ns)  #5_aver_error(%) #6_max_error(%)

#parameter is nvm_lat
73 | 
74 | 
75 | 
76 | 


--------------------------------------------------------------------------------
/benchmark-tests/memlat-bench-test-10M.sh:
--------------------------------------------------------------------------------
 1 | #################################################################
 2 | #Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | #This program is free software; you can redistribute it and/or modify
 4 | #it under the terms of the GNU General Public License as published by
 5 | #the Free Software Foundation; either version 2 of the License, or (at
 6 | #your option) any later version. This program is distributed in the
 7 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | #PURPOSE. See the GNU General Public License for more details. You
10 | #should have received a copy of the GNU General Public License along
11 | #with this program; if not, write to the Free Software Foundation,
12 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | #################################################################
14 | #!/bin/bash
15 | # Sweeps the emulated read latency for a local and a remote node setup and summarizes how closely memlat measures it.
16 | #awk '($1~/physical_nodes/) {print;}'  nvmemul.ini
17 | # Count the distinct "physical id" values; with a single one, hand off to the single-socket variant.
18 | num_sockets=$(cat /proc/cpuinfo | grep "physical id" | sort -u | wc -l)
19 | if [ $num_sockets -eq 1 ]; 
20 | then
21 | echo "Single Socket"
22 | ./memlat-bench-test-10M-single-socket.sh
23 | exit 0
24 | fi
25 | # Set every CPU's cpufreq scaling governor to "performance" (needs sudo).
26 | echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
27 | # Output directories: raw per-run output and per-configuration summaries.
28 | dir_name_res=FULL-RESULTS-test
29 | dir_name_sum=SUMMARY-RESULTS-test
30 | # Start from empty summary and results directories.
31 | rm -rf $dir_name_sum
32 | mkdir  $dir_name_sum
33 | 
34 | rm -f foo*
35 | rm -rf $dir_name_res
36 | mkdir $dir_name_res
37 | # Record the governor settings next to the results.
38 | cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor >> $dir_name_res/foo-runs-test
39 | # Restore the unmodified configuration and do one short run; its output (foo) is not read again by this script.
40 | cp nvmemul-orig.ini nvmemul.ini
41 | ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 1 >foo
42 | # confpar (0 for local, 1 for remote) is passed as the last memlat argument.
43 | for conf in local remote
44 | do
45 |     if [ $conf = local ]; then confpar=0 
46 |     else confpar=1
47 |     fi
48 |     for numchains in 1 
49 |     do
50 | 	for epoch in 10000 
51 | 	do 
52 | 	    echo "#FORMAT #1_emul_lat(ns) #2_min_meas_lat(ns)  #3_aver_meas_lat(ns)  #4_max_meas_lat(ns)  #5_aver_error(%) #6_max_error(%)" >  $dir_name_sum/summary-nvm-lat-accuracy-$conf-epoch-$epoch-numchains-$numchains.txt
53 | 	    # Per latency: rewrite lines 7, 9 and 10 of nvmemul-orig.ini (read, max/min epoch duration in the copy shipped with this script) and set physical_nodes to "0" (local) or "0,1" (remote).
54 | 	    for lat in 200 300 400 500 600 700 800 900 1000
55 | 	    do
56 | 		awk 'BEGIN {read_lat = substr(ARGV[2],3); epoch_lat = substr(ARGV[3],3); config = substr(ARGV[4],3);}
57 | (!(NR==7 || NR==9 || NR==10 || $1~/physical_nodes/)){ print;}
58 | (NR==7){ print $1,$2, read_lat,";";}
59 | (NR==9){ print $1,$2, epoch_lat,";";}
60 | (NR==10){ print $1,$2, epoch_lat,";";}
61 | ($1~/physical_nodes/ && config ~ /local/) {print $1,$2,"\"0\""";";}
62 | ($1~/physical_nodes/ && config ~ /remote/) {print $1,$2,"\"0,1\""";";}
63 | ' nvmemul-orig.ini v=$lat v=$epoch v=$conf > foo-nvmemul-$lat-$epoch.ini
64 | 		mv foo-nvmemul-$lat-$epoch.ini  nvmemul.ini
65 | 		echo "lat epoch chains" $lat $epoch $numchains >>   $dir_name_res/foo-runs
66 | 		# Run memlat ten times, keep the latency_ns lines, then append one summary row: emulated latency, min/average/max of field 2, average and maximum error in percent.
67 | 		for time in 1 2 3 4 5 6 7 8 9 10
68 | 		do
69 | 		    ../build/bench/memlat/memlat 1 1 $numchains 10000000 64 8 0 $confpar >> $dir_name_res/full_results-$conf-$lat-$epoch-$numchains.txt
70 |  		done
71 |                 grep latency_ns $dir_name_res/full_results-$conf-$lat-$epoch-$numchains.txt > $dir_name_res/results-$conf-$lat-$epoch-$numchains.txt
72 | 		awk 'BEGIN {max = 0; min = 1000000; sum = 0; aver=0.0; max_error=0.0; aver_error=0.0;read_lat = substr(ARGV[2],3);epoch_lat = substr(ARGV[3],3); MPL = substr(ARGV[4],3); }
73 | ($2 > max){max = $2;}
74 | ($2 < min){min = $2;}
75 | {sum=sum+$2; if ($2 < read_lat*1.0) {error=read_lat -$2} else {error=$2 - read_lat}; if (error > max_error) max_error=error;}
76 | END {aver=sum/NR; if (aver < read_lat*1.0) {aver_error = (read_lat - aver)*100.0/read_lat} else {aver_error = (aver - read_lat )*100.0/read_lat}; print read_lat, min,aver,max, aver_error,max_error*100.0/read_lat;} '   $dir_name_res/results-$conf-$lat-$epoch-$numchains.txt v=$lat v=$epoch v=$numchains >> $dir_name_sum/summary-nvm-lat-accuracy-$conf-epoch-$epoch-numchains-$numchains.txt
77 | 		
78 | 	    done
79 | 	done
80 |     done
81 | done
82 | 
83 | 
84 | #FORMAT_summary-results: #1_nvm_lat(ns) #2_min_nvm_lat(ns)  #3_aver_nvm_lat(ns)  #4_max_nvm_lat(ns)  #5_aver_error(%) #6_max_error(%)
85 | 
86 | #parameter is nvm_lat
87 | 
88 | 
89 | 
90 | 


--------------------------------------------------------------------------------
/benchmark-tests/memlat-orig-lat-test-single-socket.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #################################################################
 3 | #Copyright 2016 Hewlett Packard Enterprise Development LP.  
 4 | #This program is free software; you can redistribute it and/or modify
 5 | #it under the terms of the GNU General Public License as published by
 6 | #the Free Software Foundation; either version 2 of the License, or (at
 7 | #your option) any later version. This program is distributed in the
 8 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 9 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10 | #PURPOSE. See the GNU General Public License for more details. You
11 | #should have received a copy of the GNU General Public License along
12 | #with this program; if not, write to the Free Software Foundation,
13 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 | #################################################################
15 | # Measures the hardware memory latency reported by memlat over 20 runs and writes its min/average/max.
16 | #awk '($1~/physical_nodes/) {print;}'  nvmemul.ini
17 | # Set every CPU's cpufreq scaling governor to "performance" (needs sudo).
18 | echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
19 | 
20 | dir_name_res=ORIG-lat-test
21 | # Start from an empty results directory.
22 | rm -f foo*
23 | rm -rf $dir_name_res
24 | mkdir $dir_name_res
25 | 
26 | # Use the debug configuration and do one initial run whose output is not captured.
27 | cp  nvmemul-debug.ini  nvmemul.ini
28 | ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0
29 | # Each run contributes field 7 of its first "measuring latency" line to the list file.
30 | for time in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
31 | do
32 |     ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0 > $dir_name_res/foo-hw-latency.txt
33 |     grep "measuring latency: latency is" $dir_name_res/foo-hw-latency.txt > $dir_name_res/foo
34 |     awk 'NR==1 {local=$7;}
35 |          END {print local}'  $dir_name_res/foo >>  $dir_name_res/list-hw-latency.txt
36 | done
37 | 
38 | echo "#FORMAT:#1_min #2_aver #3_max" > $dir_name_res/final-hw-latency.txt  
39 | # The list file has a single column, so only column 1 is accumulated.
40 | awk 'BEGIN {max1 = 0.0; min1 = 10000000.0; sum1 = 0.0;}
41 |          ($1 > max1){max1 = $1;}
42 |          ($1 < min1){min1 = $1;}
43 |          {sum1=sum1+$1;}
44 |          END {print min1, sum1/NR, max1;}'  $dir_name_res/list-hw-latency.txt  >> $dir_name_res/final-hw-latency.txt  
45 | # Remove the temporary foo* files inside the results directory.
46 | rm  $dir_name_res/foo*
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/benchmark-tests/memlat-orig-lat-test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #################################################################
 3 | #Copyright 2016 Hewlett Packard Enterprise Development LP.  
 4 | #This program is free software; you can redistribute it and/or modify
 5 | #it under the terms of the GNU General Public License as published by
 6 | #the Free Software Foundation; either version 2 of the License, or (at
 7 | #your option) any later version. This program is distributed in the
 8 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 9 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10 | #PURPOSE. See the GNU General Public License for more details. You
11 | #should have received a copy of the GNU General Public License along
12 | #with this program; if not, write to the Free Software Foundation,
13 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 | #################################################################
15 | # Measures the hardware memory latency reported by memlat over 20 runs and writes min/average/max per column.
16 | #awk '($1~/physical_nodes/) {print;}'  nvmemul.ini
17 | # Count the distinct "physical id" values; with a single one, hand off to the single-socket variant.
18 | num_sockets=$(cat /proc/cpuinfo | grep "physical id" | sort -u | wc -l)
19 | if [ $num_sockets -eq 1 ]; 
20 | then
21 | echo "Single Socket"
22 | ./memlat-orig-lat-test-single-socket.sh
23 | exit 0
24 | fi
25 | # Set every CPU's cpufreq scaling governor to "performance" (needs sudo).
26 | echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
27 | 
28 | dir_name_res=ORIG-lat-test
29 | # Start from an empty results directory.
30 | rm -f foo*
31 | rm -rf $dir_name_res
32 | mkdir $dir_name_res
33 | 
34 | # Use the debug configuration and do one initial run whose output is not captured.
35 | cp  nvmemul-debug.ini  nvmemul.ini
36 | ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 1
37 | 
38 | #FORMAT: ns
39 | #FORMAT: min_local #2_aver_local max_local min_remote #5_aver_remote max_remote 
40 | #FORMAT: 
41 | # Field 7 of the first and second "measuring latency" lines is recorded per run; presumably local first, then remote - TODO confirm against memlat output.
42 | for time in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
43 | do
44 |     ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 1 > $dir_name_res/foo-hw-latency.txt
45 |     grep "measuring latency: latency is" $dir_name_res/foo-hw-latency.txt > $dir_name_res/foo
46 |     awk 'NR==1 {local=$7;}
47 |          NR==2 {remote=$7;}
48 |          END {print local , remote}'  $dir_name_res/foo >>  $dir_name_res/list-hw-latency.txt
49 | done
50 | # Header line of the summary file.
51 | echo "#FORMAT:#1_min_local #2_aver_local #3_max_local #4_min_remote #5_aver_remote #6_max_remote" > $dir_name_res/final-hw-latency.txt  
52 | # Columns 1 and 2 of the list file are reduced independently to min, average and max.
53 | awk 'BEGIN {max1 = 0.0; min1 = 10000000.0; max2 = 0.0; min2 = 10000000.0; sum1 = 0.0; sum2 = 0.0;}
54 |          ($1 > max1){max1 = $1;}
55 |          ($1 < min1){min1 = $1;}
56 |          ($2 > max2){max2 = $2;}
57 |          ($2 < min2){min2 = $2;}
58 |          {sum1=sum1+$1;sum2=sum2+$2;}
59 |          END {print min1, sum1/NR, max1,  min2, sum2/NR, max2 ;}'  $dir_name_res/list-hw-latency.txt  >> $dir_name_res/final-hw-latency.txt  
60 | # Remove the temporary foo* files inside the results directory.
61 | rm  $dir_name_res/foo*
62 | 
63 | #FORMAT:   ns
64 | #FORMAT:#1_min_local #2_aver_local #3_max_local #4_min_remote #5_aver_remote #6_max_remote 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 
78 | 
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/benchmark-tests/nvmemul-bandwidth.ini:
--------------------------------------------------------------------------------
 1 | # Configuration file 
 2 | 
 3 | latency:
 4 | {
 5 |     enable = true;
 6 |     inject_delay = true;
 7 |     read = 1000;
 8 |     write = 1000;
 9 |     max_epoch_duration_us = 10000;
10 |     min_epoch_duration_us = 10000;
11 |     calibration = false;
12 | };
13 | 
14 | bandwidth:
15 | {
16 |     enable = true;
17 |     model = "/tmp/bandwidth_model";
18 |     read = 2000;
19 |     write = 2000;
20 | };
21 | 
22 | topology:
23 | {
24 |     mc_pci = "/tmp/mc_pci_bus";
25 |     physical_nodes = "0";
26 |     hyperthreading = true; # do not use multiple hardware threads per core
27 | };
28 | 
29 | statistics:
30 | {
31 |     enable = true;
32 |     #file = "/tmp/statistics";
33 | };
34 | 
35 | debug:
36 | {
37 |     # debugging level
38 |     level = 5;
39 |     verbose = 0;
40 | 
41 |     # modules set to True produce debugging output
42 |     module:
43 |     {
44 |         all = False;
45 |     };
46 | };
47 | 


--------------------------------------------------------------------------------
/benchmark-tests/nvmemul-debug.ini:
--------------------------------------------------------------------------------
 1 | # Configuration file 
 2 | 
 3 | latency:
 4 | {
 5 |     enable = true;
 6 |     inject_delay = true;
 7 | read = 1000 ;
 8 |     write = 1000;
 9 | max_epoch_duration_us = 10000 ;
10 | min_epoch_duration_us = 10000 ;
11 |     calibration = false;
12 | };
13 | 
14 | bandwidth:
15 | {
16 |     enable = false;
17 |     model = "/tmp/bandwidth_model";
18 |     read = 2000;
19 |     write = 2000;
20 | };
21 | 
22 | topology:
23 | {
24 |     mc_pci = "/tmp/mc_pci_bus";
25 | physical_nodes = "0,1";
26 |     hyperthreading = true; # do not use multiple hardware threads per core
27 | };
28 | 
29 | statistics:
30 | {
31 |     enable = true;
32 |     #file = "/tmp/statistics";
33 | };
34 | 
35 | debug:
36 | {
37 |     # debugging level
38 |     level = 5;
39 |     verbose = 0;
40 | 
41 |     # modules set to True produce debugging output
42 |     module:
43 |     {
44 |         all = False;
45 |     };
46 | };
47 | 


--------------------------------------------------------------------------------
/benchmark-tests/nvmemul-orig.ini:
--------------------------------------------------------------------------------
 1 | # Configuration file 
 2 | 
 3 | latency:
 4 | {
 5 |     enable = true;
 6 |     inject_delay = true;
 7 | read = 1000 ;
 8 |     write = 1000;
 9 | max_epoch_duration_us = 10000 ;
10 | min_epoch_duration_us = 10000 ;
11 |     calibration = false;
12 | };
13 | 
14 | bandwidth:
15 | {
16 |     enable = false;
17 |     model = "/tmp/bandwidth_model";
18 |     read = 2000;
19 |     write = 2000;
20 | };
21 | 
22 | topology:
23 | {
24 |     mc_pci = "/tmp/mc_pci_bus";
25 | physical_nodes = "0,1";
26 |     hyperthreading = true; # do not use multiple hardware threads per core
27 | };
28 | 
29 | statistics:
30 | {
31 |     enable = true;
32 |     #file = "/tmp/statistics";
33 | };
34 | 
35 | debug:
36 | {
37 |     # debugging level
38 |     level = 3;
39 |     verbose = 0;
40 | 
41 |     # modules set to True produce debugging output
42 |     module:
43 |     {
44 |         all = False;
45 |     };
46 | };
47 | 


--------------------------------------------------------------------------------
/benchmark-tests/nvmemul.ini:
--------------------------------------------------------------------------------
 1 | # Configuration file 
 2 | 
 3 | latency:
 4 | {
 5 |     enable = true;
 6 |     inject_delay = true;
 7 | read = 300 ;
 8 |     write = 200;
 9 | max_epoch_duration_us = 10000 ;
10 | min_epoch_duration_us = 10000 ;
11 |     calibration = false;
12 | };
13 | 
14 | bandwidth:
15 | {
16 |     enable = false;
17 |     model = "/tmp/bandwidth_model";
18 |     read = 2000;
19 |     write = 2000;
20 | };
21 | 
22 | topology:
23 | {
24 |     mc_pci = "/tmp/mc_pci_bus";
25 | physical_nodes = "0,1";
26 |     hyperthreading = true; # do not use multiple hardware threads per core
27 | };
28 | 
29 | statistics:
30 | {
31 |     enable = true;
32 |     #file = "/tmp/statistics";
33 | };
34 | 
35 | debug:
36 | {
37 |     # debugging level
38 |     level = 5;
39 |     verbose = 0;
40 | 
41 |     # modules set to True produce debugging output
42 |     module:
43 |     {
44 |         all = False;
45 |     };
46 | };
47 | 


--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/nvmemul-orig.ini:
--------------------------------------------------------------------------------
 1 | # Configuration file 
 2 | 
 3 | latency:
 4 | {
 5 |     enable = true;
 6 |     inject_delay = true;
 7 | read = 1000 ;
 8 |     write = 1000;
 9 | max_epoch_duration_us = 10000 ;
10 | min_epoch_duration_us = 10000 ;
11 |     calibration = false;
12 | };
13 | 
14 | bandwidth:
15 | {
16 |     enable = false;
17 |     model = "/tmp/bandwidth_model";
18 |     read = 2000;
19 |     write = 2000;
20 | };
21 | 
22 | topology:
23 | {
24 |     mc_pci = "/tmp/mc_pci_bus";
25 | physical_nodes = "0,1";
26 |     hyperthreading = true; # do not use multiple hardware threads per core
27 | };
28 | 
29 | statistics:
30 | {
31 |     enable = true;
32 |     #file = "/tmp/statistics";
33 | };
34 | 
35 | debug:
36 | {
37 |     # debugging level
38 |     level = 3;
39 |     verbose = 0;
40 | 
41 |     # modules set to True produce debugging output
42 |     module:
43 |     {
44 |         all = False;
45 |     };
46 | };
47 | 


--------------------------------------------------------------------------------
/nvmemul.dox:
--------------------------------------------------------------------------------
 1 | /**
 2 | 
 3 | @mainpage Quartz:  A Lightweight  Performance Emulator for  Persistent Memory Software.
 4 | 
 5 | 
 6 | \section section-intro Introduction
 7 | 
 8 | Quartz: A DRAM-based performance emulation platform that leverages features 
 9 | available in commodity hardware to emulate different latency and bandwidth 
10 | characteristics of future byte-addressable NVM technologies.
11 | 
12 | */
13 | 
14 |     
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/nvmemul.ini:
--------------------------------------------------------------------------------
 1 | # Configuration file 
 2 | 
 3 | latency:
 4 | {
 5 |     enable = true;
 6 |     inject_delay = true;
 7 | read = 1000 ;
 8 |     write = 1000;
 9 | max_epoch_duration_us = 10000 ;
10 | min_epoch_duration_us = 10000 ;
11 |     calibration = false;
12 | };
13 | 
14 | bandwidth:
15 | {
16 |     enable = false;
17 |     model = "/tmp/bandwidth_model";
18 |     read = 500;
19 |     write = 500;
20 | };
21 | 
22 | topology:
23 | {
24 |     mc_pci = "/tmp/mc_pci_bus";
25 | physical_nodes = "0,1";
26 |     hyperthreading = true; # do not use multiple hardware threads per core
27 | };
28 | 
29 | statistics:
30 | {
31 |     enable = true;
32 |     #file = "/tmp/statistics";
33 | };
34 | 
35 | debug:
36 | {
37 |     # debugging level
38 |     level = 1;
39 |     verbose = 0;
40 | 
41 |     # modules set to True produce debugging output
42 |     module:
43 |     {
44 |         all = False;
45 |     };
46 | };
47 | 


--------------------------------------------------------------------------------
/scripts/install.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | #################################################################
  3 | #Copyright 2016 Hewlett Packard Enterprise Development LP.  
  4 | #This program is free software; you can redistribute it and/or modify
  5 | #it under the terms of the GNU General Public License as published by
  6 | #the Free Software Foundation; either version 2 of the License, or (at
  7 | #your option) any later version. This program is distributed in the
  8 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  9 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 10 | #PURPOSE. See the GNU General Public License for more details. You
 11 | #should have received a copy of the GNU General Public License along
 12 | #with this program; if not, write to the Free Software Foundation,
 13 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 14 | #################################################################
 15 | 
 16 | PAPI_MAJOR=5
 17 | PAPI_MINOR=1
 18 | PAPI_RELEASE=1
 19 | 
 20 | CMAKE_MAJOR=2
 21 | CMAKE_MINOR=8
 22 | 
 23 | # Install the build and runtime dependencies through yum.
 24 | # Terminates the script when the package installation fails.
 25 | function install_deps_rpm() {
 26 |     if ! yum install -q -y numactl-devel libconfig libconfig-devel cmake kernel-devel-$(uname -r) msr-tools uthash-devel; then
 27 |         echo "Dependencies installation failed"
 28 |         exit -1
 29 |     fi
 30 | }
 31 | 
 32 | # Install the build and runtime dependencies through apt-get.
 33 | # Terminates the script when the package installation fails.
 34 | function install_deps_deb() {
 35 |     if ! apt-get install -y libnuma-dev libconfig-dev cmake msr-tools uthash-dev; then
 36 |         echo "Dependencies installation failed"
 37 |         exit -1
 38 |     fi
 39 | }
 40 | 
 41 | # Check that the installed PAPI is exactly version
 42 | # PAPI_MAJOR.PAPI_MINOR.PAPI_RELEASE, as reported by `papi_version`.
 43 | # Prints the detected and the required version and exits on a mismatch.
 44 | function check_supported_papi() {
 45 |     # Third space-separated word of the output, minus any "-suffix".
 46 |     version=`papi_version | cut -d ' ' -f3 | cut -d '-' -f1`
 47 |     major=`echo "${version}" | cut -d '.' -f1`
 48 |     minor=`echo "${version}" | cut -d '.' -f2`
 49 |     release=`echo "${version}" | cut -d '.' -f3`
 50 | 
 51 |     if [ "${major}" -ne "${PAPI_MAJOR}" ] ||
 52 |        [ "${minor}" -ne "${PAPI_MINOR}" ] ||
 53 |        [ "${release}" -ne "${PAPI_RELEASE}" ]; then
 54 |         # This check is about PAPI, so the message must name PAPI.
 55 |         echo "PAPI version (${major}.${minor}.${release}) not supported (=${PAPI_MAJOR}.${PAPI_MINOR}.${PAPI_RELEASE})"
 56 |         exit -1
 57 |     fi
 58 | }
 59 | 
 60 | # Require CMake >= CMAKE_MAJOR.CMAKE_MINOR; print a message and exit otherwise.
 61 | function check_supported_cmake() {
 62 |     # Version string: third word of the first output line, minus any "-suffix".
 63 |     version=`cmake -version | head -1 | cut -d ' ' -f3 | cut -d '-' -f1`
 64 |     major=`echo "${version}" | cut -d '.' -f1`
 65 |     minor=`echo "${version}" | cut -d '.' -f2`
 66 |     too_old=0
 67 | 
 68 |     [ ${major} -lt ${CMAKE_MAJOR} ] && too_old=1
 69 |     [ ${major} -eq ${CMAKE_MAJOR} ] && [ ${minor} -lt ${CMAKE_MINOR} ] && too_old=1
 70 |     if [ ${too_old} -eq 1 ]; then
 71 |         echo "CMake version (${major}.${minor}) not supported (>=${CMAKE_MAJOR}.${CMAKE_MINOR})"
 72 |         exit -1
 73 |     fi
 74 | }
 75 | # Run the tool version checks; the PAPI check is currently commented out.
 76 | function check_supported_versions() {
 77 |     check_supported_cmake
 78 | #    check_supported_papi
 79 | }
 80 | 
 81 | 
 82 | #################### MAIN ####################
 83 | # Must run as root: installs distribution packages, then checks tool versions.
 84 | if [ $(id -u) -ne 0 ]; then
 85 |    echo "You must be root to execute this script"
 86 |    exit -1
 87 | fi
 88 | # Pick the package manager from the distribution's release file.
 89 | if [ -f /etc/redhat-release ]; then
 90 |     install_deps_rpm
 91 | elif [ -f /etc/centos-release ]; then
 92 |     install_deps_rpm
 93 | elif [ -f /etc/debian_version -o -f /etc/debian-release ]; then
 94 |     install_deps_deb
 95 | else
 96 |     echo "Linux distribution not supported"
 97 |     exit -1
 98 | fi
 99 | 
100 | check_supported_versions
101 | 
102 | 


--------------------------------------------------------------------------------
/scripts/runenv.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #################################################################
 3 | #Copyright 2016 Hewlett Packard Enterprise Development LP.  
 4 | #This program is free software; you can redistribute it and/or modify
 5 | #it under the terms of the GNU General Public License as published by
 6 | #the Free Software Foundation; either version 2 of the License, or (at
 7 | #your option) any later version. This program is distributed in the
 8 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 9 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10 | #PURPOSE. See the GNU General Public License for more details. You
11 | #should have received a copy of the GNU General Public License along
12 | #with this program; if not, write to the Free Software Foundation,
13 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 | #################################################################
15 | 
16 | NVM_EMUL_PATH="$(dirname "$0")/.."
17 | 
18 | # Usage: runenv.sh <cmd> [args...] -- runs <cmd> with the emulator library preloaded.
19 | if [ -z "$1" ]; then
20 |     echo "runenv.sh [cmd to run]"
21 |     exit 1
22 | fi
23 | 
24 | rootdir="$NVM_EMUL_PATH"
25 | bindir="$rootdir/build"
26 | # Switch every CPU to the "performance" governor unless cpu0 already uses it.
27 | if [ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then
28 |     current_scaling=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor);
29 | 
30 |     if [ "${current_scaling}" != "performance" ]; then
31 |         file_list=$(ls /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor)
32 |         for cpu_file in ${file_list}; do
33 |             echo "performance" | sudo tee ${cpu_file} > /dev/null
34 |         done
35 |     fi
36 | fi
37 | 
38 | "$rootdir/scripts/turboboost.sh" disable
39 | # On kernels with major version >= 4, write 2 to the perf rdpmc control file.
40 | v=$(uname -r | cut -d '.' -f1)
41 | if [ $v -ge 4 ]; then
42 |     echo "2" | sudo tee /sys/bus/event_source/devices/cpu/rdpmc
43 | fi
44 | 
45 | export LD_PRELOAD="$bindir/src/lib/libnvmemul.so"
46 | export NVMEMUL_INI="$rootdir/nvmemul.ini"
47 | 
48 | if [ ! -f "${LD_PRELOAD}" ]; then
49 |     echo "Library not found. Compile the emulator's library first."
50 |     exit -1
51 | fi
52 | 
53 | echo $LD_PRELOAD
54 | echo $NVMEMUL_INI
55 | 
56 | # execute the command passed as argument; quoting keeps each argument intact
57 | "$@"
58 | 
59 | 


--------------------------------------------------------------------------------
/scripts/setupdev.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | #################################################################
  3 | #Copyright 2016 Hewlett Packard Enterprise Development LP.  
  4 | #This program is free software; you can redistribute it and/or modify
  5 | #it under the terms of the GNU General Public License as published by
  6 | #the Free Software Foundation; either version 2 of the License, or (at
  7 | #your option) any later version. This program is distributed in the
  8 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  9 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 10 | #PURPOSE. See the GNU General Public License for more details. You
 11 | #should have received a copy of the GNU General Public License along
 12 | #with this program; if not, write to the Free Software Foundation,
 13 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 14 | #################################################################
 15 | 
 16 | NVM_EMUL_PATH="$(dirname "$0")/.."
 17 | # Names and paths of the emulator kernel module and its device node.
 18 | device_name="nvmemul"
 19 | device_module_name=${device_name}".ko"
 20 | device_path="/dev/${device_name}"
 21 | device_module_path=`find "${NVM_EMUL_PATH}/build" -name ${device_module_name}`
 22 | 
 23 | 
 24 | # Insert the emulator kernel module and (re)create its device node; exits non-zero when a step fails.
 25 | function loaddev {
 26 |     if [ -z "${device_module_path}" ]; then
 27 |         echo "Module not found. Compile the emulator's source code first."
 28 |         exit -1
 29 |     fi
 30 | 
 31 |     if ! /sbin/insmod "${device_module_path}" 2> /dev/null; then
 32 |         # Tell an already-loaded module apart from a genuine insertion failure.
 33 |         if lsmod | grep ${device_name} > /dev/null; then
 34 |             echo "Kernel module already loaded, please reload it."
 35 |             exit 1
 36 |         fi
 37 |         echo "Kernel module loading failed"
 38 |         exit 1
 39 |     fi
 40 | 
 41 |     # grep|awk reports awk's exit status, so an empty result is the failure signal.
 42 |     device_major=`grep ${device_name} /proc/devices | awk '{ print $1 }'`
 43 |     if [ -z "${device_major}" ]; then
 44 |         echo "Failed to detect module major"
 45 |         exit 1
 46 |     fi
 47 | 
 48 |     if ! rm -f "${device_path}"; then
 49 |         echo "Failed to delete kernel module device file"
 50 |         exit 1
 51 |     fi
 52 | 
 53 |     # Stop here when the node cannot be created instead of reporting success.
 54 |     if ! mknod "${device_path}" c ${device_major} 0 || ! chmod a+wr "${device_path}"; then
 55 |         echo "Failed to create kernel module device file"
 56 |         exit 1
 57 |     fi
 58 | 
 59 |     if lsmod | grep ${device_name} > /dev/null; then
 60 |         echo "Kernel module loaded successfully"
 61 |     else
 62 |         echo "kernel module loading failed"
 63 |         exit 1
 64 |     fi
 65 | }
 66 | 
 67 | function unloaddev {
 68 |     /sbin/rmmod ${device_name} 2> /dev/null
 69 |     rm -f ${device_path}
 70 |     if [ $? -eq 0 ]; then
 71 |         echo "Kernel module unloaded successfully"
 72 |     else
 73 |         echo "Failed to delete kernel module device file"
 74 |         exit 1
 75 |     fi
 76 | }
 77 | # Print usage; the command dispatch below also accepts the one-letter forms l, u and r.
 78 | function help() {
 79 |     echo "$0 <load|unload|reload>"
 80 | }
 81 | 
 82 | ### MAIN ###
 83 | # The module and device-node operations below are only attempted as root.
 84 | if [ $(id -u) -ne 0 ]; then
 85 |    echo "You must be root to execute this script"
 86 |    exit -1
 87 | fi
 88 | 
 89 | if [ $# -eq 0 ]; then
 90 |     help
 91 |     exit 1
 92 | fi
 93 | # Accepted commands: load|l, unload|u, reload|r.
 94 | if [ "$1" = "load" ] || [ "$1" = "l" ]; then
 95 |     loaddev
 96 | elif [ "$1" = "unload" ] || [ "$1" = "u" ]; then
 97 |     unloaddev
 98 | elif [ "$1" = "reload" ] || [ "$1" = "r" ]; then
 99 |     unloaddev
100 |     loaddev
101 | else
102 |     help
103 |     exit 1
104 | fi
105 | 
106 | exit 0
107 | 


--------------------------------------------------------------------------------
/scripts/turboboost.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | #################################################################
  3 | #Copyright 2016 Hewlett Packard Enterprise Development LP.  
  4 | #This program is free software; you can redistribute it and/or modify
  5 | #it under the terms of the GNU General Public License as published by
  6 | #the Free Software Foundation; either version 2 of the License, or (at
  7 | #your option) any later version. This program is distributed in the
  8 | #hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  9 | #the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 10 | #PURPOSE. See the GNU General Public License for more details. You
 11 | #should have received a copy of the GNU General Public License along
 12 | #with this program; if not, write to the Free Software Foundation,
 13 | #Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 14 | #################################################################
 15 | # Print the command-line synopsis and the supported functions.
 16 | function usage()
 17 | {
 18 |     echo "$0 <function> [target CPU id]"
 19 |     echo -e "\tfunctions:"
 20 |     echo -e "\t\t check: verifies if a given CPU id has Turbo Boost enabled"
 21 |     echo -e "\t\t disable: disables a given CPU id or all CPUs if not specified"
 22 |     echo -e "\t\t enable: enables a given CPU id or all CPUs if not specified"
 23 | }
 24 | 
 25 | # Exit with status 1 unless $1 consists solely of decimal digits.
 26 | function verify_cpu_id()
 27 | {
 28 |     re='^[0-9]+$'
 29 |     [[ $1 =~ $re ]] && return
 30 |     echo "CPU id is not a number"
 31 |     exit 1
 32 | }
 33 | 
 34 | # Load the msr kernel module when lsmod does not list it already.
 35 | function check_msr_module()
 36 | {
 37 |     # Nothing to do when a module matching "msr" is already listed.
 38 |     if lsmod | grep msr > /dev/null; then
 39 |         return
 40 |     fi
 41 | 
 42 |     # some systems need this, others don't; a modprobe failure is
 43 |     # therefore not treated as fatal
 44 |     sudo modprobe msr &> /dev/null
 45 | }
 46 | 
 47 | # Report whether Turbo Boost is disabled on CPU $1 (bit 38 of MSR 0x1a0 reads 1).
 48 | function check()
 49 | {
 50 |     cpu=$1
 51 |     if [ -z "${cpu}" ]; then
 52 |         usage
 53 |         exit 1
 54 |     fi
 55 | 
 56 |     # Take the count from the "CPU(s):" row instead of a fixed line number,
 57 |     # which breaks as soon as lscpu prints its rows in a different order.
 58 |     cpus=$(LC_ALL=C lscpu | awk '/^CPU\(s\):/ { print $2; exit }')
 59 |     if [ ${cpu} -ge ${cpus} ]; then
 60 |         echo "CPU id out of range"
 61 |         exit 1
 62 |     fi
 63 |     disabled=$(sudo rdmsr -p${cpu} 0x1a0 -f 38:38)
 64 | 
 65 |     if [ "${disabled}" == "1" ]; then
 66 |         echo "Turbo Boost for processor ${cpu} is disabled"
 67 |     else
 68 |         echo "Turbo Boost for processor ${cpu} is enabled"
 69 |     fi
 70 | }
 71 | 
 72 | # Enable Turbo Boost on CPU $1, or on every CPU when no id is given.
 73 | function enable()
 74 | {
 75 |     cpu=$1
 76 |     # Read the count from the "CPU(s):" row; relying on a fixed line number is fragile.
 77 |     cpus=$(LC_ALL=C lscpu | awk '/^CPU\(s\):/ { print $2; exit }')
 78 |     if [ -z "${cpu}" ]; then
 79 |         for (( i=0; i<${cpus}; i++ )); do 
 80 |             sudo wrmsr -p$i 0x1a0 0x850089
 81 |         done
 82 |         echo "Turbo Boost enabled for all CPUs"
 83 |     else
 84 |         if [ ${cpu} -ge ${cpus} ]; then
 85 |             echo "CPU id out of range"
 86 |             exit 1
 87 |         fi
 88 |         sudo wrmsr -p${cpu} 0x1a0 0x850089
 89 |         echo "Turbo Boost enabled for CPU ${cpu}"
 90 |     fi
 91 | }
 92 | 
 93 | # Disable Turbo Boost on CPU $1, or on every CPU when no id is given.
 94 | function disable()
 95 | {
 96 |     cpu=$1
 97 |     # Read the count from the "CPU(s):" row; relying on a fixed line number is fragile.
 98 |     cpus=$(LC_ALL=C lscpu | awk '/^CPU\(s\):/ { print $2; exit }')
 99 |     if [ -z "${cpu}" ]; then
100 |         for (( i=0; i<${cpus}; i++ )); do 
101 |             sudo wrmsr -p$i 0x1a0 0x4000850089;
102 |         done
103 |         echo "Turbo Boost disabled for all CPUs"
104 |     else
105 |         if [ ${cpu} -ge ${cpus} ]; then
106 |             echo "CPU id out of range"
107 |             exit 1
108 |         fi
109 |         sudo wrmsr -p${cpu} 0x1a0 0x4000850089;
110 |         echo "Turbo Boost disabled for CPU ${cpu}"
111 |     fi
112 | }
113 | 
114 | 
115 | 
116 | ### MAIN ###
117 | # Arguments: <function> [target CPU id]
118 | if [ $# -eq 0 ]; then
119 |     usage
120 |     exit 1
121 | fi
122 | 
123 | action=$1
124 | target_cpu=$2
125 | 
126 | check_msr_module
127 | # A CPU id is optional, but when one is given it has to be numeric.
128 | if [ -n "${target_cpu}" ]; then
129 |     verify_cpu_id ${target_cpu}
130 | fi
131 | 
132 | case ${action} in
133 |     enable)
134 |         enable ${target_cpu}
135 |         ;;
136 |     disable)
137 |         disable ${target_cpu}
138 |         ;;
139 |     check)
140 |         check ${target_cpu}
141 |         ;;
142 |     *)
143 |         usage
144 |         exit 1
145 | esac
146 | 
147 | exit 0
148 | 
149 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(lib)  # emulator library sources (src/lib)
2 | add_subdirectory(dev)  # kernel module build (src/dev)
3 | 


--------------------------------------------------------------------------------
/src/dev/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Build NVM Emulation device driver (using Kbuild Makefile)
 2 | 
 3 | set(DEV_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
 4 | set(DEV_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}")
 5 | set(DEV_KERNEL_MODULE "${DEV_BIN_DIR}/nvmemul.ko")
 6 | mark_as_advanced(DEV_DIR DEV_BIN_DIR)
 7 | 
 8 | # We invoke make in build folder to keep the glog's source folder clean.
 9 | file(MAKE_DIRECTORY ${DEV_BIN_DIR})
10 | add_custom_command(OUTPUT ${DEV_KERNEL_MODULE}
11 |     COMMAND ${CMAKE_COMMAND} -E copy_directory ${DEV_DIR} ${DEV_BIN_DIR}
12 |     COMMAND ${CMAKE_MAKE_PROGRAM} -j
13 |     COMMENT [Build-NVM Emulation Device]
14 |     WORKING_DIRECTORY "${DEV_BIN_DIR}"
15 |     DEPENDS ${DEV_DIR}/pmc.c # just to see if it has been overwritten
16 | )
17 | 
18 | # we use add_custom_command for the build itself because otherwise we have to build it
19 | # every time. the following add_custom_target gives a name for the output.
20 | add_custom_target(dev_build ALL DEPENDS ${DEV_KERNEL_MODULE})
21 | 


--------------------------------------------------------------------------------
/src/dev/Makefile:
--------------------------------------------------------------------------------
 1 | # build modules
 2 | obj-m = nvmemul.o
 3 | nvmemul-objs = pmc.o
 4 | 
 5 | # use the kernel build system
 6 | KERNEL_VERSION := `uname -r`
 7 | KERNEL_SOURCE := /lib/modules/$(KERNEL_VERSION)/build
 8 | 
 9 | SRCDIR=`pwd`
10 | OBJDIR=`pwd`
11 | 
12 | all:
13 | 	make -C $(KERNEL_SOURCE)  M=$(OBJDIR) modules
14 | 
15 | clean: 
16 | 	make -C $(KERNEL_SOURCE) M=$(OBJDIR) clean
17 | 


--------------------------------------------------------------------------------
/src/dev/ioctl_query.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
#ifndef __IOCTL_QUERY_H
#define __IOCTL_QUERY_H

#include <linux/ioctl.h>

/* Magic ("type") byte shared by every ioctl command of the device. */
#define MYDEV_MAGIC (0xAA)

/* Argument of IOCTL_SETCOUNTER. */
typedef struct {
    unsigned int counter_id; /* index of the counter to program */
    unsigned int event_id;   /* value to program for that counter */
} ioctl_query_setcounter_t;

/* Argument of IOCTL_SETPCI and IOCTL_GETPCI. */
typedef struct {
    unsigned int bus_id;      /* PCI bus number */
    unsigned int device_id;   /* device (slot) number on that bus */
    unsigned int function_id; /* function number of the device */
    unsigned int offset;      /* offset inside the configuration space */
    unsigned int val;         /* value to write (SETPCI) or value read back (GETPCI) */
} ioctl_query_setgetpci_t;

/*
 * Command numbers.
 * NOTE(review): the size argument is the pointer type, so the size encoded in
 * each command is sizeof(pointer) rather than sizeof(struct). Changing the
 * type or the direction macro would change the command values, so they are
 * left as they are.
 */
#define IOCTL_SETCOUNTER _IOR(MYDEV_MAGIC, 0, ioctl_query_setcounter_t *)
#define IOCTL_SETPCI     _IOR(MYDEV_MAGIC, 1, ioctl_query_setgetpci_t *)
#define IOCTL_GETPCI     _IOWR(MYDEV_MAGIC, 2, ioctl_query_setgetpci_t *)

#endif /* __IOCTL_QUERY_H */
40 | 


--------------------------------------------------------------------------------
/src/dev/pmc.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #include <linux/init.h>
 15 | #include <linux/pci.h>
 16 | #include <linux/module.h>
 17 | #include <linux/moduleparam.h>
 18 | #include <linux/major.h>
 19 | #include <linux/kernel.h>
 20 | #include <linux/slab.h>
 21 | #include <linux/fs.h>
 22 | #include <linux/errno.h>
 23 | #include <linux/types.h>
 24 | #include <linux/proc_fs.h>
 25 | #include <linux/fcntl.h>
 26 | #include <linux/smp.h>
 27 | #include <linux/uaccess.h>
 28 | 
 29 | #include <asm/msr.h>
 30 | #include <asm/uaccess.h>
 31 | 
 32 | #include "ioctl_query.h"
 33 | 
 34 | static long pmc_ioctl(struct file *f, unsigned int cmd, unsigned long arg);
 35 | //unsigned long read_cr4(void);
 36 | //void write_cr4(unsigned long);
 37 | #ifndef read_cr4
 38 | #define read_cr4 native_read_cr4
 39 | #endif
 40 | #ifndef write_cr4
 41 | #define write_cr4 native_write_cr4
 42 | #endif
 43 | 
 44 | struct file_operations pmc_fops = {
 45 | 	.unlocked_ioctl = pmc_ioctl,
 46 | 	.compat_ioctl = pmc_ioctl,
 47 | };
 48 | 
 49 | static const char* module_name = "nvmemul";
 50 | static int mod_major = 0;
 51 | static const int NVMEMUL_MAJOR = 0;
 52 | const const int PERFCTR0 = 0xc1;
 53 | const const int PERFEVENTSEL0 = 0x186;
 54 | 
 55 | 
 56 | void pmc_set_pce_bit(void* arg) 
 57 | {
 58 | 	unsigned long cr4reg;
 59 | 
 60 |     cr4reg = read_cr4();
 61 | 	cr4reg |= 0x100; // setting the PCE bit
 62 | 	write_cr4(cr4reg);
 63 | }
 64 | 
 65 | int pmc_init_module(void)
 66 | {
 67 |  	printk(KERN_INFO "%s: Loading. Initializing...\n", module_name);
 68 | 	if ((mod_major = register_chrdev(NVMEMUL_MAJOR, module_name, &pmc_fops)) == -EBUSY) {
 69 | 		printk(KERN_INFO "%s: Unable to get major for %s device\n", module_name, module_name);
 70 | 		return -EIO;
 71 | 	}
 72 | 
 73 | 	if (mod_major <= 0) {
 74 | 		printk(KERN_INFO "%s: Unable to get major for %s device\n", module_name, module_name);
 75 | 		return -EIO;
 76 | 	}
 77 | 
 78 | 	printk(KERN_INFO "%s: major is %d\n", module_name, mod_major);
 79 | 
 80 | 	/*
 81 | 	 * In order to use the rdpmc instruction in user mode, we need to set the
 82 | 	 * PCE bit of CR4. PCE is 8th bit of cr4, and 256 is 2 << 8
 83 | 	 */
 84 | 
 85 |     pmc_set_pce_bit(NULL);
 86 |     smp_call_function(pmc_set_pce_bit, NULL, 1);
 87 | 
 88 | 	return 0;
 89 | }	
 90 | 
 91 | void pmc_exit_module(void) {
 92 |  	printk(KERN_INFO "%s: Unloading. Cleaning up...\n", module_name);
 93 | 	/* Freeing the major number */
 94 | 	unregister_chrdev(mod_major, module_name);
 95 | }	
 96 | 
 97 | struct counter_s {
 98 |     int counter_id;
 99 |     unsigned long val; 
100 | };
101 | 
102 | 
103 | /* 
104 |  * pmc_clear clears the PMC specified by counter
105 |  * counter = 0 => perfctr0
106 |  * counter = 1 => perfctr1
107 |  * it uses WRMSR to write the values in the counters
108 |  */
109 | static void __pmc_clear(int counter_id) {
110 | 	int counterRegister = PERFCTR0 + counter_id;
111 | 	/* clear the old register */
112 | 
113 | 	__asm__ __volatile__("mov %0, %%ecx\n\t"
114 | 	        "xor %%edx, %%edx\n\t"
115 |             "xor %%eax, %%eax\n\t"
116 |             "wrmsr\n\t"
117 | 	        : /* no outputs */
118 | 	        : "m" (counterRegister)
119 | 	        : "eax", "ecx", "edx" /* all clobbered */);
120 | }
121 | 
122 | static void pmc_clear(void* arg) {
123 |     struct counter_s* counter = (struct counter_s*) arg;
124 |     __pmc_clear(counter->counter_id);
125 | }
126 | 
127 | void pmc_clear_all_cpu(int counter_id)
128 | {
129 |     struct counter_s counter = { counter_id, 0};
130 |     pmc_clear((void*) &counter);
131 |     smp_call_function(pmc_clear, (void*) &counter, 1);
132 | }
133 | 
134 | /* 
135 |  * This function writes the value specified by the arg to the counter
136 |  * indicated by counter 
137 |  */
138 | 
139 | static void __set_counter(int counter_id, unsigned long val) 
140 | {
141 |     int selectionRegister = PERFEVENTSEL0 + counter_id;
142 |     __pmc_clear(counter_id);
143 | 
144 |     /* set the value */
145 | 
146 |     __asm__ __volatile__("mov %0, %%ecx\n\t" /* ecx contains the number of the MSR to set */
147 |             "xor %%edx, %%edx\n\t"/* edx contains the high bits to set the MSR to */
148 |             "mov %1, %%eax\n\t" /* eax contains the low bits to set the MSR to */
149 |             "wrmsr\n\t"
150 |             : /* no outputs */
151 |             : "m" (selectionRegister), "m" (val)
152 |             : "eax", "ecx", "edx" /* clobbered */);
153 | }
154 | 
155 | void set_counter(void* arg)
156 | {
157 |     struct counter_s* counter = (struct counter_s*) arg;
158 | 
159 |     __set_counter(counter->counter_id, counter->val);
160 | }
161 | 
162 | void set_counter_all_cpu(int counter_id, unsigned long arg)
163 | {
164 |     struct counter_s counter = { counter_id, arg};
165 | 
166 |     set_counter((void*) &counter);    
167 |     smp_call_function(set_counter, (void*) &counter, 1);
168 | }
169 | 
170 | static long pmc_ioctl_setcounter(struct file* f, unsigned int cmd, unsigned long arg)
171 | {
172 |     ioctl_query_setcounter_t q;
173 | 
174 |     if (copy_from_user(&q, (ioctl_query_setcounter_t*) arg, sizeof(ioctl_query_setcounter_t))) {
175 |         return -EFAULT;
176 |     }
177 | 
178 | 	if ((q.counter_id < 0) || (q.counter_id > 3)) {
179 | 		printk(KERN_INFO "%s: set_counter illegal value 0x%x for counter\n", module_name, q.counter_id);
180 |         return -ENXIO;
181 |     }
182 |     /* disable counter */
183 |     set_counter_all_cpu(q.counter_id, 0);
184 |     pmc_clear_all_cpu(q.counter_id);
185 | 	/* set counter */
186 | 	set_counter_all_cpu(q.counter_id, q.event_id);
187 |     printk(KERN_INFO "%s: setcounter counter_id: 0x%x event_id=0x%x\n", module_name, q.counter_id, q.event_id); 
188 |     return 0;
189 | }
190 | 
191 | static long pmc_ioctl_setpci(struct file* f, unsigned int cmd, unsigned long arg)
192 | {
193 |     ioctl_query_setgetpci_t q;
194 |     struct pci_bus *bus = NULL;
195 | 
196 |     if (copy_from_user(&q, (ioctl_query_setgetpci_t*) arg, sizeof(ioctl_query_setgetpci_t))) {
197 |         return -EFAULT;
198 |     }
199 | 
200 |     while ((bus = pci_find_next_bus(bus))) {
201 |         if (q.bus_id == bus->number) {
202 |             pci_bus_write_config_word(bus, PCI_DEVFN(q.device_id, q.function_id), q.offset, (u16) q.val);
203 |             printk(KERN_INFO "%s: setpci bus_id=0x%x device_id=0x%x, function_id=0x%x, val=0x%x\n",
204 |                     module_name, q.bus_id, q.device_id, q.function_id, q.val);
205 |             return 0;
206 |         }
207 |     }
208 |     return -ENXIO;
209 | }
210 | 
211 | static long pmc_ioctl_getpci(struct file* f, unsigned int cmd, unsigned long arg)
212 | {
213 |     ioctl_query_setgetpci_t q;
214 |     struct pci_bus *bus = NULL;
215 | 
216 |     if (copy_from_user(&q, (ioctl_query_setgetpci_t*) arg, sizeof(ioctl_query_setgetpci_t))) {
217 |         return -EFAULT;
218 |     }
219 | 
220 |     while ((bus = pci_find_next_bus(bus))) {
221 |         if (q.bus_id == bus->number) {
222 |             unsigned int val = 0;
223 |             pci_bus_read_config_word(bus, PCI_DEVFN(q.device_id, q.function_id), q.offset, (u16*) &val);
224 |             printk(KERN_INFO "%s: getpci bus_id 0x%x device_id 0x%x, function_id 0x%x, offset 0x%x, val 0x%x\n",
225 |                     module_name, q.bus_id, q.device_id, q.function_id, q.offset, val);
226 |             q.val = val;
227 |             if (copy_to_user((ioctl_query_setgetpci_t*) arg, &q, sizeof(ioctl_query_setgetpci_t))) {
228 |                 return -EFAULT;
229 |             }
230 |             return 0;
231 |         }
232 |     }
233 |     return -ENXIO;
234 | }
235 | 
236 | static long pmc_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 
237 | {
238 |     int ret = -1;
239 | 
240 | 	printk(KERN_INFO "%s: ioctl command: 0x%x\n", module_name, cmd);
241 | 	switch (cmd) {
242 | 		case IOCTL_SETCOUNTER:
243 |             ret = pmc_ioctl_setcounter(f, cmd, arg);
244 |             break;
245 |         case IOCTL_SETPCI:
246 |             ret = pmc_ioctl_setpci(f, cmd, arg);
247 |             break;
248 |         case IOCTL_GETPCI:
249 |             ret = pmc_ioctl_getpci(f, cmd, arg);
250 |             break;
251 | 		default:
252 | 			printk(KERN_INFO "%s: ioctl illegal command: 0x%x\n", module_name, cmd);
253 | 			break;
254 | 	}
255 | 	return ret;
256 | }
257 | 
258 | 
259 | /* Declaration of the init and exit functions */
260 | module_init(pmc_init_module);
261 | module_exit(pmc_exit_module);
262 | 
263 | MODULE_LICENSE("GPL");
264 | MODULE_AUTHOR("HPLabs");
265 | 


--------------------------------------------------------------------------------
/src/lib/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | project(nvmemul)
 2 | 
 3 | option(STATISTICS "Enable statistics report" ON)
 4 | 
 5 | if(STATISTICS)
 6 |   message(STATUS "WITH STATISTICS")
 7 |   add_definitions(-DUSE_STATISTICS)
 8 | else()
 9 |   message(STATUS "WITHOUT STATISTICS")
10 | endif()
11 | 
12 | set(nvmemul_src
13 |     config.c
14 |     debug.c
15 |     dev.c
16 |     init.c
17 |     interpose.c
18 |     measure_bw.c
19 |     measure_lat.c
20 |     misc.c
21 |     monotonic_timer.c
22 |     model_bw.c
23 |     model_lat.c
24 |     pflush.c
25 |     pmalloc.c
26 |     stat.c
27 |     thread.c
28 |     topology.c
29 |     process_rank.c
30 | )
31 | 
32 | include_directories(${CMAKE_SOURCE_DIR}/third_party)
33 | include_directories(${CMAKE_SOURCE_DIR}/src)
34 | include_directories(${CMAKE_SOURCE_DIR}/src/lib)
35 | add_definitions(-g)
36 | add_definitions(-O2)
37 | add_definitions(-fPIC)
38 | add_definitions(-Wall)
39 | add_definitions(-march=native)
40 | add_definitions(-fopenmp)
41 | add_definitions(-std=gnu89)
42 | #add_definitions(-DNDEBUG)
43 | #add_definitions(-std=c99)
44 | add_definitions(-msse4)
45 | add_subdirectory(cpu)
46 | add_library(nvmemul SHARED ${nvmemul_src} $<TARGET_OBJECTS:cpu>)
47 | target_link_libraries(nvmemul dl)
48 | target_link_libraries(nvmemul config)
49 | target_link_libraries(nvmemul numa)
50 | target_link_libraries(nvmemul rt)
51 | target_link_libraries(nvmemul m)
52 | target_link_libraries(nvmemul gomp)
53 | 


--------------------------------------------------------------------------------
/src/lib/config.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #include "config.h"
 15 | #include <libconfig.h>
 16 | #include <string.h>
 17 | #include <stdlib.h>
 18 | #include <stdarg.h>
 19 | #include <ctype.h>
 20 | 
 21 | #define ENVVAR_MAX_LEN 128
 22 | 
 23 | static char* __getenv(const char* prefix, const char* name)
 24 | {
 25 | 	char normalized_name[ENVVAR_MAX_LEN];
 26 | 
 27 | 	if ((strlen(name) + strlen(prefix) + 1) > ENVVAR_MAX_LEN) {
 28 | 		return NULL;
 29 | 	}
 30 | 	
 31 |     strcpy(normalized_name, prefix);
 32 |     strcat(normalized_name, "_");
 33 |     strcat(normalized_name, name);
 34 | 
 35 |     return getenv(normalized_name);
 36 | }
 37 | 
 38 | static inline int 
 39 | env_setting_lookup(const char *name, char **value_str)
 40 | {
 41 | 	char *val;
 42 | 	char normalized_name[ENVVAR_MAX_LEN];
 43 | 	int  i;
 44 | 
 45 | 	if ((strlen(name)) > ENVVAR_MAX_LEN) {
 46 | 		return CONFIG_FALSE;
 47 | 	}
 48 | 	
 49 | 	for (i=0; name[i]; i++) {
 50 | 		if (name[i] == '.') {
 51 | 			normalized_name[i] = '_';
 52 | 		} else {
 53 | 			normalized_name[i] = toupper(name[i]);
 54 | 		}
 55 | 	}
 56 | 	normalized_name[i] = '\0';
 57 | 	
 58 | 	val = __getenv(ENVVAR_PREFIX, normalized_name);
 59 | 	if (val) {
 60 | 		*value_str = val;
 61 | 		return CONFIG_TRUE;
 62 | 	} else {
 63 | 		return CONFIG_FALSE;
 64 | 	}
 65 | }
 66 | 
 67 | 
 68 | static inline int
 69 | env_setting_lookup_int(const char *name, int *value)
 70 | {
 71 | 	char *value_str;
 72 | 
 73 | 	if (env_setting_lookup(name, &value_str) == CONFIG_FALSE) {
 74 | 		return CONFIG_FALSE;
 75 | 	}
 76 | 
 77 | 	if (value_str) {
 78 | 		*value = atoi(value_str);
 79 | 		return CONFIG_TRUE;
 80 | 	} else {
 81 | 		return CONFIG_FALSE;
 82 | 	}
 83 | }
 84 | 
 85 | 
 86 | static inline int
 87 | env_setting_lookup_bool(const char *name, int *value)
 88 | {
 89 | 	return env_setting_lookup_int(name, value);
 90 | }
 91 | 
 92 | 
 93 | static inline int 
 94 | env_setting_lookup_string(const char *name, char **value)
 95 | {
 96 | 	return env_setting_lookup(name, value);
 97 | }
 98 | 
 99 | 
100 | int
101 | __cconfig_lookup_bool(config_t *cfg, const char *name, int *value) 
102 | {
103 | 	int val;
104 | 	int found_val = 0;
105 | 
106 | 	if (env_setting_lookup_bool(name, &val) == CONFIG_TRUE) {
107 | 		found_val = 1;
108 | 	} else {
109 | 	    if (config_lookup_bool(cfg, name, &val) == CONFIG_TRUE) {
110 | 			found_val = 1;
111 | 		}
112 | 	}
113 | 
114 | 	if (found_val)	{
115 | 		*value = val;
116 | 		return CONFIG_TRUE;
117 | 	}
118 | 	return CONFIG_FALSE;
119 | }
120 | 
121 | 
122 | int
123 | __cconfig_lookup_valid_bool(config_t *cfg, 
124 |                      const char *name, 
125 |                      int *value, 
126 |                      int validity_check, ...)
127 | {
128 | 	return __cconfig_lookup_bool(cfg, name, value);
129 | }
130 | 
131 | 
132 | int
133 | __cconfig_lookup_int(config_t *cfg, const char *name, int *value)
134 | {
135 | 	int val;
136 | 	int found_val = 0;
137 | 
138 | 	if (env_setting_lookup_int(name, &val) == CONFIG_TRUE) {
139 | 		found_val = 1;
140 | 	} else {
141 | 		// third parameter changed from libconfig 1.3 to 1.4, it was 'long' and now it is 'int'
142 | 	    if (config_lookup_int(cfg, name, &val) == CONFIG_TRUE) {
143 | 			found_val = 1;
144 | 		}
145 | 	}
146 | 
147 | 	if (found_val)	{
148 | 		*value = val;
149 | 		return CONFIG_TRUE;
150 | 	}
151 | 	return CONFIG_FALSE;
152 | }
153 | 
154 | 
155 | int
156 | __cconfig_lookup_valid_int(config_t *cfg, 
157 |                            const char *name, 
158 |                            int *value, 
159 |                            int validity_check, ...)
160 | {
161 | 	int              min;
162 | 	int              max;
163 | 	int              list_length;
164 | 	int              i;
165 | 	int              val;
166 | 	int              listval;
167 | 	va_list          ap;
168 | 
169 | 	if (__cconfig_lookup_int(cfg, name, &val) == CONFIG_TRUE) {
170 | 		switch (validity_check) {
171 | 			case CONFIG_NO_CHECK:
172 | 				*value = val;
173 | 				return CONFIG_TRUE;
174 | 			case CONFIG_RANGE_CHECK:
175 | 				va_start(ap, validity_check);
176 | 				min = va_arg(ap, int);
177 | 				max = va_arg(ap, int);
178 | 				va_end(ap);
179 | 				if (*value >= min && *value <= max) {
180 | 					*value = val;
181 | 					return CONFIG_TRUE;
182 | 				}
183 | 				break;
184 | 			case CONFIG_LIST_CHECK:
185 | 				va_start(ap, validity_check);
186 | 				list_length = va_arg(ap, int);
187 | 				for (i=0; i<list_length; i++) {
188 | 					listval = va_arg(ap, int);
189 | 					if (val == listval) {
190 | 						*value = val;
191 | 						return CONFIG_TRUE;
192 | 					}
193 | 				}
194 | 				va_end(ap);
195 | 				break;
196 | 		}
197 | 	}
198 | 	return CONFIG_FALSE;
199 | }
200 | 
201 | 
202 | int
203 | __cconfig_lookup_string(config_t *cfg, const char *name, char **value)
204 | {
205 | 	char *val;
206 | 	int  found_val = 0;
207 | 
208 | 	if (env_setting_lookup_string(name, &val) == CONFIG_TRUE) {
209 | 		found_val = 1;
210 | 	} else {	
211 | 	    if (config_lookup_string(cfg, name, (const char**) &val) == CONFIG_TRUE) {
212 | 			found_val = 1;
213 | 		}
214 | 	}
215 | 
216 | 	if (found_val)	{
217 | 		*value = val;
218 | 		return CONFIG_TRUE;
219 | 	}
220 | 	return CONFIG_FALSE;
221 | }
222 | 
223 | 
224 | int
225 | __cconfig_lookup_valid_string(config_t *cfg, 
226 |                               const char *name, 
227 |                               char **value, 
228 |                               int validity_check, ...)
229 | {
230 | 	int       list_length;
231 | 	int       i;
232 | 	char      *val;
233 | 	va_list   ap;
234 | 
235 | 	if (__cconfig_lookup_string(cfg, name, &val) == CONFIG_TRUE) {
236 | 		switch (validity_check) {
237 | 			case CONFIG_NO_CHECK:
238 | 				*value = val;
239 | 				return CONFIG_TRUE;
240 | 			case CONFIG_RANGE_CHECK:
241 | 				break;
242 | 			case CONFIG_LIST_CHECK:
243 | 				va_start(ap, validity_check);
244 | 				list_length = va_arg(ap, int);
245 | 				for (i=0; i<list_length; i++) {
246 | 					if (strcmp(val, va_arg(ap, char *))==0) {
247 | 						*value = val;
248 | 						return CONFIG_TRUE;
249 | 					}
250 | 				}
251 | 				va_end(ap);
252 | 				break;
253 | 		}
254 | 	}
255 | 	return CONFIG_FALSE;
256 | }
257 | 
258 | 
259 | int 
260 | __cconfig_init(config_t *cfg, const char *config_file)
261 | {
262 |     int ret;
263 | 	char* env_config_file;
264 | 
265 | 	if ((env_config_file = __getenv(ENVVAR_PREFIX, "INI"))) {
266 | 		config_file = env_config_file;
267 | 	}
268 | 	
269 | 	config_init(cfg);
270 | 	if ((ret = config_read_file(cfg, config_file)) == CONFIG_FALSE) {
271 |         fprintf(stderr, "ERROR: nvmemul: Configuration file %s not found.\n", config_file);
272 |     }
273 |     return ret;
274 | }
275 | 


--------------------------------------------------------------------------------
/src/lib/config.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __CONFIG_H
15 | #define __CONFIG_H
16 | 
17 | /**
18 |  * \file 
19 |  * 
20 |  * Runtime configuration parameters
21 |  */
22 | 
23 | 
24 | #include <stdio.h>
25 | #include <libconfig.h>
26 | 
27 | #define ENVVAR_PREFIX "NVMEMUL"
28 | 
29 | #ifdef __cplusplus
30 | extern "C" {
31 | #endif
32 | 
33 | /* Make sure we don't redefine a macro already defined in libconfig.h */
34 | 
35 | #ifdef CONFIG_NO_CHECK
36 | # error "ERROR: Redefining previously defined CONFIG_NO_CHECK"
37 | #else
38 | # define CONFIG_NO_CHECK    0
39 | #endif
40 | 
41 | #ifdef CONFIG_RANGE_CHECK
42 | # error "ERROR: Redefining previously defined CONFIG_RANGE_CHECK"
43 | #else
44 | # define CONFIG_RANGE_CHECK 1
45 | #endif
46 | 
47 | #ifdef CONFIG_LIST_CHECK
48 | # error "ERROR: Redefining previously defined CONFIG_LIST_CHECK"
49 | #else
50 | # define CONFIG_LIST_CHECK  2
51 | #endif
52 | 
53 | 
54 | 
55 | /** 
56 |  * The lookup functions return the value of a configuration variable based on 
57 |  * the following order: 
58 |  *  1) value of environment variable
59 |  *  2) value in configuration file variable
60 |  *  
61 |  * If the variable is not found then a lookup function does not set the value.
62 |  */
63 | 
64 | int __cconfig_lookup_bool(config_t *cfg, const char *name, int *value);
65 | int __cconfig_lookup_int(config_t *cfg, const char *name, int *value);
66 | int __cconfig_lookup_string(config_t *cfg, const char *name, char **value);
67 | int __cconfig_lookup_valid_bool(config_t *cfg, const char *name, int *value, int validity_check, ...);
68 | int __cconfig_lookup_valid_int(config_t *cfg, const char *name, int *value, int validity_check, ...);
69 | int __cconfig_lookup_valid_string(config_t *cfg, const char *name, char **value, int validity_check, ...);
70 | int __cconfig_init(config_t *cfg, const char *config_file);
71 | 
72 | #ifdef __cplusplus
73 | }
74 | #endif
75 | 
76 | #endif /* __CONFIG_H */
77 | 


--------------------------------------------------------------------------------
/src/lib/cpu/CMakeLists.txt:
--------------------------------------------------------------------------------
# Sources of the cpu object library
set(nvmemul_cpu_src cpu.c pmc.c)

add_library(cpu OBJECT ${nvmemul_cpu_src})
7 | 


--------------------------------------------------------------------------------
/src/lib/cpu/cpu.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #include <stdio.h>
 15 | #include <stdlib.h>
 16 | #include <regex.h>
 17 | #include <string.h>
 18 | #include "cpu.h"
 19 | #include "dev.h"
 20 | #include "error.h"
 21 | #include "misc.h"
 22 | #include "known_cpus.h"
 23 | #include "xeon-ex.h"
 24 | #include <cpuid.h>
 25 | 
 26 | // Mainline architectures and processors available here:
 27 | // https://software.intel.com/en-us/articles/intel-architecture-and-processor-identification-with-cpuid-model-and-family-numbers
 28 | //
 29 | // It turns out that CPUID is not an accurate approach to identifying a
 30 | // processor as different processors may have the same CPUID.
 31 | // So instead we rely on the brand string returned by /proc/cpuinfo:model_name
 32 | 
 33 | #define MASK(msb, lsb) (~((~0) << (msb + 1)) & ((~0) << lsb))
 34 | #define EXTRACT(val, msb, lsb) ((MASK(msb, lsb) & val) >> lsb)
 35 | #define MODEL(eax) EXTRACT(eax, 7, 4)
 36 | #define EXTENDED_MODEL(eax) EXTRACT(eax, 19, 16)
 37 | #define MODEL_NUMBER(eax) ((EXTENDED_MODEL(eax) << 4) | MODEL(eax))
 38 | #define FAMILY(eax) EXTRACT(eax, 11, 8)
 39 | #define Extended_Family(eax) EXTRACT(eax, 27, 20)
 40 | #define Family_Number(eax) (FAMILY(eax) + Extended_Family(eax))
 41 | 
 42 | void cpuid(unsigned int info, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
 43 | {
 44 |     __asm__(
 45 |         "cpuid;"
 46 |         : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx)
 47 |         : "a"(info));
 48 | }
 49 | 
 50 | void get_family_model(int *family, int *model)
 51 | {
 52 |     unsigned int eax, ebx, ecx, edx;
 53 |     int success = __get_cpuid(1, &eax, &ebx, &ecx, &edx);
 54 |     if (family != NULL)
 55 |     {
 56 |         *family = success ? Family_Number(eax) : 0;
 57 |     }
 58 | 
 59 |     if (model != NULL)
 60 |     {
 61 |         *model = success ? MODEL_NUMBER(eax) : 0;
 62 |     }
 63 | }
 64 | 
 65 | // caller is responsible for freeing memory allocated by this function
 66 | char *cpuinfo(char *valname)
 67 | {
 68 |     FILE *fp;
 69 |     char *line = NULL;
 70 |     size_t len = 0;
 71 |     ssize_t read;
 72 | 
 73 |     fp = fopen("/proc/cpuinfo", "r");
 74 |     if (fp == NULL)
 75 |     {
 76 |         return NULL;
 77 |     }
 78 | 
 79 |     while ((read = getline(&line, &len, fp)) != -1)
 80 |     {
 81 |         if (strstr(line, valname))
 82 |         {
 83 |             char *colon = strchr(line, ':');
 84 |             int len = colon - line;
 85 |             char *buf = malloc(strlen(line) - len);
 86 |             strcpy(buf, &line[len + 2]);
 87 |             free(line);
 88 |             fclose(fp);
 89 |             return buf;
 90 |         }
 91 |     }
 92 | 
 93 |     free(line);
 94 |     fclose(fp);
 95 |     return NULL;
 96 | }
 97 | 
 98 | // reads current cpu frequency through the /proc/cpuinfo file
 99 | // avoid calling this function often
100 | int cpu_speed_mhz()
101 | {
102 |     size_t val;
103 |     char *str = cpuinfo("cpu MHz");
104 |     val = string_to_size(str);
105 |     free(str);
106 |     return val;
107 | }
108 | 
// Returns the "cache size" field of /proc/cpuinfo converted by string_to_size.
// The file is read on every call, so avoid calling this function often.
// NOTE(review): cpuinfo() returns NULL when the field is missing; whether
// string_to_size accepts NULL is not visible here - confirm.
size_t cpu_llc_size_bytes()
{
    char *text = cpuinfo("cache size");
    size_t bytes = string_to_size(text);

    free(text);
    return bytes;
}
119 | 
// Returns the "model name" field of /proc/cpuinfo, or NULL when cpuinfo()
// finds none. The caller owns the returned string and must free it.
char *cpu_model_name()
{
    char *name = cpuinfo("model name");

    return name;
}
125 | 
126 | int match(const char *to_match, const char *regex_text)
127 | {
128 |     int ret;
129 |     const char *p = to_match;
130 |     regex_t regex;
131 |     regmatch_t m[1];
132 | 
133 |     if ((ret = regcomp(&regex, regex_text, REG_EXTENDED | REG_NEWLINE)) != 0)
134 |     {
135 |         return E_ERROR;
136 |     }
137 |     if ((ret = regexec(&regex, p, 1, m, 0)))
138 |     {
139 |         regfree(&regex);
140 |         return E_ERROR; // no match
141 |     }
142 |     regfree(&regex);
143 |     return E_SUCCESS;
144 | }
145 | 
146 | int is_Xeon()
147 | {
148 |     char *model_name;
149 |     if ((model_name = cpu_model_name()) == NULL)
150 |     {
151 |         return 0;
152 |     }
153 | 
154 |     if (match(model_name, "Xeon") == E_SUCCESS)
155 |     {
156 |         free(model_name);
157 |         return 1;
158 |     }
159 |     else
160 |     {
161 |         free(model_name);
162 |         return 0;
163 |     }
164 | }
165 | 
166 | int is_Intel()
167 | {
168 |     char *model_name;
169 |     if ((model_name = cpu_model_name()) == NULL)
170 |     {
171 |         return 0;
172 |     }
173 | 
174 |     if (match(model_name, "Intel") == E_SUCCESS)
175 |     {
176 |         free(model_name);
177 |         return 1;
178 |     }
179 |     else
180 |     {
181 |         free(model_name);
182 |         return 0;
183 |     }
184 | }
185 | 
186 | cpu_model_t *cpu_model()
187 | {
188 |     int i, family, model;
189 |     cpu_model_t *cpu_model = NULL;
190 | 
191 |     if (!is_Intel())
192 |         return NULL;
193 | 
194 |     get_family_model(&family, &model);
195 | 
196 |     int isXeon = is_Xeon();
197 | 
198 |     for (i = 0; known_cpus[i].microarch != Invalid; i++)
199 |     {
200 |         microarch_ID_t c = known_cpus[i];
201 | 
202 |         if (c.family == family && c.model == model)
203 |         {
204 |             switch (c.microarch)
205 |             {
206 |             case SandyBridge:
207 |                 cpu_model = &cpu_model_intel_xeon_ex;
208 |                 break;
209 |             case IvyBridge:
210 |                 cpu_model = &cpu_model_intel_xeon_ex_v2;
211 |                 break;
212 |             case Haswell:
213 |                 cpu_model = &cpu_model_intel_xeon_ex_v3;
214 |                 break;
215 |             default:
216 |                 return NULL;
217 |             }
218 | 
219 |             if (!isXeon)
220 |                 cpu_model->microarch = (microarch_t)(cpu_model->microarch - 1);
221 | 
222 |             DBG_LOG(INFO, "Detected CPU model '%s'\n", microarch_strings[cpu_model->microarch]);
223 |             break;
224 |         }
225 |     }
226 | 
227 |     if (!cpu_model)
228 |     {
229 |         return NULL;
230 |     }
231 | 
232 |     // complete the model with some runtime information
233 |     cpu_model->llc_size_bytes = cpu_llc_size_bytes();
234 |     //    cpu_model->speed_mhz = cpu_speed_mhz();
235 | 
236 |     return cpu_model;
237 | }
238 | 


--------------------------------------------------------------------------------
/src/lib/cpu/cpu.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __CPU_H
15 | #define __CPU_H
16 | 
17 | #include <stddef.h>
18 | #include <stdint.h>
19 | #include "dev.h"
20 | 
// Presumably the largest value a throttle register accepts — confirm against
// the code that programs the register.
#define MAX_THROTTLE_VALUE 1023

// NOTE(review): implementation not visible from this header; by its signature
// it takes a node number and a 64-bit value.
int set_throttle_register(int node, uint64_t val);
// Returns the last-level cache size (see cpu.c).
size_t cpu_llc_size_bytes();

// Forward declaration only; the structure is defined elsewhere.
struct pmc_set_s;
27 | 
// Selects which throttle register the set/get callbacks of cpu_model_t operate on.
// Values are explicit from 0 upwards.
typedef enum {
    THROTTLE_DDR_ACT = 0,
    THROTTLE_DDR_READ,
    THROTTLE_DDR_WRITE
} throttle_type_t;
33 | 
// Supported microarchitectures.
// order matters. see cpu_model(): each Xeon variant must directly follow its
// non-Xeon sibling because cpu_model() moves between the two by +/-1, and the
// values also index microarch_strings.
typedef enum {
    Invalid,
    SandyBridge,
    SandyBridgeXeon,
    IvyBridge,
    IvyBridgeXeon,
    Haswell,
    HaswellXeon
} microarch_t;
44 | 
// One row of the known-CPU table: a family/model pair and the
// microarchitecture it maps to.
typedef struct
{
    int family;
    int model;
    microarch_t microarch;
} microarch_ID_t;
51 | 
/**
 *  CPU object that encapsulates processor-specific methods for accessing
 *  performance counters and memory controller PCI registers
 *
 *  llc_size_bytes is filled in at run time by cpu_model(). The two function
 *  pointers take the PCI register set, the throttle type and a 16-bit value
 *  (by value for set, by pointer for get); presumably they return an error
 *  code — confirm against the per-CPU implementations.
 */
typedef struct cpu_model_s {
    microarch_t microarch; // processor description
    size_t llc_size_bytes; // last level cache size
//    int speed_mhz; // cpu clock frequency
    struct pmc_events_s* pmc_events; // performance monitoring events supported by the processor
    int (*set_throttle_register)(pci_regs_t *regs, throttle_type_t throttle_type, uint16_t val);
    int (*get_throttle_register)(pci_regs_t *regs, throttle_type_t throttle_type, uint16_t* val);
} cpu_model_t;
64 | 
// Returns the descriptor of the detected CPU, or NULL when it is not supported (see cpu.c).
cpu_model_t* cpu_model();
// Returns the CPU frequency parsed from the "cpu MHz" entry of /proc/cpuinfo (see cpu.c).
int cpu_speed_mhz();
67 | 
68 | #endif /* __CPU_H */
69 | 


--------------------------------------------------------------------------------
/src/lib/cpu/haswell-papi.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __CPU_HASWELL_H
15 | #define __CPU_HASWELL_H
16 | 
17 | #include <papi.h>
18 | #include "debug.h"
19 | 
20 | // Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
21 | // applications to list all available performance events with their architecture specific
22 | // detailed description and translate them to their respective event code. 'showevtinfo' application can
23 | // be used to list all available performance event names with detailed description and 'check_events' application
24 | // can be used to translate the performance event to the corresponding event code.  
25 | 
// These events will be initialized and started.
// Every event reading will return an array with the values for all these events.
// The array index is the same index used to define the event in the *_native_events array below
// (0: L2-pending stall cycles, 1: L3 hits, 2: remote DRAM loads, 3: local DRAM loads).
// Entries beyond the four listed are implicitly NULL.
const char *haswell_native_events[MAX_NUM_EVENTS] = {
    "CYCLE_ACTIVITY:STALLS_L2_PENDING",
    "MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE",
    "MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM",
    "MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM"
};
35 | 
36 | uint64_t haswell_read_stall_events_local() {
37 |     long long values[MAX_NUM_EVENTS];
38 |     uint64_t events = 0;
39 | 
40 |     if (pmc_events_read_local_thread(values) == PAPI_OK) {
41 | 		uint64_t l2_pending = values[0];
42 | 		uint64_t llc_hit  = values[1];
43 | 		uint64_t remote_dram = values[2];
44 | 		uint64_t local_dram  = values[3];
45 | 
46 | 		DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n",
47 | 			l2_pending, llc_hit, remote_dram, local_dram);
48 | 
49 | 		double num = remote_dram + local_dram;
50 | 		double den = num + llc_hit;
51 | 		if (den == 0) return 0;
52 | 
53 | 		events = (uint64_t)((double)l2_pending * ((double)num / den));
54 |     } else {
55 |         DBG_LOG(ERROR, "read stall cycles failed\n");
56 |     }
57 | 
58 |     return events;
59 | }
60 | 
61 | uint64_t haswell_read_stall_events_remote() {
62 |     long long values[MAX_NUM_EVENTS];
63 |     uint64_t events = 0;
64 | 
65 |     if (pmc_events_read_local_thread(values) == PAPI_OK) {
66 | 		uint64_t l2_pending = values[0];
67 | 		uint64_t llc_hit  = values[1];
68 | 		uint64_t remote_dram = values[2];
69 | 		uint64_t local_dram  = values[3];
70 | 
71 | 		DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n",
72 | 			l2_pending, llc_hit, remote_dram, local_dram);
73 | 
74 | 		// calculate stalls based on l2 stalls and LLC miss/hit
75 | 		double num = remote_dram + local_dram;
76 | 		double den = num + llc_hit;
77 | 		if (den == 0) return 0;
78 | 		double stalls = (double)l2_pending * ((double)num / den);
79 | 
80 | 		// calculate remote dram stalls based on total stalls and local/remote dram accesses
81 | 		den = remote_dram + local_dram;
82 | 		if (den == 0) return 0;
83 | 		events = (uint64_t) (stalls * ((double)remote_dram / den));
84 |     } else {
85 |         DBG_LOG(ERROR, "read stall cycles failed\n");
86 |     }
87 | 
88 |     return events;
89 | }
90 | 
91 | #endif /* __CPU_HASWELL_H */
92 | 


--------------------------------------------------------------------------------
/src/lib/cpu/haswell.h:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #ifndef __CPU_HASWELL_H
 15 | #define __CPU_HASWELL_H
 16 | 
 17 | #include <math.h>
 18 | #include "thread.h"
 19 | #include "cpu/pmc.h"
 20 | #include "debug.h"
 21 | 
 22 | // Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
 23 | // applications to list all available performance events with their architecture specific
 24 | // detailed description and translate them to their respective event code. 'showevtinfo' application can
 25 | // be used to list all available performance event names with detailed description and 'check_events' application
 26 | // can be used to translate the performance event to the corresponding event code.  
 27 | 
// Thread-local values defined elsewhere. The two latencies weight remote
// against local DRAM accesses in the remote_dram reader further down this
// header; their unit is not visible here.
extern __thread int tls_hw_local_latency;
extern __thread int tls_hw_remote_latency;
#ifdef MEMLAT_SUPPORT
// Per-thread running sums of the remote/local DRAM deltas, incremented by the
// readers in this header.
extern __thread uint64_t tls_global_remote_dram;
extern __thread uint64_t tls_global_local_dram;
#endif
 34 | 
// Haswell hardware event list. Each ACTION entry carries the event name, a
// NULL second argument and a hexadecimal value (presumably the raw event
// encoding — confirm). What ACTION does with them is decided by the code that
// expands this macro, which is not visible in this header.
#undef FOREACH_PMC_HW_EVENT
#define FOREACH_PMC_HW_EVENT(ACTION)                                                                       \
  ACTION("CYCLE_ACTIVITY:STALLS_L2_PENDING", NULL, 0x55305a3)                                              \
  ACTION("MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE", NULL, 0x5308d2)                                        \
  ACTION("MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM", NULL, 0x530cd3)                                     \
  ACTION("MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM", NULL, 0x5303d3)
 41 | 
// Logical events exposed for Haswell: ldm_stall_cycles and remote_dram, each
// implemented by the ENABLE/CLEAR/READ bodies in this header. ACTION and
// prefix are supplied by the code that expands the macro.
#undef FOREACH_PMC_EVENT
#define FOREACH_PMC_EVENT(ACTION, prefix)                                                                  \
  ACTION(ldm_stall_cycles, prefix)                                                                         \
  ACTION(remote_dram, prefix)
 46 | 
// Multiplier applied to the DRAM access count (L3 misses) before it is
// compared with the L3 hit count in the readers of this header.
// NOTE(review): the rationale for 7.0 is not visible here.
#define L3_FACTOR 7.0
 48 | 
// Enable hook for the haswell ldm_stall_cycles event: associates the four
// hardware events with indices 0..3, the same indices the matching reader
// passes to READ_MY_HW_EVENT_DIFF. Always returns E_SUCCESS.
// NOTE(review): the exact effect of ASSIGN_PMC_HW_EVENT_TO_ME is defined by
// the macro in cpu/pmc.h — confirm there.
DECLARE_ENABLE_PMC(haswell, ldm_stall_cycles)
{
    ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE", 1);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM", 2);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM", 3);

    return E_SUCCESS;
}
 58 | 
// Clear hook for the haswell ldm_stall_cycles event; intentionally empty body.
DECLARE_CLEAR_PMC(haswell, ldm_stall_cycles)
{
}
 62 | 
 63 | DECLARE_READ_PMC(haswell, ldm_stall_cycles)
 64 | {
 65 |    uint64_t l2_pending_diff  = READ_MY_HW_EVENT_DIFF(0);
 66 |    uint64_t llc_hit_diff     = READ_MY_HW_EVENT_DIFF(1);
 67 |    uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2);
 68 |    uint64_t local_dram_diff  = READ_MY_HW_EVENT_DIFF(3);
 69 | 
 70 |    DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n",
 71 | 		   l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff);
 72 | 
 73 |    if ((remote_dram_diff == 0) && (local_dram_diff == 0)) return 0;
 74 | #ifdef MEMLAT_SUPPORT
 75 |    tls_global_local_dram += local_dram_diff;
 76 | #endif
 77 | 
 78 |    // calculate stalls based on L2 stalls and LLC miss/hit
 79 |    double num = L3_FACTOR * (remote_dram_diff + local_dram_diff);
 80 |    double den = num + llc_hit_diff;
 81 |    if (den == 0) return 0;
 82 |    return (uint64_t) ((double)l2_pending_diff * (num / den));
 83 | }
 84 | 
 85 | 
// Enable hook for the haswell remote_dram event: associates the same four
// hardware events with indices 0..3 that the matching reader passes to
// READ_MY_HW_EVENT_DIFF. Always returns E_SUCCESS.
// NOTE(review): the exact effect of ASSIGN_PMC_HW_EVENT_TO_ME is defined by
// the macro in cpu/pmc.h — confirm there.
DECLARE_ENABLE_PMC(haswell, remote_dram)
{
    ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE", 1);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM", 2);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM", 3);

    return E_SUCCESS;
}
 95 | 
// Clear hook for the haswell remote_dram event; intentionally empty body.
DECLARE_CLEAR_PMC(haswell, remote_dram)
{
}
 99 | 
100 | DECLARE_READ_PMC(haswell, remote_dram)
101 | {
102 |    uint64_t l2_pending_diff  = READ_MY_HW_EVENT_DIFF(0);
103 |    uint64_t llc_hit_diff     = READ_MY_HW_EVENT_DIFF(1);
104 |    uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2);
105 |    uint64_t local_dram_diff  = READ_MY_HW_EVENT_DIFF(3);
106 | 
107 |    DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n",
108 | 		   l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff);
109 | 
110 |    if ((remote_dram_diff == 0) && (local_dram_diff == 0)) return 0;
111 | #ifdef MEMLAT_SUPPORT
112 |    tls_global_remote_dram += remote_dram_diff;
113 | #endif
114 | 
115 |    // calculate stalls based on L2 stalls and LLC miss/hit
116 |    double num = L3_FACTOR * (remote_dram_diff + local_dram_diff);
117 |    double den = num + llc_hit_diff;
118 |    if (den == 0) return 0;
119 |    double stalls = (double)l2_pending_diff * (num / den);
120 | 
121 |    // calculate remote dram stalls based on total stalls and local/remote dram accesses
122 |    // also consider the weight of remote memory access against local memory access
123 |    den = (remote_dram_diff * tls_hw_remote_latency) + (local_dram_diff * tls_hw_local_latency);
124 |    if (den == 0) return 0;
125 |    return (uint64_t) (stalls * ((double)(remote_dram_diff * tls_hw_remote_latency) / den));
126 | }
127 | 
128 | 
// Expands PMC_EVENTS for the haswell prefix. The 4 presumably is the number
// of hardware events listed in FOREACH_PMC_HW_EVENT — confirm against the
// macro definition in cpu/pmc.h.
PMC_EVENTS(haswell, 4)
130 | #endif /* __CPU_HASWELL_H */
131 | 


--------------------------------------------------------------------------------
/src/lib/cpu/ivybridge-papi.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __CPU_IVYBRIDGE_H
15 | #define __CPU_IVYBRIDGE_H
16 | 
17 | #include <papi.h>
18 | #include "debug.h"
19 | 
20 | // Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
21 | // applications to list all available performance events with their architecture specific
22 | // detailed description and translate them to their respective event code. 'showevtinfo' application can
23 | // be used to list all available performance event names with detailed description and 'check_events' application
24 | // can be used to translate the performance event to the corresponding event code.  
25 | 
// These events will be initialized and started.
// Every event reading will return an array with the values for all these events.
// The array index is the same index used to define the event in the *_native_events array below
// (0: L2-pending stall cycles, 1: LLC hits, 2: remote DRAM loads, 3: local DRAM loads).
// Entries beyond the four listed are implicitly NULL.
const char *ivybridge_native_events[MAX_NUM_EVENTS] = {
    "CYCLE_ACTIVITY:STALLS_L2_PENDING",
    "MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE",
    "MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM",
    "MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM"
};
35 | 
36 | uint64_t ivybridge_read_stall_events_local() {
37 |     long long values[MAX_NUM_EVENTS];
38 |     uint64_t events = 0;
39 | 
40 |     if (pmc_events_read_local_thread(values) == PAPI_OK) {
41 | 		uint64_t l2_pending = values[0];
42 | 		uint64_t llc_hit  = values[1];
43 | 		uint64_t remote_dram = values[2];
44 | 		uint64_t local_dram  = values[3];
45 | 
46 | 		DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n",
47 | 			l2_pending, llc_hit, remote_dram, local_dram);
48 | 
49 | 		double num = remote_dram + local_dram;
50 | 		double den = num + llc_hit;
51 | 		if (den == 0) return 0;
52 | 
53 | 		events = (uint64_t)((double)l2_pending * ((double)num / den));
54 |     } else {
55 |         DBG_LOG(ERROR, "read stall cycles failed\n");
56 |     }
57 | 
58 |     return events;
59 | }
60 | 
61 | uint64_t ivybridge_read_stall_events_remote() {
62 |     long long values[MAX_NUM_EVENTS];
63 |     uint64_t events = 0;
64 | 
65 |     if (pmc_events_read_local_thread(values) == PAPI_OK) {
66 | 		uint64_t l2_pending = values[0];
67 | 		uint64_t llc_hit  = values[1];
68 | 		uint64_t remote_dram = values[2];
69 | 		uint64_t local_dram  = values[3];
70 | 
71 | 		DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n",
72 | 			l2_pending, llc_hit, remote_dram, local_dram);
73 | 
74 | 		// calculate stalls based on l2 stalls and LLC miss/hit
75 | 		double num = remote_dram + local_dram;
76 | 		double den = num + llc_hit;
77 | 		if (den == 0) return 0;
78 | 		double stalls = (double)l2_pending * ((double)num / den);
79 | 
80 | 		// calculate remote dram stalls based on total stalls and local/remote dram accesses
81 | 		den = remote_dram + local_dram;
82 | 		if (den == 0) return 0;
83 | 		events = (uint64_t) (stalls * ((double)remote_dram / den));
84 |     } else {
85 |         DBG_LOG(ERROR, "read stall cycles failed\n");
86 |     }
87 | 
88 |     return events;
89 | }
90 | 
91 | #endif /* __CPU_IVYBRIDGE_H */
92 | 


--------------------------------------------------------------------------------
/src/lib/cpu/ivybridge.h:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #ifndef __CPU_IVYBRIDGE_H
 15 | #define __CPU_IVYBRIDGE_H
 16 | 
 17 | #include <math.h>
 18 | #include "thread.h"
 19 | #include "cpu/pmc.h"
 20 | #include "debug.h"
 21 | 
 22 | // Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
 23 | // applications to list all available performance events with their architecture specific
 24 | // detailed description and translate them to their respective event code. 'showevtinfo' application can
 25 | // be used to list all available performance event names with detailed description and 'check_events' application
 26 | // can be used to translate the performance event to the corresponding event code.  
 27 | 
// Thread-local values defined elsewhere. The two latencies weight remote
// against local DRAM accesses in the remote_dram reader further down this
// header; their unit is not visible here.
extern __thread int tls_hw_local_latency;
extern __thread int tls_hw_remote_latency;
#ifdef MEMLAT_SUPPORT
// Per-thread running sums of the remote/local DRAM deltas, incremented by the
// readers in this header.
extern __thread uint64_t tls_global_remote_dram;
extern __thread uint64_t tls_global_local_dram;
#endif
 34 | 
// Ivy Bridge hardware event list. Each ACTION entry carries the event name, a
// NULL second argument and a hexadecimal value (presumably the raw event
// encoding — confirm). What ACTION does with them is decided by the code that
// expands this macro, which is not visible in this header.
#undef FOREACH_PMC_HW_EVENT
#define FOREACH_PMC_HW_EVENT(ACTION)                                                                       \
  ACTION("CYCLE_ACTIVITY:STALLS_L2_PENDING", NULL, 0x55305a3)                                              \
  ACTION("MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE", NULL, 0x5308d2)                                        \
  ACTION("MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM", NULL, 0x530cd3)                                     \
  ACTION("MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM", NULL, 0x5303d3)
 41 | 
// Logical events exposed for Ivy Bridge: ldm_stall_cycles and remote_dram,
// each implemented by the ENABLE/CLEAR/READ bodies in this header. ACTION and
// prefix are supplied by the code that expands the macro.
#undef FOREACH_PMC_EVENT
#define FOREACH_PMC_EVENT(ACTION, prefix)                                                                  \
  ACTION(ldm_stall_cycles, prefix)                                                                         \
  ACTION(remote_dram, prefix)
 46 | 
 47 | 
// Multiplier applied to the DRAM access count (LLC misses) before it is
// compared with the LLC hit count in the readers of this header.
// NOTE(review): the rationale for 7.0 is not visible here.
#define L3_FACTOR 7.0
 49 | 
// Enable hook for the ivybridge ldm_stall_cycles event: associates the four
// hardware events with indices 0..3, the same indices the matching reader
// passes to READ_MY_HW_EVENT_DIFF. Always returns E_SUCCESS.
// NOTE(review): the exact effect of ASSIGN_PMC_HW_EVENT_TO_ME is defined by
// the macro in cpu/pmc.h — confirm there.
DECLARE_ENABLE_PMC(ivybridge, ldm_stall_cycles)
{
    ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE", 1);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM", 2);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM", 3);

    return E_SUCCESS;
}
 59 | 
// Clear hook for the ivybridge ldm_stall_cycles event; intentionally empty body.
DECLARE_CLEAR_PMC(ivybridge, ldm_stall_cycles)
{
}
 63 | 
 64 | DECLARE_READ_PMC(ivybridge, ldm_stall_cycles)
 65 | {
 66 |    uint64_t l2_pending_diff  = READ_MY_HW_EVENT_DIFF(0);
 67 |    uint64_t llc_hit_diff     = READ_MY_HW_EVENT_DIFF(1);
 68 |    uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2);
 69 |    uint64_t local_dram_diff  = READ_MY_HW_EVENT_DIFF(3);
 70 | 
 71 |    DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n",
 72 | 		   l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff);
 73 | 
 74 |    if ((remote_dram_diff == 0) && (local_dram_diff == 0)) return 0;
 75 | #ifdef MEMLAT_SUPPORT
 76 |    tls_global_local_dram += local_dram_diff;
 77 | #endif
 78 | 
 79 |    // calculate stalls based on L2 stalls and LLC miss/hit
 80 |    double num = L3_FACTOR * (remote_dram_diff + local_dram_diff);
 81 |    double den = num + llc_hit_diff;
 82 |    if (den == 0) return 0;
 83 |    return (uint64_t) ((double)l2_pending_diff * (num / den));
 84 | }
 85 | 
 86 | 
// Enable hook for the ivybridge remote_dram event: associates the same four
// hardware events with indices 0..3 that the matching reader passes to
// READ_MY_HW_EVENT_DIFF. Always returns E_SUCCESS.
// NOTE(review): the exact effect of ASSIGN_PMC_HW_EVENT_TO_ME is defined by
// the macro in cpu/pmc.h — confirm there.
DECLARE_ENABLE_PMC(ivybridge, remote_dram)
{
    ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE", 1);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM", 2);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM", 3);

    return E_SUCCESS;
}
 96 | 
// Clear hook for the ivybridge remote_dram event; intentionally empty body.
DECLARE_CLEAR_PMC(ivybridge, remote_dram)
{
}
100 | 
101 | DECLARE_READ_PMC(ivybridge, remote_dram)
102 | {
103 |    uint64_t l2_pending_diff  = READ_MY_HW_EVENT_DIFF(0);
104 |    uint64_t llc_hit_diff     = READ_MY_HW_EVENT_DIFF(1);
105 |    uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2);
106 |    uint64_t local_dram_diff  = READ_MY_HW_EVENT_DIFF(3);
107 | 
108 |    DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n",
109 | 		   l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff);
110 | 
111 |    if ((remote_dram_diff == 0) && (local_dram_diff == 0)) return 0;
112 | #ifdef MEMLAT_SUPPORT
113 |    tls_global_remote_dram += remote_dram_diff;
114 | #endif
115 | 
116 |    // calculate stalls based on L2 stalls and LLC miss/hit
117 |    double num = L3_FACTOR * (remote_dram_diff + local_dram_diff);
118 |    double den = num + llc_hit_diff;
119 |    if (den == 0) return 0;
120 |    double stalls = (double)l2_pending_diff * (num / den);
121 | 
122 |    // calculate remote dram stalls based on total stalls and local/remote dram accesses
123 |    // also consider the weight of remote memory access against local memory access
124 |    den = (remote_dram_diff * tls_hw_remote_latency) + (local_dram_diff * tls_hw_local_latency);
125 |    if (den == 0) return 0;
126 |    return (uint64_t) (stalls * ((double)(remote_dram_diff * tls_hw_remote_latency) / den));
127 | }
128 | 
129 | 
// Expands PMC_EVENTS for the ivybridge prefix. The 4 presumably is the number
// of hardware events listed in FOREACH_PMC_HW_EVENT — confirm against the
// macro definition in cpu/pmc.h.
PMC_EVENTS(ivybridge, 4)
131 | #endif /* __CPU_IVYBRIDGE_H */
132 | 


--------------------------------------------------------------------------------
/src/lib/cpu/known_cpus.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __KNOWN_CPUS_H
15 | #define __KNOWN_CPUS_H
16 | 
17 | #include "cpu.h"
18 | 
// Table mapping CPUID family/model pairs to a microarchitecture.
// cpu_model() scans it until it reaches the entry whose microarch is Invalid.
//
// later, cpu_model_name() is used to distinguish between
// Xeon and non-Xeon processors. It's much easier here
// to consider all processors non-Xeon.
// references:
// 1- http://a4lg.com/tech/x86/database/x86-families-and-models.en.html
// 2- Intel® Xeon® Processor E7-8800/4800 v3 Product Family Specification
// 3- https://software.intel.com/en-us/articles/intel-architecture-and-processor-identification-with-cpuid-model-and-family-numbers
microarch_ID_t known_cpus[] =
    {
        // order does not matter
        {.family = 0x06, .model = 0x2A, .microarch = SandyBridge},
        {.family = 0x06, .model = 0x2D, .microarch = SandyBridge},

        {.family = 0x06, .model = 0x3A, .microarch = IvyBridge},
        {.family = 0x06, .model = 0x3E, .microarch = IvyBridge},

        {.family = 0x06, .model = 0x3C, .microarch = Haswell},
        {.family = 0x06, .model = 0x3F, .microarch = Haswell},
        {.family = 0x06, .model = 0x45, .microarch = Haswell},
        {.family = 0x06, .model = 0x46, .microarch = Haswell},

        // must be the last element (terminates the scan in cpu_model())
        {.family = 0x0, .model = 0x0, .microarch = Invalid}};
42 | 
// Printable names indexed by microarch_t value; cpu_model() uses them in its
// detection log message.
// order must correspond to microarch_t
char *microarch_strings[] =
    {
        "Invalid",
        "Sandy Bridge",
        "Sandy Bridge Xeon",
        "Ivy Bridge",
        "Ivy Bridge Xeon",
        "Haswell",
        "Haswell Xeon"};
53 | 
54 | #endif /* __KNOWN_CPUS_H */
55 | 


--------------------------------------------------------------------------------
/src/lib/cpu/pmc-papi.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #include <papi.h>
 15 | #include <pthread.h>
 16 | #include <sys/syscall.h>
 17 | #include "cpu/pmc-papi.h"
 18 | #include "debug.h"
 19 | 
// Per-thread PAPI event set handle. Starts as PAPI_NULL and is filled in by
// pmc_create_event_set_local_thread().
__thread int tls_event_set = PAPI_NULL;

// Size of the message buffer used when reporting an event registration error.
#define STR_MAX_SIZE 256
 23 | 
// Logs msg at CRITICAL level followed by PAPI's textual description of
// ret_val (obtained with PAPI_strerror).
static void log_papi_critical(int ret_val, const char *msg) {
	//char papi_str[STR_MAX_SIZE];
	//PAPI_perror(ret_val, (char *)papi_str, sizeof(papi_str));
    DBG_LOG(CRITICAL, "%s (%s)\n", msg, PAPI_strerror(ret_val));
}
 29 | 
 30 | int pmc_init() {
 31 | 	int ret_val;
 32 | 
 33 |     if ((ret_val = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) {
 34 |         log_papi_critical(ret_val, "PMC library init error");
 35 |         return -1;
 36 |     }
 37 | 
 38 |     if ((ret_val = PAPI_thread_init(pthread_self)) != PAPI_OK) {
 39 |         log_papi_critical(ret_val, "PMC thread support init error");
 40 |         return -1;
 41 |     }
 42 | 
 43 | //    if ((ret_val = PAPI_set_domain(PAPI_DOM_ALL)) != PAPI_OK) {
 44 | //        log_papi_critical(ret_val, "PMC set domain error");
 45 | //        return -1;
 46 | //    }
 47 | 
 48 |     return 0;
 49 | }
 50 | 
// Shuts the PAPI library down (counterpart of pmc_init()).
void pmc_shutdown() {
    PAPI_shutdown();
}
 54 | 
 55 | // Create a PAPI event set for the calling thread and store its handle in
 56 | // the thread-local tls_event_set.
 57 | //
 58 | // Returns 0 on success; on failure the PAPI error is logged and -1 returned.
 59 | // Granularity is not adjusted here (no PAPI_set_granularity call is made).
 60 | int pmc_create_event_set_local_thread() {
 61 |     const int status = PAPI_create_eventset(&tls_event_set);
 62 | 
 63 |     if (status == PAPI_OK) {
 64 |         return 0;
 65 |     }
 66 | 
 67 |     log_papi_critical(status, "PMC event set init error");
 68 |     return -1;
 69 | }
 70 | 
 71 | void pmc_destroy_event_set_local_thread() { // tears down the calling thread's event set
 72 |     PAPI_cleanup_eventset(tls_event_set); // return code ignored
 73 |     PAPI_destroy_eventset(&tls_event_set); // takes the handle by address; return code ignored
 74 | }
 75 | 
 76 | int pmc_register_thread() { // thin wrapper
 77 | 	return PAPI_register_thread(); // PAPI's status code is passed through unchanged (not mapped to 0 / -1)
 78 | }
 79 | 
 80 | int pmc_unregister_thread() { // thin wrapper
 81 | 	return PAPI_unregister_thread(); // PAPI's status code is passed through unchanged (not mapped to 0 / -1)
 82 | }
 83 | 
 84 | // Add the native event `event_name` to the calling thread's event set.
 85 | // Preconditions (asserted): the event set exists and `event_name` is non-NULL.
 86 | // Threads are expected to use PTHREAD_SCOPE_SYSTEM, the only scope Linux
 87 | // pthreads support. Returns 0 on success, -1 after logging the PAPI error.
 88 | int pmc_register_event_local_thread(const char *event_name) {
 89 |     char err_text[STR_MAX_SIZE];
 90 |     int status;
 91 |     assert(tls_event_set != PAPI_NULL);
 92 |     assert(event_name);
 93 | 
 94 |     status = PAPI_add_named_event(tls_event_set, (char *)event_name);
 95 |     if (status != PAPI_OK) {
 96 |         snprintf(err_text, sizeof(err_text), "PMC event (%s) register error", event_name);
 97 |         log_papi_critical(status, err_text);
 98 |         return -1;
 99 |     }
100 |     return 0;
101 | }
102 | 
103 | // Start counting on the calling thread's event set (asserted to exist).
104 | // Returns 0 on success, -1 after logging the PAPI error otherwise.
105 | int pmc_events_start_local_thread() {
106 |     assert(tls_event_set != PAPI_NULL);
107 | 
108 |     const int status = PAPI_start(tls_event_set);
109 |     if (status == PAPI_OK) {
110 |         return 0;
111 |     }
112 |     log_papi_critical(status, "PMC events start error");
113 |     return -1;
114 | }
115 | 
116 | // Stop counting on the calling thread's event set (asserted to exist).
117 | // The final counter values written by PAPI_stop are discarded.
118 | void pmc_events_stop_local_thread() {
119 |     long long discarded[MAX_NUM_EVENTS];
120 |     assert(tls_event_set != PAPI_NULL);
121 |     PAPI_stop(tls_event_set, discarded);
122 | }
123 | 
124 | // Read the calling thread's counters into `values`, then reset them.
125 | // `values` must be non-NULL (asserted); its required length is not checked
126 | // here. Because the counters are reset after each successful read, every
127 | // call reports what was counted since the previous reset. The running state
128 | // of the event set is not verified before reading.
129 | //
130 | // Returns 0 on success, -1 after logging the PAPI error otherwise.
131 | int pmc_events_read_local_thread(long long *values) {
132 |     const char *failed_step = "PMC events read error";
133 |     int status;
134 | 
135 |     assert(values);
136 | 
137 |     status = PAPI_read(tls_event_set, values);
138 |     if (status == PAPI_OK) {
139 |         failed_step = "PMC events reset error";
140 |         status = PAPI_reset(tls_event_set);
141 |     }
142 |     if (status == PAPI_OK) {
143 |         return 0;
144 |     }
145 |     log_papi_critical(status, failed_step);
146 |     return -1;
147 | }
148 | 


--------------------------------------------------------------------------------
/src/lib/cpu/pmc-papi.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __CPU_PMC_H
15 | #define __CPU_PMC_H
16 | 
17 | #include <stdint.h>
18 | 
19 | 
20 | // Usually the architectures support up to 4 counters enabled at the same
21 | // time per core when HT is enabled
22 | #define MAX_NUM_EVENTS 4
23 | 
24 | typedef uint64_t (*read_stalls_t)(void); // argument-less callback returning a 64-bit stall figure
25 | 
26 | typedef struct { // PMC description used by the PAPI build of a CPU model
27 | 	const char **native_events; // event-name table, e.g. sandybridge_native_events
28 | 	read_stalls_t read_stalls_events_local; // e.g. sandybridge_read_stall_events_local
29 | 	read_stalls_t read_stalls_events_remote; // may be NULL (the SandyBridge Xeon model leaves it NULL)
30 | } pmc_event_t;
31 | 
32 | int pmc_init(); // initialise PAPI and its thread support; 0 on success, -1 on error
33 | void pmc_shutdown(); // calls PAPI_shutdown()
34 | int pmc_create_event_set_local_thread(); // create the calling thread's event set; 0 on success, -1 on error
35 | void pmc_destroy_event_set_local_thread(); // clean up and destroy the calling thread's event set
36 | int pmc_register_event_local_thread(const char *event_name); // add a named event to that set; 0 on success, -1 on error
37 | int pmc_events_start_local_thread(); // start counting; 0 on success, -1 on error
38 | void pmc_events_stop_local_thread(); // stop counting; the final values are discarded
39 | int pmc_events_read_local_thread(long long *values); // read the counters into values, then reset them; 0 on success, -1 on error
40 | 
41 | int pmc_register_thread(); // returns PAPI_register_thread()'s status unchanged
42 | int pmc_unregister_thread(); // returns PAPI_unregister_thread()'s status unchanged
43 | 
44 | #endif /* __CPU_PMC_H */
45 | 


--------------------------------------------------------------------------------
/src/lib/cpu/pmc.h:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #ifndef __CPU_PMC_H
 15 | #define __CPU_PMC_H
 16 | 
 17 | #include "cpu/cpu.h"
 18 | // Signature and name generators for a derived event's three hooks: <prefix>_create_pmc_<name>, <prefix>_clear_pmc_<name>, <prefix>_read_pmc_<name>.
 19 | #define DECLARE_ENABLE_PMC(prefix, name) int prefix##_create_pmc_##name(struct pmc_events_s* events, struct pmc_event_s* event)
 20 | #define DECLARE_CLEAR_PMC(prefix, name) void prefix##_clear_pmc_##name(struct pmc_event_s* event)
 21 | #define DECLARE_READ_PMC(prefix, name) uint64_t prefix##_read_pmc_##name(struct pmc_event_s* event)
 22 | #define ENABLE_PMC_FNAME(prefix, name) prefix##_create_pmc_##name
 23 | #define CLEAR_PMC_FNAME(prefix, name) prefix##_clear_pmc_##name
 24 | #define READ_PMC_FNAME(prefix, name) prefix##_read_pmc_##name
 25 | // Table-row initialisers used by PMC_EVENTS; each expands to one brace-enclosed row followed by a comma.
 26 | #define PMC_HW_EVENT(name, os_name, encoding)  { name, os_name, encoding, 0, 0},
 27 | #define PMC_EVENT(name, prefix)  { #name, NULL, 0, 0, ENABLE_PMC_FNAME(prefix, name), CLEAR_PMC_FNAME(prefix, name), READ_PMC_FNAME(prefix, name)},
 28 | // Address of the pmc_events_t object that PMC_EVENTS(prefix, ...) defines.
 29 | #define PMC_EVENTS_PTR(prefix) &prefix##_pmc_events
 30 | // Defines <prefix>_known_hw_event[], <prefix>_known_event[] (both ending in an all-NULL/zero row) and <prefix>_pmc_events from the FOREACH_PMC_* lists in effect where the macro is used.
 31 | #define PMC_EVENTS(prefix, num_hw_cntrs)          \
 32 |   pmc_hw_event_t prefix##_known_hw_event[] = {    \
 33 |     FOREACH_PMC_HW_EVENT(PMC_HW_EVENT)            \
 34 |     {NULL, NULL, 0, 0, 0}                         \
 35 |   };                                              \
 36 |   pmc_event_t prefix##_known_event[] = {          \
 37 |     FOREACH_PMC_EVENT(PMC_EVENT, prefix)          \
 38 |     {NULL, NULL, 0, 0, NULL, NULL, NULL}          \
 39 |   };                                              \
 40 |   pmc_events_t prefix##_pmc_events = {            \
 41 |     num_hw_cntrs,                                 \
 42 |     prefix##_known_hw_event,                      \
 43 |     prefix##_known_event                          \
 44 |   };
 45 | // For use inside an enable hook (relies on its `events` and `event` parameters). NOTE(review): on failure this only releases the event's HW events; it does not return, so the hook carries on with its next statement.
 46 | #define ASSIGN_PMC_HW_EVENT_TO_ME(name, local_id)                                   \
 47 |   if (assign_pmc_hw_event_to_event(events, name, event, local_id) != E_SUCCESS) {   \
 48 |     release_all_pmc_hw_events_of_event(event);                                      \
 49 |   }
 50 | // Shorthands for use inside a hook that has an `event` parameter; local_id indexes event->hw_events.
 51 | #define READ_MY_HW_EVENT_DIFF(local_id) read_pmc_hw_event_diff(event->hw_events[local_id])
 52 | #define READ_MY_HW_EVENT_CUR(local_id) read_pmc_hw_event_cur(event->hw_events[local_id])
 53 | 
 54 | typedef struct { // one raw hardware event; PMC_HW_EVENT initialises name, os_name and encoding and zeroes the rest
 55 |     char* name; // event name string, e.g. "MEM_LOAD_UOPS_RETIRED:L3_HIT"
 56 |     char* os_name; // perf name if known
 57 |     uint64_t encoding; // numeric event code, e.g. 0x5304d1 in the sandybridge table
 58 |     int active;
 59 |     int hw_cntr_id;
 60 |     uint64_t* last_val; // array holding the last read values per processor (useful to calculate the diff since the last read)
 61 | } pmc_hw_event_t;
 62 | 
 63 | typedef struct pmc_event_s { // derived event backed by hardware events and three hooks
 64 |     const char* name; // PMC_EVENT stores the stringified event identifier here
 65 |     pmc_hw_event_t** hw_events; // indexed by the local_id used in READ_MY_HW_EVENT_*
 66 |     int num_hw_events;
 67 |     int active;
 68 |     int (*enable)(struct pmc_events_s* events, struct pmc_event_s* event); // signature of DECLARE_ENABLE_PMC functions
 69 |     void (*clear)(struct pmc_event_s* event); // called by clear_pmc_event()
 70 |     uint64_t (*read)(struct pmc_event_s* event); // called by read_pmc_event()
 71 | } pmc_event_t;
 72 | 
 73 | typedef struct pmc_events_s { // per-CPU-model event catalogue; instances are generated by PMC_EVENTS
 74 |     int num_avail_hw_cntrs; // receives the num_hw_cntrs argument of PMC_EVENTS
 75 |     pmc_hw_event_t* known_hw_events; // table whose last row is all NULL/zero
 76 |     pmc_event_t* known_events; // table whose last row is all NULL/zero
 77 | } pmc_events_t;
 78 | 
 79 | pmc_hw_event_t* enable_pmc_hw_event(pmc_events_t* events, const char* name);
 80 | void disable_pmc_hw_event(pmc_events_t* events, const char* name);
 81 | void clear_pmc_hw_event(pmc_hw_event_t* event);
 82 | uint64_t read_pmc_hw_event_cur(pmc_hw_event_t* event);
 83 | uint64_t read_pmc_hw_event_diff(pmc_hw_event_t* event);
 84 | int assign_pmc_hw_event_to_event(pmc_events_t* events, const char* name, pmc_event_t* event, int local_id);
 85 | void release_all_pmc_hw_events_of_event(pmc_event_t* event);
 86 | 
 87 | pmc_event_t* enable_pmc_event(cpu_model_t* cpu, const char* name);
 88 | void disable_pmc_event(cpu_model_t* cpu, const char* name);
 89 | 
 90 | static inline void clear_pmc_event(pmc_event_t* event) // dispatches to the event's own clear hook
 91 | {
 92 |     event->clear(event); // neither `event` nor the hook pointer is checked for NULL
 93 | }
 94 | 
 95 | //#include "debug.h"
 96 | 
 97 | // Return the value produced by the event's own read hook.
 98 | static inline uint64_t read_pmc_event(pmc_event_t* event)
 99 | {
100 |     // The hook receives the event itself as its only argument.
101 |     return event->read(event);
102 | }
103 | 
104 | #endif /* __CPU_PMC_H */
105 | 


--------------------------------------------------------------------------------
/src/lib/cpu/sandybridge-papi.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __CPU_SANDYBRIDGE_H
15 | #define __CPU_SANDYBRIDGE_H
16 | 
17 | #include <papi.h>
18 | #include <math.h>
19 | #include "debug.h"
20 | 
21 | // Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
22 | // applications to list all available performance events with their architecture-specific
23 | // detailed description and translate them to their respective event code. The showevtinfo application can
24 | // be used to list all available performance event names with detailed description, and the check_events application
25 | // can be used to translate the performance event to the corresponding event code.
26 | 
27 | // These events will be initialized and started.
28 | // Every event reading will return an array with the values for all these events.
29 | // The array index is the same index used to define the event in the *_native_events array below
30 | const char *sandybridge_native_events[MAX_NUM_EVENTS] = {
31 |     "CYCLE_ACTIVITY:STALLS_L2_PENDING", // read back as values[0]
32 |     "MEM_LOAD_UOPS_MISC_RETIRED:LLC_MISS", // read back as values[1]
33 |     "MEM_LOAD_UOPS_RETIRED:L3_HIT", // read back as values[2]
34 |     NULL // NOTE(review): presumably marks the end of the list for the code that registers the events -- confirm
35 | };
36 | 
37 | 
38 | // Raise *hw_latency by a fixed 10 when the raised value stays strictly below target_latency.
39 | void sandybridge_latency_calibration_local(int *hw_latency, int target_latency) {
40 | 	*hw_latency += (target_latency > *hw_latency + 10) ? 10 : 0;
41 | }
42 | 
43 | // Raise *hw_latency by a fixed 30 when the raised value stays strictly below target_latency.
44 | void sandybridge_latency_calibration_remote(int *hw_latency, int target_latency) {
45 | 	*hw_latency += (target_latency > *hw_latency + 30) ? 30 : 0;
46 | }
47 | 
48 | // Read this thread's counters and derive a stall figure from them:
49 | //   floor(stalls_l2_pending * 7*llc_miss / (7*llc_miss + l3_hit))
50 | // where the three inputs are values[0..2], in sandybridge_native_events order.
51 | // Returns 0 when the read fails or when the denominator is zero.
52 | uint64_t sandybridge_read_stall_events_local() {
53 |     long long values[MAX_NUM_EVENTS];
54 | 
55 |     if (pmc_events_read_local_thread(values) != PAPI_OK) {
56 |         DBG_LOG(DEBUG, "read stall cycles failed\n");
57 |         return 0;
58 |     }
59 | 
60 |     const uint64_t stalls_l2_pending = values[0];
61 |     const uint64_t llc_miss = values[1];
62 |     const uint64_t l3_hit = values[2];
63 | 
64 |     DBG_LOG(DEBUG, "read stall L2 cycles %lu, LLC miss %lu, L3 hit %lu\n",
65 |             stalls_l2_pending, llc_miss, l3_hit);
66 | 
67 |     const uint64_t weighted_total = 7.0 * llc_miss + l3_hit;
68 |     if (weighted_total == 0) {
69 |         return 0;
70 |     }
71 | 
72 |     const double weighted_misses = 7.0 * llc_miss;
73 |     return (uint64_t) floorl(stalls_l2_pending * weighted_misses / (double) weighted_total);
74 | }
75 | 
76 | #endif /* __CPU_SANDYBRIDGE_H */
77 | 


--------------------------------------------------------------------------------
/src/lib/cpu/sandybridge.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __CPU_SANDYBRIDGE_H
15 | #define __CPU_SANDYBRIDGE_H
16 | 
17 | #include <math.h>
18 | #include "thread.h"
19 | #include "cpu/pmc.h"
20 | #include "debug.h"
21 | 
22 | // Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
23 | // applications to list all available performance events with their architecture-specific
24 | // detailed description and translate them to their respective event code. The showevtinfo application can
25 | // be used to list all available performance event names with detailed description, and the check_events application
26 | // can be used to translate the performance event to the corresponding event code.
27 | // Hardware events known for this CPU as ACTION(name, os_name, encoding) entries; redefined by each CPU header, hence the #undef.
28 | #undef FOREACH_PMC_HW_EVENT
29 | #define FOREACH_PMC_HW_EVENT(ACTION)                                                                       \
30 |   ACTION("CYCLE_ACTIVITY:STALLS_L2_PENDING", NULL, 0x55305a3)                                              \
31 |   ACTION("MEM_LOAD_UOPS_MISC_RETIRED:LLC_MISS", NULL, 0x5302d4)                                            \
32 |   ACTION("MEM_LOAD_UOPS_RETIRED:L3_HIT", NULL, 0x5304d1)                                                   \
33 |   ACTION("INSTRUCTION_RETIRED", NULL, 0x5300c0)               
34 | // Derived events for this CPU as ACTION(name, prefix) entries; ldm_stall_cycles is the only one.
35 | #undef FOREACH_PMC_EVENT
36 | #define FOREACH_PMC_EVENT(ACTION, prefix)                                                                  \
37 |   ACTION(ldm_stall_cycles, prefix)
38 | 
39 | 
40 | DECLARE_ENABLE_PMC(sandybridge, ldm_stall_cycles) // expands to: int sandybridge_create_pmc_ldm_stall_cycles(events, event)
41 | {
42 |     ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0); // local id 0, fetched first by the read hook
43 |     ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_MISC_RETIRED:LLC_MISS", 1); // local id 1
44 |     //ASSIGN_PMC_HW_EVENT_TO_ME("INSTRUCTION_RETIRED", 2);
45 |     ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_RETIRED:L3_HIT", 2); // local id 2
46 | 
47 |     return E_SUCCESS; // NOTE(review): also returned when an assignment above failed; the macro only releases the HW events
48 | }
49 | 
50 | DECLARE_CLEAR_PMC(sandybridge, ldm_stall_cycles) // expands to: void sandybridge_clear_pmc_ldm_stall_cycles(event)
51 | { // empty body: this hook performs no work
52 | }
53 | 
54 | // Stall figure derived from the three HW events assigned by the enable hook:
55 | //   floor(stalls_l2_pending * 7*llc_miss / (7*llc_miss + l3_hit))
56 | // Each input comes from read_pmc_hw_event_diff(); 0 is returned when the
57 | // denominator is zero.
58 | DECLARE_READ_PMC(sandybridge, ldm_stall_cycles)
59 | {
60 |    const uint64_t stalls_l2_pending = READ_MY_HW_EVENT_DIFF(0);
61 |    const uint64_t llc_miss = READ_MY_HW_EVENT_DIFF(1);
62 |    const uint64_t l3_hit = READ_MY_HW_EVENT_DIFF(2);
63 |    const double weighted_misses = 7.0 * llc_miss;
64 |    const uint64_t weighted_total = weighted_misses + l3_hit;
65 | 
66 |    if (weighted_total == 0) {
67 |       return 0;
68 |    }
69 |    return (uint64_t) floorl(stalls_l2_pending * weighted_misses / (double) weighted_total);
70 | }
71 | 
72 | // Emit sandybridge_known_hw_event[], sandybridge_known_event[] and sandybridge_pmc_events, with 4 as num_avail_hw_cntrs.
73 | PMC_EVENTS(sandybridge, 4)
74 | #endif /* __CPU_SANDYBRIDGE_H */
75 | 


--------------------------------------------------------------------------------
/src/lib/cpu/xeon-ex.h:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #include "dev.h"
 15 | 
 16 | #ifdef PAPI_SUPPORT
 17 | #include "sandybridge-papi.h"
 18 | #include "ivybridge-papi.h"
 19 | #include "haswell-papi.h"
 20 | #else
 21 | #include "sandybridge.h"
 22 | #include "ivybridge.h"
 23 | #include "haswell.h"
 24 | #endif
 25 | 
 26 | // Program one throttle register on every memory channel described by `regs`.
 27 | //
 28 | // Register offset selected by `throttle_type`:
 29 | //   THROTTLE_DDR_ACT   -> 0x190 (also used for any unrecognised type)
 30 | //   THROTTLE_DDR_READ  -> 0x192
 31 | //   THROTTLE_DDR_WRITE -> 0x194
 32 | //
 33 | // `val` is written with set_pci() to that offset for each of the first
 34 | // `regs->channels` entries of `regs->addr`. No result of set_pci() is
 35 | // inspected; the function always returns 0.
 36 | int intel_xeon_ex_set_throttle_register(pci_regs_t *regs, throttle_type_t throttle_type, uint16_t val)
 37 | {
 38 |     int reg_offset = 0x190;
 39 |     int ch;
 40 | 
 41 |     if (throttle_type == THROTTLE_DDR_READ) {
 42 |         reg_offset = 0x192;
 43 |     } else if (throttle_type == THROTTLE_DDR_WRITE) {
 44 |         reg_offset = 0x194;
 45 |     }
 46 | 
 47 |     // The same value goes to every channel.
 48 |     ch = 0;
 49 |     while (ch < regs->channels) {
 50 |         set_pci(regs->addr[ch].bus_id, regs->addr[ch].dev_id, regs->addr[ch].funct,
 51 |                 reg_offset, (uint16_t) val);
 52 |         ++ch;
 53 |     }
 54 | 
 55 |     return 0;
 56 | }
 57 | 
 58 | // Read the throttle register selected by `throttle_type` into `*val`.
 59 | //
 60 | // Register offsets: 0x192 for THROTTLE_DDR_READ, 0x194 for
 61 | // THROTTLE_DDR_WRITE, and 0x190 for THROTTLE_DDR_ACT or any other type.
 62 | // Only the first entry of `regs->addr` is queried. No result of get_pci()
 63 | // is inspected; the function always returns 0.
 64 | int intel_xeon_ex_get_throttle_register(pci_regs_t *regs, throttle_type_t throttle_type, uint16_t* val)
 65 | {
 66 |     int reg_offset = 0x190;
 67 | 
 68 |     if (throttle_type == THROTTLE_DDR_READ) {
 69 |         reg_offset = 0x192;
 70 |     } else if (throttle_type == THROTTLE_DDR_WRITE) {
 71 |         reg_offset = 0x194;
 72 |     }
 73 | 
 74 |     get_pci(regs->addr[0].bus_id, regs->addr[0].dev_id, regs->addr[0].funct, reg_offset, val);
 75 |     return 0;
 76 | }
 77 | 
 78 | 
 79 | // desc is fixed in cpu_model() if not Xeon
 80 | // SandyBridge Xeon model. The PMC description is chosen at build time by PAPI_SUPPORT; the PAPI variant has no remote stall reader (NULL).
 81 | cpu_model_t cpu_model_intel_xeon_ex = {
 82 |     .microarch = SandyBridgeXeon,
 83 | #ifdef PAPI_SUPPORT
 84 |     .pmc_events = {sandybridge_native_events, sandybridge_read_stall_events_local, NULL},
 85 | #else
 86 |     .pmc_events = PMC_EVENTS_PTR(sandybridge),
 87 | #endif
 88 |     .set_throttle_register = intel_xeon_ex_set_throttle_register,
 89 |     .get_throttle_register = intel_xeon_ex_get_throttle_register
 90 | };
 91 | // IvyBridge Xeon model. It uses the same throttle-register accessors as the SandyBridge model; only the PMC description differs.
 92 | cpu_model_t cpu_model_intel_xeon_ex_v2 = {
 93 |     .microarch = IvyBridgeXeon,
 94 | #ifdef PAPI_SUPPORT
 95 |     .pmc_events = {ivybridge_native_events, ivybridge_read_stall_events_local, ivybridge_read_stall_events_remote},
 96 | #else
 97 |     .pmc_events = PMC_EVENTS_PTR(ivybridge),
 98 | #endif
 99 |     .set_throttle_register = intel_xeon_ex_set_throttle_register,
100 |     .get_throttle_register = intel_xeon_ex_get_throttle_register
101 | };
102 | // Haswell Xeon model. It uses the same throttle-register accessors as the SandyBridge model; only the PMC description differs.
103 | cpu_model_t cpu_model_intel_xeon_ex_v3 = {
104 |     .microarch = HaswellXeon,
105 | #ifdef PAPI_SUPPORT
106 |     .pmc_events = {haswell_native_events, haswell_read_stall_events_local, haswell_read_stall_events_remote},
107 | #else
108 |     .pmc_events = PMC_EVENTS_PTR(haswell),
109 | #endif
110 |     .set_throttle_register = intel_xeon_ex_set_throttle_register,
111 |     .get_throttle_register = intel_xeon_ex_get_throttle_register
112 | };
113 | 


--------------------------------------------------------------------------------
/src/lib/debug.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #include "debug.h"
 15 | #include <sys/types.h>
 16 | #include <execinfo.h>
 17 | #include <unistd.h>
 18 | #include <stdio.h>
 19 | #include "config.h"
 20 | 
 21 | 
 22 | int         dbg_modules[dbg_module_count]; // per-module enable flags, filled in by dbg_init
 23 | int         dbg_level = 0; // threshold compared against message levels in DBG_LOG / DBG_LOG2
 24 | int         dbg_verbose = 0; // non-zero makes DBG_LOG add timestamp, function, file and line
 25 | const char* dbg_identifier = ""; // tag printed in brackets at the start of each log line
 26 | static char dbg_identifier_buf[128]; // backing storage when the identifier is derived from the pid
 27 | 
 28 | // Copy `source` into `target`, substituting `newc` for every `oldc`.
 29 | // `target` must have room for strlen(source) + 1 bytes; no bound is checked.
 30 | // Always returns 0.
 31 | static int
 32 | strrep(char *target, char *source, char oldc, char newc)
 33 | {
 34 | 	char *out = target;
 35 | 	const char *in;
 36 | 
 37 | 	for (in = source; *in != '\0'; in++) {
 38 | 		*out++ = (*in == oldc) ? newc : *in;
 39 | 	}
 40 | 	*out = '\0';
 41 | 	return 0;
 42 | }
 43 | 
 44 | 
 45 | void
 46 | dbg_set_level(int level) // sets the global threshold that DBG_LOG / DBG_LOG2 compare against
 47 | {
 48 | 	dbg_level = level; // stored as given; no range check
 49 | }
 50 | // Configure the debug subsystem: log level, verbosity, identifier and the
 51 | // per-module flags. Always returns 0; lookup results are not checked.
 52 | int 
 53 | dbg_init(config_t* dbg_cfg, int level, const char* identifier)
 54 | {
 55 | 	// A negative `level` means the caller did not supply one: take the level
 56 | 	// from the "debug.level" configuration key instead.
 57 | 	if (level < 0) {
 58 | 		__cconfig_lookup_int(dbg_cfg, "debug.level", &dbg_level);
 59 | 	} else {
 60 | 		dbg_level = level;
 61 | 	}
 62 | 
 63 | 	__cconfig_lookup_int(dbg_cfg, "debug.verbose", &dbg_verbose);
 64 | 
 65 | 	// If the caller did not provide an identifier, use the DEBUG_IDENTIFIER
 66 | 	// environment variable if set, else the process id rendered in decimal.
 67 | 	if (!identifier) {
 68 | 		dbg_identifier = getenv("DEBUG_IDENTIFIER");
 69 | 		if (!dbg_identifier) {
 70 | 			sprintf(dbg_identifier_buf, "%d", getpid()); 
 71 | 			dbg_identifier = dbg_identifier_buf;
 72 | 		}
 73 | 	} else {
 74 | 		dbg_identifier = identifier;
 75 | 	}
 76 | 	// `identifier` and the getenv() result are stored by pointer, not copied.
 77 | 
 78 | 	// Read the per-module flags: each key is "debug_module_<name>" with '_' turned into '.'.
 79 | #define STR(name) #name
 80 | #define ACTION(name)                                                           \
 81 | 	do {                                                                       \
 82 | 		char dotstr[128];                                                      \
 83 | 		strrep(dotstr, STR(debug_module_##name), '_', '.');                    \
 84 | 		__cconfig_lookup_bool(dbg_cfg, dotstr,                                 \
 85 | 		                      &dbg_modules[dbg_module_##name]);                \
 86 | 	} while (0);
 87 | 
 88 | 	FOREACH_DEBUG_MODULE(ACTION)
 89 | #undef ACTION
 90 |         DBG_LOG(DEBUG, ""); // prevent compiler warning
 91 | 	return 0;
 92 | }
 93 | 
 94 | 
 95 | // Print up to 10 frames of the caller's stack to stdout, one symbol per line.
 96 | // backtrace_symbols() can fail and return NULL; then only the count is printed.
 97 | void
 98 | dbg_backtrace (void)
 99 | {
100 | 	void *frames[10];
101 | 	char **symbols;
102 | 	int count;
103 | 	int i;
104 | 
105 | 	count = backtrace (frames, 10);
106 | 	symbols = backtrace_symbols (frames, count);
107 | 	printf ("Obtained %d stack frames.\n", count);
108 | 	for (i = 0; symbols != NULL && i < count; i++)
109 | 		printf ("%s\n", symbols[i]);
110 | 	free (symbols);
111 | }
112 | 


--------------------------------------------------------------------------------
/src/lib/debug.h:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #ifndef __DEBUG_H
 15 | #define __DEBUG_H
 16 | 
 17 | #include <stdio.h>
 18 | #include <stdlib.h>
 19 | #include <assert.h>
 20 | #include <time.h>
 21 | #include "config.h"
 22 | // X-macro list of debug modules; each ACTION(name) entry becomes a dbg_module_<name> enumerator below.
 23 | #define FOREACH_DEBUG_MODULE(ACTION)                        \
 24 | 	ACTION(all) /* special name that covers all modules */
 25 | 
 26 | // Expand the list into enumerators; dbg_module_count follows the last module and so equals the number of modules.
 27 | #define ACTION(name)                                        \
 28 | 	dbg_module_##name,
 29 | 
 30 | enum {
 31 | 	FOREACH_DEBUG_MODULE(ACTION)
 32 | 	dbg_module_count
 33 | };
 34 | #undef ACTION
 35 | 
 36 | #ifndef NDEBUG
 37 | #define DBG_CODE(code) DBG_##code
 38 | 
 39 | enum dbg_code { // message levels; DBG_LOG compares them with <= against dbg_level, so lower values pass more thresholds
 40 | 	DBG_OFF = 0, // evaluates false in DBG_LOG's first test, so never printed
 41 | 	DBG_CODE(CRITICAL) = 1, // Critical
 42 | 	DBG_CODE(ERROR)    = 2, // Error
 43 | 	DBG_CODE(WARNING)  = 3, // Warning
 44 | 	DBG_CODE(INFO)     = 4, // Info
 45 | 	DBG_CODE(DEBUG)    = 5, // Debugging
 46 | };
 47 | 
 48 | static const char* dbg_code2str[] = { // printable level names, indexed by enum dbg_code
 49 | 	(char*) "OFF",
 50 | 	(char*) "CRITICAL",
 51 | 	(char*) "ERROR",
 52 | 	(char*) "WARNING",
 53 | 	(char*) "INFO",
 54 | 	(char*) "DEBUG",
 55 | };
 56 | 
 57 | static const int dbg_terminate_level = DBG_ERROR; // messages at or below this level are always printed and then exit(-1)
 58 | static const int dbg_stderr_level = DBG_WARNING; // messages at or below this level go to stderr; the rest go to stdout
 59 | 
 60 | extern int         dbg_modules[];
 61 | extern int         dbg_level;
 62 | extern int         dbg_verbose;
 63 | extern const char* dbg_identifier;
 64 | 
 65 | #define DBG_MODULE(name) dbg_module_##name
 66 | // DBG_LOG(level, fmt, ...): prints when level <= dbg_level or level <= dbg_terminate_level, then calls exit(-1) for the latter. The expansion already ends in ';'.
 67 | #define DBG_LOG(level, format, ...)                                            \
 68 |   do {                                                                         \
 69 |     FILE* ferr = stdout;                                                       \
 70 |     time_t ctime;                                                              \
 71 |     if (DBG_CODE(level) && (DBG_CODE(level) <= dbg_level ||                    \
 72 |                   DBG_CODE(level) <= dbg_terminate_level))                     \
 73 |     {                                                                          \
 74 |       if (DBG_CODE(level) <= dbg_stderr_level) {                               \
 75 |         ferr=stderr;                                                           \
 76 |       }                                                                        \
 77 |       if (dbg_verbose) {                                                       \
 78 |         ctime = time(NULL);                                                    \
 79 |         fprintf(ferr, "[%s] [%lu] %s in %s <%s,%d>: " format,                  \
 80 |                 dbg_identifier,                                                \
 81 |                 ctime,                                                         \
 82 |                 dbg_code2str[DBG_CODE(level)],                                 \
 83 |                 __FUNCTION__, __FILE__, __LINE__, ##__VA_ARGS__);              \
 84 |       } else {                                                                 \
 85 |         fprintf(ferr, "[%s] %s: " format,                                      \
 86 |                 dbg_identifier,                                                \
 87 |                 dbg_code2str[DBG_CODE(level)],                                 \
 88 |                 ##__VA_ARGS__);                                                \
 89 |       }                                                                        \
 90 |       if (DBG_CODE(level) <= dbg_terminate_level) {                            \
 91 |         exit(-1);                                                              \
 92 |       }	                                                                       \
 93 |     }			                                                               \
 94 |   } while(0);
 95 | 
 96 | // DBG_LOG2: like DBG_LOG, but also needs dbg_modules[module] or the "all" flag set (unless level <= dbg_terminate_level); it always prints function, file and line and ignores dbg_verbose.
 97 | #define DBG_LOG2(level, module, format, ...)                                   \
 98 |   do {                                                                         \
 99 |     FILE* ferr = stdout;                                                       \
100 |     if (DBG_CODE(level) &&                                                     \
101 | 	    (dbg_modules[module] || dbg_modules[dbg_module_all] ||                 \
102 | 		 DBG_CODE(level) <= dbg_terminate_level) &&                            \
103 | 	    (DBG_CODE(level) <= dbg_level ||                                       \
104 |          DBG_CODE(level) <= dbg_terminate_level))                              \
105 |     {                                                                          \
106 |       if (DBG_CODE(level) <= dbg_stderr_level) {                               \
107 |         ferr=stderr;                                                           \
108 |       }                                                                        \
109 |  	  fprintf(ferr, "[%s] %s in %s <%s,%d>: " format,                          \
110 |               dbg_identifier,                                                  \
111 |               dbg_code2str[DBG_CODE(level)],                                   \
112 |               __FUNCTION__, __FILE__, __LINE__, ##__VA_ARGS__);                \
113 |       if (DBG_CODE(level) <= dbg_terminate_level) {                            \
114 |         exit(-1);                                                              \
115 |       }	                                                                       \
116 |     }			                                                               \
117 |   } while(0);
118 | 
119 | #else /* NDEBUG */
120 | 
/*
 * NDEBUG build: both logging macros expand to nothing, so their arguments
 * are never evaluated and no message is ever printed.
 */
#define DBG_LOG(level, format, ...)
#define DBG_LOG2(level, module, format, ...)
123 | 
124 | #endif /* NDEBUG */
125 | 
126 | 
/*
 * VERIFY(condition)
 *
 * Run-time assumption check that is defined regardless of NDEBUG. When the
 * condition is false, the stringified condition plus file and line are written
 * to stderr, stderr is flushed, and the condition is handed to DBG_LOG.
 *
 * The condition text is passed to DBG_LOG as a "%s" argument rather than as
 * the format string itself, so a condition containing '%' (for example
 * VERIFY(x % 2 == 0)) cannot be misread as a conversion specification.
 *
 * The flush happens before DBG_LOG because DBG_LOG may terminate the process.
 * The expansion keeps its trailing semicolon for compatibility with existing
 * call sites.
 */
#define VERIFY(condition)                                                      \
  do {                                                                         \
    if (!(condition)) {                                                        \
      fprintf(stderr, "Assumption \"%s\"\nFailed in file %s: at line:%i\n",    \
              #condition, __FILE__, __LINE__);                                 \
      fflush(stderr);                                                          \
      DBG_LOG(DBG_CRITICAL, "%s\n", #condition);                               \
    }                                                                          \
  } while (0);
135 | 
136 | 
/*
 * Initialize the debug subsystem using configuration dbg_cfg. Callers treat
 * any return value other than E_SUCCESS as failure.
 * NOTE(review): level and identifier are presumably the verbosity level and
 * the tag printed as "[identifier]" by DBG_LOG -- confirm against debug.c.
 */
int dbg_init(config_t* dbg_cfg, int level, const char* identifier);
/* NOTE(review): presumably prints the current call stack -- confirm in debug.c. */
void dbg_backtrace (void);
/* NOTE(review): presumably updates the dbg_level threshold -- confirm in debug.c. */
void dbg_set_level(int level);
140 | 
141 | #endif // __DEBUG_H
142 | 


--------------------------------------------------------------------------------
/src/lib/dev.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #include <sys/types.h>
 15 | #include <sys/stat.h>
 16 | #include <sys/ioctl.h>
 17 | #include <fcntl.h>
 18 | #include <stdint.h>
 19 | #include <unistd.h>
 20 | #include <errno.h>
 21 | #include "dev/ioctl_query.h"
 22 | #include "error.h"
 23 | #include "dev.h"
 24 | 
 25 | // TODO: get this value from the config file
 26 | #define DEV_PATH "/dev/nvmemul"
 27 | 
 28 | int set_counter(unsigned int counter_id, unsigned int event_id)
 29 | {
 30 |     int fd;
 31 |     int ret;
 32 | 
 33 |     ioctl_query_setcounter_t q;
 34 |     fd = open(DEV_PATH, O_RDONLY);
 35 |     if (fd < 0) {
 36 |         DBG_LOG(ERROR, "Can't open %s - Is the NVM emulator device driver installed?\n", DEV_PATH);
 37 |         return E_ERROR;
 38 |     }
 39 |     q.counter_id = counter_id;
 40 |     q.event_id = event_id;
 41 |     if ((ret = ioctl(fd, IOCTL_SETCOUNTER, &q)) < 0) {
 42 |     close(fd);
 43 |         return E_ERROR;
 44 |     }
 45 |     close(fd);
 46 |     return E_SUCCESS;
 47 | }
 48 | 
 49 | 
 50 | int set_pci(unsigned int bus_id, unsigned int device_id, unsigned int function_id, unsigned int offset, uint16_t val)
 51 | {
 52 | 	int fd; 
 53 |     int ret;
 54 | 
 55 |     ioctl_query_setgetpci_t q;
 56 | 	fd = open(DEV_PATH, O_RDONLY);
 57 | 	if (fd < 0) {
 58 | 		DBG_LOG(ERROR, "Can't open %s - Is the NVM emulator device driver installed?\n", DEV_PATH);
 59 | 		return E_ERROR;
 60 | 	}
 61 |     q.bus_id = bus_id;
 62 |     q.device_id = device_id;
 63 |     q.function_id = function_id;
 64 |     q.offset = offset;
 65 |     q.val = val;
 66 |     if ((ret = ioctl(fd, IOCTL_SETPCI, &q)) < 0) {
 67 |     	close(fd);
 68 |         return E_ERROR;
 69 |     }
 70 | 	close(fd);
 71 |     return E_SUCCESS;
 72 | }
 73 | 
 74 | int get_pci(unsigned int bus_id, unsigned int device_id, unsigned int function_id, unsigned int offset, uint16_t* val)
 75 | {
 76 | 	int fd; 
 77 |     int ret;
 78 | 
 79 |     ioctl_query_setgetpci_t q;
 80 | 	fd = open(DEV_PATH, O_RDWR);
 81 | 	if (fd < 0) {
 82 | 		DBG_LOG(ERROR, "Can't open %s - Is the NVM emulator device driver installed?\n", DEV_PATH);
 83 | 		return E_ERROR;
 84 | 	}
 85 |     q.bus_id = bus_id;
 86 |     q.device_id = device_id;
 87 |     q.function_id = function_id;
 88 |     q.offset = offset;
 89 |     q.val = 0;
 90 |     if ((ret = ioctl(fd, IOCTL_GETPCI, &q)) < 0) {
 91 |     	close(fd);
 92 |         return E_ERROR;
 93 |     }
 94 |     *val = q.val;
 95 | 	close(fd);
 96 |     return E_SUCCESS;
 97 | }
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/src/lib/dev.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __DEVICE_DRIVER_API_H
15 | #define __DEVICE_DRIVER_API_H
16 | 
17 | #include <stdint.h>
18 | 
/* Upper bounds used to size the PCI bookkeeping structures below. */
#define MAX_NUM_MC_PCI_BUS 16
#define MAX_NUM_MC_CHANNELS 16

/* Address of one PCI function: bus number, device number, function number. */
typedef struct {
    unsigned int bus_id;
    unsigned int dev_id;
    unsigned int funct;
} pci_addr;

/*
 * A set of PCI function addresses, holding at most MAX_NUM_MC_CHANNELS entries.
 * NOTE(review): channels is presumably the number of valid entries in addr --
 * confirm against the code that fills this structure.
 */
typedef struct {
    pci_addr addr[MAX_NUM_MC_CHANNELS];
    unsigned int channels;
} pci_regs_t;
32 | 
/*
 * Thin wrappers around the emulator device driver's ioctl interface. Each call
 * opens the device, issues one ioctl and closes the device again. All three
 * return E_SUCCESS on success and E_ERROR when the device cannot be opened or
 * the ioctl fails.
 */

/* Issue IOCTL_SETCOUNTER for the given counter/event pair. */
int set_counter(unsigned int counter_id, unsigned int event_id);
/* Issue IOCTL_SETPCI: write val at the given bus/device/function/offset. */
int set_pci(unsigned bus_id, unsigned int device_id, unsigned int function_id, unsigned int offset, uint16_t val);
/* Issue IOCTL_GETPCI: read the given bus/device/function/offset into *val. */
int get_pci(unsigned bus_id, unsigned int device_id, unsigned int function_id, unsigned int offset, uint16_t* val);
36 | 
37 | #endif /* __DEVICE_DRIVER_API_H */
38 | 


--------------------------------------------------------------------------------
/src/lib/errno.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __ERRNO_H
15 | #define __ERRNO_H
16 | 
17 | #ifdef __DEFINE_ERRNO
18 | # error "__DEFINE_ERRNO previously defined"
19 | #endif
20 | 
/*
 * Define error codes and error messages here.
 *
 * This is an X-macro list: every entry is ACTION(name, message). The list is
 * expanded below with __ENUM_MEMBER to generate the error-code enum; the
 * (currently commented-out) ErrorToString would expand it with __ERRNO_STRING
 * to build the matching message table. Add new codes by appending an ACTION
 * line; the order of the lines determines the numeric values.
 */
#define __DEFINE_ERRNO(ACTION)                                               \
	ACTION(E_SUCCESS, "Success")                                             \
	ACTION(E_ERROR, "Generic error")                                         \
	ACTION(E_NOMEM, "No memory")                                             \
    ACTION(E_EXIST, "Name already exists")                                   \
    ACTION(E_NOENT, "Name does not exist")                                   \
    ACTION(E_INVAL, "Invalid argument")                                      \
    ACTION(E_BUSY, "Resource busy")                                          \
    ACTION(E_NOTEMPTY, "Not empty")                                          \
    ACTION(E_ERRNO, "Standard C library error; check errno for details")
34 | 
35 | 
36 | #ifdef __ENUM_MEMBER
37 | # error "__ENUM_MEMBER previously defined"
38 | #endif
39 | 
/* Expands one __DEFINE_ERRNO entry to its enumerator name (message dropped). */
#define __ENUM_MEMBER(name, str)  name,

/*
 * Error codes. E_SUCCESS is the first enumerator and therefore 0; the others
 * follow in list order. E_MAXERRNO is one past the last real code, i.e. the
 * number of defined error codes.
 */
enum {
	__DEFINE_ERRNO(__ENUM_MEMBER)
	E_MAXERRNO
};

#undef __ENUM_MEMBER /* don't pollute the macro namespace */
48 | 
49 | #ifdef __ERRNO_STRING
50 | # error "__ERRNO_STRING previously defined"
51 | #endif
52 | 
53 | #define __ERRNO_STRING(name, str) str,
54 | 
55 | /*
56 |     TODO: not used for now
57 | static const char* 
58 | ErrorToString(int err) {
59 | 	static const char* errstr[] = {
60 | 		__DEFINE_ERRNO(__ERRNO_STRING)
61 | 		"Unknown error code"
62 | 	};
63 | 	if (err >= 0 && err < E_MAXERRNO) {
64 | 		return errstr[err];
65 | 	}
66 | 	return errstr[E_MAXERRNO];
67 | }
68 | */
#undef __ERRNO_STRING /* don't pollute the macro namespace */
#undef __DEFINE_ERRNO /* don't pollute the macro namespace */
71 | 
72 | #endif /* __ERRNO_H */
73 | 


--------------------------------------------------------------------------------
/src/lib/error.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __ERROR_H
15 | #define __ERROR_H
16 | 
17 | #include "errno.h"
18 | #include "debug.h"
19 | 
20 | #endif /* __ERROR_H */
21 | 


--------------------------------------------------------------------------------
/src/lib/init.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #include <errno.h>
 15 | #include "cpu/cpu.h"
 16 | #include "config.h"
 17 | #include "error.h"
 18 | #include "model.h"
 19 | #include "measure.h"
 20 | #include "thread.h"
 21 | #include "topology.h"
 22 | #include "interpose.h"
 23 | #include "monotonic_timer.h"
 24 | #include "pflush.h"
 25 | #include "stat.h"
 26 | 
/*
 * Library entry and exit hooks. The constructor/destructor attributes make
 * the toolchain run init() automatically before main() (or when the shared
 * object is loaded) and finalize() at exit/unload.
 */
static void init() __attribute__((constructor));
static void finalize() __attribute__((destructor));

/* Defined in other translation units of the library; declared here because no
 * header included above provides these prototypes. */
int set_process_local_rank();
int unset_process_local_rank();
int partition_cpus(virtual_topology_t* virtual_topology);

/* Topology built by init() via init_virtual_topology(); stays NULL until then.
 * finalize() reads it when resetting bandwidth throttling. */
static virtual_topology_t* virtual_topology = NULL;
 35 | 
 36 | void finalize() {
 37 |     int i;
 38 |     if (latency_model.enabled) {
 39 |         unregister_self();
 40 |     }
 41 | 
 42 |     if (read_bw_model.enabled) {
 43 |         for (i=0; i < virtual_topology->num_virtual_nodes; i++) {
 44 |             // FIXME: currently we keep a single bandwidth model and not per-node BW model
 45 |             physical_node_t* phys_node = virtual_topology->virtual_nodes[i].nvram_node;
 46 |             pci_regs_t *regs = phys_node->mc_pci_regs;
 47 | 
 48 |             // reset throttling
 49 |             phys_node->cpu_model->set_throttle_register(regs, THROTTLE_DDR_ACT, 0x8FFF);
 50 |         }
 51 |     }
 52 | #ifdef USE_STATISTICS
 53 |     stats_report();
 54 | #endif
 55 |     // finalize libraries and release resources
 56 | #ifdef PAPI_SUPPORT
 57 |     pmc_shutdown();
 58 | #endif
 59 | 
 60 |     unset_process_local_rank();
 61 | 
 62 |     //__cconfig_destroy(&cfg);
 63 | }
 64 | 
 65 | void init()
 66 | {
 67 |     config_t cfg;
 68 |     cpu_model_t* cpu;
 69 |     char* ld_preload_path;
 70 |     double start_time, end_time;
 71 | #ifdef CALIBRATION_SUPPORT
 72 |     int i;
 73 | #endif
 74 | 
 75 |     // FIXME: do we need to register the main thread with our system?
 76 |     // YES: for sure for single-threaded apps
 77 | 
 78 |     start_time = monotonic_time_us();
 79 | 
 80 |     // we reset LD_PRELOAD to ensure we don't get into recursive preloads when 
 81 |     // calling popen during initialization. before exiting we reactivate LD_PRELOAD 
 82 |     // to allow LD_PRELOADS on children
 83 |     ld_preload_path = getenv("LD_PRELOAD");
 84 |     unsetenv("LD_PRELOAD");
 85 | 
 86 |     if (__cconfig_init(&cfg, "nvmemul.ini") == CONFIG_FALSE) {
 87 |         goto error;
 88 |     }
 89 | 
 90 |     __cconfig_lookup_bool(&cfg, "latency.enable", &latency_model.enabled);
 91 |     __cconfig_lookup_bool(&cfg, "bandwidth.enable", &read_bw_model.enabled);
 92 | 
 93 |     if (dbg_init(&cfg, -1, NULL) != E_SUCCESS) {
 94 |         goto error;
 95 |     }
 96 | 
 97 |     if (init_interposition() != E_SUCCESS) {
 98 |         goto error;
 99 |     }
100 | 
101 |     if ((cpu = cpu_model()) == NULL) {
102 |         DBG_LOG(ERROR, "No supported processor found\n");
103 |         goto error;
104 |     }
105 | 
106 |     init_virtual_topology(&cfg, cpu, &virtual_topology);
107 | 
108 |     if (init_bandwidth_model(&cfg, virtual_topology) != E_SUCCESS) {
109 |         goto error;
110 |     }
111 | 
112 |     if (latency_model.enabled) {
113 |         if (init_latency_model(&cfg, cpu, virtual_topology) != E_SUCCESS) {
114 |    	        goto error;
115 |         }
116 | 
117 |         init_thread_manager(&cfg, virtual_topology);
118 | 
119 | #ifdef USE_STATISTICS
120 |         // statistics makes use of the thread manager and is used by the register_self()
121 |         stats_enable(&cfg);
122 | #endif
123 | 
124 |         set_process_local_rank();
125 | 
126 |         // thread manager must be initialized and local rank set
127 |         // CPU partitioning must be made before the first thread is registered
128 |         if (partition_cpus(virtual_topology) != E_SUCCESS) {
129 |             goto error;
130 |         }
131 | 
132 |         if (register_self() != E_SUCCESS) {
133 |             goto error;
134 |         }
135 | 
136 | #ifdef CALIBRATION_SUPPORT
137 |         // main thread is now tracked by the latency emulator
138 |         // first, calibrate the latency emulation
139 |         if (latency_model.calibration) {
140 |             for (i = 0; i < virtual_topology->num_virtual_nodes; ++i) {
141 |                 latency_calibration(&virtual_topology->virtual_nodes[i]);
142 |             }
143 |         }
144 | #endif
145 |         int write_latency;
146 |         __cconfig_lookup_int(&cfg, "latency.write", &write_latency);
147 |         init_pflush(cpu_speed_mhz(), write_latency);
148 |     }
149 | 
150 |     end_time = monotonic_time_us();
151 | 
152 | #ifdef USE_STATISTICS
153 |     if (latency_model.enabled) {
154 |         stats_set_init_time(end_time - start_time);
155 |     }
156 | #endif
157 | 
158 |     if (ld_preload_path)
159 |         setenv("LD_PRELOAD", ld_preload_path, 1);
160 | 
161 |     return;
162 | 
163 | error:
164 |     /* Cannot initialize library -- catastrophic error */
165 |     if (ld_preload_path)
166 |         setenv("LD_PRELOAD", ld_preload_path, 1);
167 | 
168 |     fprintf(stderr, "ERROR: nvmemul: Initialization failed. Running without non-volatile memory emulation.\n");
169 | }
170 | 


--------------------------------------------------------------------------------
/src/lib/interpose.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #define _GNU_SOURCE
 15 | #include <stdio.h>
 16 | #include <dlfcn.h>
 17 | #include <pthread.h>
 18 | #include <assert.h>
 19 | #include <signal.h>
 20 | #include "error.h"
 21 | #include "model.h"
 22 | #include "thread.h"
 23 | #include "cpu/cpu.h"
 24 | #ifdef PAPI_SUPPORT
 25 | #include "cpu/pmc-papi.h"
 26 | #else
 27 | #include "cpu/pmc.h"
 28 | #endif
 29 | 
 30 | 
 31 | // WARNING: Our library MUST directly use the functions we interpose on by 
 32 | // calling __lib_X to avoid interposition on ourselves.
 33 | 
 34 | 
// Pointers to the next definitions of the pthread functions this library
// interposes on. They are filled in by init_interposition() via
// dlsym(RTLD_NEXT, ...) and remain NULL until then.
int (*__lib_pthread_create)(pthread_t *thread, const pthread_attr_t *attr,
                              void *(*start_routine) (void *), void *arg);
int (*__lib_pthread_mutex_lock)(pthread_mutex_t *mutex);
int (*__lib_pthread_mutex_trylock)(pthread_mutex_t *mutex);
int (*__lib_pthread_mutex_unlock)(pthread_mutex_t *mutex);
int (*__lib_pthread_detach)(pthread_t thread);

// NOTE(review): these extern declarations presumably make this translation
// unit emit the out-of-line copies of inline functions defined in the cpu
// headers -- confirm against cpu/cpu.h and cpu/pmc.h.
extern inline hrtime_t hrtime_cycles(void);
extern inline int cycles_to_us(cpu_model_t* cpu, hrtime_t cycles);
 44 | 
 45 | 
 46 | int init_interposition()
 47 | {
 48 | 	char *error;
 49 |     // if no symbol is returned then no interposition needed
 50 |     __lib_pthread_create = dlsym(RTLD_NEXT, "pthread_create");
 51 |     __lib_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock");
 52 |     __lib_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock");
 53 |     __lib_pthread_mutex_unlock = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
 54 |     __lib_pthread_detach = dlsym(RTLD_NEXT, "pthread_detach");
 55 | 
 56 |     if (__lib_pthread_mutex_lock == NULL || __lib_pthread_mutex_unlock == NULL ||
 57 |     	    __lib_pthread_create == NULL || __lib_pthread_mutex_trylock == NULL ||
 58 |     	    __lib_pthread_detach == NULL) {
 59 |     	error = dlerror();
 60 |     	DBG_LOG(ERROR, "Interposition failed: %s\n", error != NULL ? error : "unknown reason");
 61 |     	return E_ERROR;
 62 |     }
 63 | 
 64 |     return E_SUCCESS;
 65 | }
 66 | 
 67 | 
// Interposing on pthread_create requires interposing on the thread created as we 
// require the TID of that thread which we can only get by executing the gettid() 
// system call from that thread. So we interpose on the start_routine which is
// called by the new thread.
//
// This structure carries the caller's original start routine and its argument
// from the interposed pthread_create() to __interposed_start_routine(). It is
// heap-allocated by pthread_create() and freed by the new thread.
typedef struct {
    void *(*start_routine) (void *);
    void *arg;
} pthread_create_functor_t;
 76 | 
 77 | void* __interposed_start_routine(void* args)
 78 | {
 79 |     void* ret;
 80 |     pthread_create_functor_t* f = (pthread_create_functor_t*) args;
 81 |     if (register_self() != E_SUCCESS) {
 82 |         free(args);
 83 |         return NULL;
 84 |     }
 85 |     ret = f->start_routine(f->arg);
 86 |     // FIXME: directly calling unregister may miss cases where the 
 87 |     // thread terminates prematurely (such as pthread_exit or cancel)
 88 |     // consider using a key destructor function instead
 89 |     //fprintf(stderr, "stall cycles: %lu\n", thread_self()->stall_cycles);
 90 |     //fprintf(stderr, "signals_sent: %lu signals_recv: %lu\n", thread_self()->signals_sent, thread_self()->signals_recv);
 91 |     unregister_self();
 92 |     free(args);
 93 |     return ret;
 94 | }
 95 | 
 96 | int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
 97 |                    void *(*start_routine) (void *), void *arg)
 98 | {
 99 |     int ret;
100 | 
101 |     //DBG_LOG(DEBUG, "interposing pthread_create\n");
102 | 
103 |     //assert(__lib_pthread_create);
104 |     if (__lib_pthread_create == NULL)
105 |         init_interposition();
106 | 
107 |     if (latency_model.enabled) {
108 |         pthread_create_functor_t *functor = malloc(sizeof(pthread_create_functor_t));
109 |         functor->arg = arg;
110 |         functor->start_routine = start_routine;
111 | 
112 |         if ((ret = __lib_pthread_create(thread, attr, __interposed_start_routine, (void*) functor)) != 0) {
113 |             DBG_LOG(ERROR, "call to __lib_pthread_create failed\n");
114 |             return ret;
115 |         }
116 |     } else {
117 |         ret = __lib_pthread_create(thread, attr, start_routine, arg);
118 |     }
119 | 
120 |     return ret;    
121 | }
122 | 
123 | int pthread_mutex_lock(pthread_mutex_t *mutex)
124 | {
125 |     int err;
126 | 
127 |     if (latency_model.enabled) {
128 |         if(reached_min_epoch_duration(thread_self())) {
129 |             // create new epoch here in order to propagate only the critical session delay to other threads
130 |             // the thread monitor will keep trying to create new epoch, unless the min duration has not been reached
131 |             create_latency_epoch();
132 |         }
133 |     }
134 | 
135 |     //DBG_LOG(DEBUG, "interposing pthread_mutex_lock\n");
136 | 
137 |     //assert(__lib_pthread_mutex_lock);
138 |     if (__lib_pthread_mutex_lock == NULL)
139 |         init_interposition();
140 |     err =  __lib_pthread_mutex_lock(mutex);
141 | 
142 |     return err;
143 | }
144 | 
145 | int pthread_mutex_trylock(pthread_mutex_t *mutex)
146 | {
147 |     int err;
148 | 
149 |     if (latency_model.enabled) {
150 |         if(reached_min_epoch_duration(thread_self())) {
151 |             create_latency_epoch();
152 |         }
153 |     }
154 | 
155 |     //DBG_LOG(DEBUG, "interposing pthread_mutex_trylock\n");
156 | 
157 |     //assert(__lib_pthread_mutex_trylock);
158 |     if (__lib_pthread_mutex_trylock == NULL)
159 |         init_interposition();
160 |     err =  __lib_pthread_mutex_trylock(mutex);
161 | 
162 |     return err;
163 | }
164 | 
165 | int pthread_mutex_unlock(pthread_mutex_t *mutex)
166 | {
167 |     int err;
168 | 
169 |     if (latency_model.enabled) {
170 |         if (reached_min_epoch_duration(thread_self())) {
171 |             create_latency_epoch();
172 |         }
173 |     }
174 | 
175 |     //DBG_LOG(DEBUG, "interposing pthread_mutex_unlock\n");
176 | 
177 |     //assert(__lib_pthread_mutex_unlock);
178 |     if (__lib_pthread_mutex_unlock == NULL)
179 |         init_interposition();
180 |     err = __lib_pthread_mutex_unlock(mutex);
181 | 
182 |     return err;
183 | }
184 | 


--------------------------------------------------------------------------------
/src/lib/interpose.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __INTERPOSE_H
15 | #define __INTERPOSE_H
16 | 
17 | 
18 | /**
19 |  * 
20 |  * \page library_interposition Library interposition 
21 |  * 
22 |  * The emulator intercepts several events of interest. It achieves this
23 |  * by interposing on corresponding functions. 
24 |  * Currently this includes thread creation and POSIX synchronization mechanisms.
25 |  */
26 | 
/*
 * Pointers to the real (next-in-link-order) pthread functions. Code inside the
 * library must call these instead of the public names to avoid interposing on
 * itself. They are NULL until init_interposition() has run.
 */
extern int (*__lib_pthread_create)(pthread_t *thread, const pthread_attr_t *attr,
                                   void *(*start_routine) (void *), void *arg);
extern int (*__lib_pthread_mutex_lock)(pthread_mutex_t *mutex);
extern int (*__lib_pthread_mutex_trylock)(pthread_mutex_t *mutex);
extern int (*__lib_pthread_mutex_unlock)(pthread_mutex_t *mutex);
extern int (*__lib_pthread_detach)(pthread_t thread);

/* Resolve the pointers above; returns E_SUCCESS, or E_ERROR if any symbol is
 * missing. */
int init_interposition();
35 | 
36 | #endif /* __INTERPOSE_H */
37 | 


--------------------------------------------------------------------------------
/src/lib/measure.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __MEASURE_H
15 | #define __MEASURE_H
16 | 
17 | /**
18 |  * \file 
19 |  * 
20 |  * Memory latency and bandwidth measurements
21 |  */
22 | 
/**
 * \brief Measure memory read bandwidth
 *
 * Measures memory read bandwidth from a local socket (cpu_node) 
 * to the memory of a remote socket (mem_node). It does this 
 * by firing a bunch of threads issuing streaming instructions
 * to saturate memory bandwidth. 
 *
 * \param cpu_node node whose CPUs issue the reads
 * \param mem_node node whose memory is read
 * \return the measured bandwidth (units are not stated in this header;
 *         see the implementation)
 */
double measure_read_bw(int cpu_node, int mem_node);
32 | 
/**
 * \brief Measure memory write bandwidth
 *
 * Measures memory write bandwidth from a local socket (cpu_node) 
 * to the memory of a remote socket (mem_node).
 * See measure_read_bw for how this is done.
 *
 * \param cpu_node node whose CPUs issue the writes
 * \param mem_node node whose memory is written
 * \return the measured bandwidth (units are not stated in this header;
 *         see the implementation)
 */
double measure_write_bw(int cpu_node, int mem_node);
41 | 
42 | 
/** 
 * \brief Measure memory latency 
 * 
 * Measures memory read latency from one local socket to the memory of a 
 * remote socket. It does this using a pointer-chasing microbenchmark.
 * The microbenchmark sets up an array where each element determines the
 * element to be read next.
 *
 * \param cpu          CPU model description passed to the measurement code
 * \param from_node_id node whose CPU performs the reads
 * \param to_node_id   node whose memory is read
 * \return the measured latency (units are not stated in this header;
 *         see the implementation)
 */ 
int measure_latency(cpu_model_t* cpu, int from_node_id, int to_node_id);
52 | 
/**
 * \brief Calibrate memory latency
 *
 * Automatically tweaks the memory latency based on the detected hardware latency
 * on the target systems.
 *
 * NOTE(review): the empty parameter list leaves the parameters unspecified;
 * init() calls this once per virtual node and passes a pointer to that node.
 * Confirm the definition's signature and consider declaring it here.
 */
void latency_calibration();
60 | 
61 | #endif
62 | 


--------------------------------------------------------------------------------
/src/lib/misc.c:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #include <stddef.h>
15 | #include <stdlib.h>
16 | #include <string.h>
17 | #include <ctype.h>
18 | 
19 | 
20 | #include <stdio.h>
/*
 * Convert a human-readable size such as "4096", "64K", "16 MB" or "2g" to a
 * byte count.
 *
 * The leading decimal number is parsed with strtoull(). Any non-alphabetic
 * characters after it (for example spaces) are skipped, and the first
 * alphabetic character selects the multiplier:
 *   K/k = 1024, M/m = 1024^2, G/g = 1024^3, anything else (or nothing) = 1.
 *
 * Returns 0 for a NULL or empty string and for a string with no leading
 * number. No overflow checking is performed on the multiplication.
 */
size_t string_to_size(char* str)
{
    unsigned long long val;
    size_t factor;
    char*  endptr = NULL;

    if (str == NULL) {
        return 0;
    }

    /* keep the full unsigned range instead of squeezing it through a long */
    val = strtoull(str, &endptr, 10);

    /* skip separators between the number and the unit letter;
     * the cast keeps isalpha() defined for bytes above 0x7f */
    while (*endptr != '\0' && !isalpha((unsigned char) *endptr)) {
        endptr++;
    }

    switch (*endptr) {
        case 'K': case 'k':
            factor = 1024LLU;
            break;
        case 'M': case 'm':
            factor = 1024LLU*1024LLU;
            break;
        case 'G': case 'g':
            factor = 1024LLU*1024LLU*1024LLU;
            break;
        default:
            factor = 1;
            break;
    }

    return factor * (size_t) val;
}
47 | 


--------------------------------------------------------------------------------
/src/lib/misc.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __MISC_H
15 | #define __MISC_H
16 | 
17 | size_t string_to_size(char* str);
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/src/lib/model.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __MODEL_H
15 | #define __MODEL_H
16 | 
17 | #include "config.h"
18 | #include "cpu/cpu.h"
19 | #include "thread.h"
20 | #ifdef PAPI_SUPPORT
21 | #include "cpu/pmc-papi.h"
22 | #else
23 | #include "cpu/pmc.h"
24 | #endif
25 | 
26 | #define MAX_EPOCH_DURATION_US 1000000
27 | #define MIN_EPOCH_DURATION_US 1
28 | 
29 | typedef struct { /* latency emulation state; the shared instance is declared as latency_model */
30 | 	int enabled;
31 |     int read_latency; /* presumably the target read latency in ns -- TODO confirm units */
32 |     int write_latency; /* presumably the target write latency in ns -- TODO confirm units */
33 |     int inject_delay; /* when 0, stats_report() warns that delay injection is disabled */
34 | #ifdef CALIBRATION_SUPPORT
35 |     int calibration;
36 | #endif
37 | #ifdef PAPI_SUPPORT
38 |     read_stalls_t pmc_stall_local;
39 |     read_stalls_t pmc_stall_remote;
40 | #else
41 |     pmc_event_t* pmc_stall_cycles;
42 |     pmc_event_t* pmc_remote_dram; /* tested for NULL by the statistics code before the remote latency is used */
43 |     int process_local_rank; /* value read from the shared rank file by set_process_local_rank() */
44 |     int max_local_processe_ranks; /* parsed from the EMUL_LOCAL_PROCESSES environment variable */
45 | #endif
46 | 
47 |     double stalls_calibration_factor;
48 | } latency_model_t;
49 | 
50 | extern latency_model_t latency_model;
51 | 
52 | typedef struct { /* one bandwidth model; instances are read_bw_model and write_bw_model */
53 |     unsigned int throttle_reg_val[MAX_THROTTLE_VALUE]; 
54 |     double bandwidth[MAX_THROTTLE_VALUE]; /* presumably bandwidth[i] pairs with throttle_reg_val[i] -- TODO confirm */
55 |     int npoints; /* presumably the number of filled entries in both arrays -- TODO confirm */
56 |     int enabled;
57 | } bw_model_t;
58 | 
59 | extern bw_model_t read_bw_model;
60 | extern bw_model_t write_bw_model;
61 | 
62 | int init_bandwidth_model(config_t* cfg, struct virtual_topology_s* topology);
63 | int init_latency_model(config_t* cfg, cpu_model_t* cpu, struct virtual_topology_s* virtual_topology);
64 | void init_thread_latency_model(thread_t *thread);
65 | 
66 | void create_latency_epoch();
67 | 
68 | #endif /* __MODEL_H */
69 | 


--------------------------------------------------------------------------------
/src/lib/monotonic_timer.c:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Alex Reece.
 2 | //
 3 | // A cross platform monotonic timer.
 4 | 
 5 | #include <unistd.h>
 6 | #include "monotonic_timer.h"
 7 | 
 8 | #if _POSIX_TIMERS > 0 && defined(_POSIX_MONOTONIC_CLOCK)
 9 |   // If we have it, use clock_gettime and CLOCK_MONOTONIC.
10 | 
11 |   #include <time.h>
12 | 
13 |   double monotonic_time() {  // seconds since an unspecified start point
14 |     struct timespec now;
15 |     clock_gettime(CLOCK_MONOTONIC, &now);  // Note: link with -lrt to define clock_gettime.
16 |     const double whole_seconds = (double) now.tv_sec;
17 |     return whole_seconds + ((double) now.tv_nsec / (NANOS_PER_SECF));
18 |   }
19 | 
20 |   // Microseconds since an unspecified start point, read from CLOCK_MONOTONIC.
21 |   double monotonic_time_us() {
22 |     struct timespec time;
23 |     clock_gettime(CLOCK_MONOTONIC, &time);  // Note: link with -lrt to define clock_gettime.
24 |     // Widen to double before scaling so a 32-bit time_t cannot overflow.
25 |     return ((double) time.tv_sec * USECS_PER_SEC) + ((double) time.tv_nsec / NANOS_PER_USECF);
26 |   }
27 | #else
28 |   // Fall back to rdtsc. The reason we don't use clock() is this scary message
29 |   // from the man page:
30 |   //     "On several other implementations, the value returned by clock() also
31 |   //      includes the times of any children whose status has been collected via
32 |   //      wait(2) (or another wait-type call)."
33 |   //
34 |   // Also, clock() only has microsecond accuracy.
35 |   //
36 |   // This whitepaper offered excellent advice on how to use rdtscp for
37 |   // profiling: http://download.intel.com/embedded/software/IA/324264.pdf
38 |   //
39 |   // Unfortunately, we can't follow its advice exactly with our semantics,
40 |   // so we're just going to use rdtscp with cpuid.
41 |   //
42 |   // Note that rdtscp will only be available on new processors.
43 | 
44 |   #include <stdint.h>
45 | 
46 |   static inline uint64_t rdtsc() {  // reads the time-stamp counter via rdtscp
47 |     uint32_t hi, lo;
48 |     asm volatile("rdtscp\n"  // counter value arrives in edx:eax
49 |                  "movl %%edx, %0\n"  // save the high half into hi
50 |                  "movl %%eax, %1\n"  // save the low half into lo
51 |                  "cpuid"  // runs after the read; it overwrites eax/ebx/ecx/edx, hence the clobber list
52 |                  : "=r" (hi), "=r" (lo) : : "%rax", "%rbx", "%rcx", "%rdx");
53 |     return (((uint64_t)hi) << 32) | (uint64_t)lo;
54 |   }
55 | 
56 |   static uint64_t rdtsc_per_sec = 0;
57 |   static uint64_t rdtsc_per_usec = 0;
58 |   // Calibrates the TSC rate once at load time (constructor). This costs a
59 |   // one-second sleep before the program's own code starts running.
60 |   static void __attribute__((constructor)) init_rdtsc_per_sec() {
61 |     uint64_t before, after;
62 | 
63 |     before = rdtsc();
64 |     usleep(USECS_PER_SEC);
65 |     after = rdtsc();
66 |     rdtsc_per_sec = after - before;
67 | 
68 |     // Derive the per-microsecond rate from the one-second sample. Timing a
69 |     // usleep(1) instead measured mostly call and wake-up overhead, because
70 |     // usleep only guarantees a minimum sleep, so that rate came out inflated.
71 |     rdtsc_per_usec = rdtsc_per_sec / USECS_PER_SEC;
72 |   }
73 | 
74 |   double monotonic_time() {  // seconds, as TSC ticks over the calibrated per-second rate
75 |     const double ticks = (double) rdtsc();
76 |     return ticks / (double) rdtsc_per_sec;
77 |   }
78 |   // TODO: not tested, it is core specific and callers must be aware
79 |   double monotonic_time_us() {  // microseconds, as TSC ticks over the calibrated per-usec rate
80 |     const double ticks = (double) rdtsc();
81 |     return ticks / (double) rdtsc_per_usec;
82 |   }
83 | #endif
84 | 


--------------------------------------------------------------------------------
/src/lib/monotonic_timer.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Alex Reece.
 2 | //
 3 | // A cross platform monotonic timer.
 4 | 
 5 | #ifndef MONOTONIC_TIMER_H_
 6 | #define MONOTONIC_TIMER_H_
 7 | 
 8 | #define NANOS_PER_SECF 1000000000.0
 9 | #define NANOS_PER_USECF 1000.0
10 | #define NANOS_PER_USEC 1000
11 | #define USECS_PER_SEC 1000000
12 | 
13 | // Returns seconds since some unspecified start time (guaranteed to be
14 | // monotonically increasing).
15 | double monotonic_time();
16 | double monotonic_time_us();
17 | 
18 | #endif  // MONOTONIC_TIMER_H_
19 | 


--------------------------------------------------------------------------------
/src/lib/pflush.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #include "pflush.h"
 15 | 
 16 | #include <stdint.h>
 17 | 
 18 | typedef uint64_t hrtime_t;
 19 | 
 20 | #if defined(__i386__)
 21 | 
 22 | static inline unsigned long long asm_rdtsc(void) /* 32-bit x86: read the time-stamp counter */
 23 | {
 24 |     unsigned long long int x;
 25 |     __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); /* 0x0f 0x31 encodes rdtsc; "=A" is the edx:eax pair */
 26 |     return x;
 27 | }
 28 | 
 29 | static inline unsigned long long asm_rdtscp(void) /* 32-bit x86: read the counter with rdtscp */
 30 | {
 31 |         unsigned hi, lo;
 32 |     __asm__ __volatile__ ("rdtscp" : "=a"(lo), "=d"(hi)::"ecx"); /* rdtscp also writes ecx, hence the clobber */
 33 |     return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); /* combine as hi:lo */
 34 | 
 35 | }
 36 | #elif defined(__x86_64__)
 37 | 
 38 | static inline unsigned long long asm_rdtsc(void) /* x86-64: read the time-stamp counter */
 39 | {
 40 |     unsigned hi, lo;
 41 |     __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); /* low half in eax, high half in edx */
 42 |     return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); /* combine as hi:lo */
 43 | }
 44 | 
 45 | static inline unsigned long long asm_rdtscp(void) /* x86-64: read the counter with rdtscp */
 46 | {
 47 |     unsigned hi, lo;
 48 |     __asm__ __volatile__ ("rdtscp" : "=a"(lo), "=d"(hi)::"rcx"); /* rdtscp also writes ecx, hence the rcx clobber */
 49 |     return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); /* combine as hi:lo */
 50 | }
 51 | #else
 52 | #error "What architecture is this???"
 53 | #endif
 54 | 
 55 | /* Flush the cache line holding *addr; (addr) is parenthesized so pointer expressions expand correctly */
 56 | #define asm_clflush(addr)                   \
 57 | ({                              \
 58 |     __asm__ __volatile__ ("clflush %0" : : "m"(*(addr))); \
 59 | })
 60 | 
 61 | /* Memory fence. NOTE(review): PM_FENCE is not defined in pflush.c or pflush.h, and nothing in pflush.c uses this macro -- confirm before relying on it. */
 62 | #define asm_mfence()                \
 63 | ({                      \
 64 |     PM_FENCE();             \
 65 |     __asm__ __volatile__ ("mfence");    \
 66 | })
 67 | 
 68 | static int global_cpu_speed_mhz = 0;
 69 | static int global_write_latency_ns = 0;
 70 | 
 71 | void init_pflush(int cpu_speed_mhz, int write_latency_ns) /* store the settings pflush() reads */
 72 | {
 73 |     global_write_latency_ns = write_latency_ns; /* while this is 0, pflush() returns immediately */
 74 |     global_cpu_speed_mhz = cpu_speed_mhz;
 75 | }
 76 | 
 77 | inline hrtime_t cycles_to_ns(int cpu_speed_mhz, hrtime_t cycles) /* cycle count -> nanoseconds at cpu_speed_mhz */
 78 | {
 79 |     return (cycles*1000/cpu_speed_mhz); /* MHz is cycles per microsecond, so x1000 yields ns; NOTE(review): divides by zero when cpu_speed_mhz is 0 */
 80 | }
 81 | 
 82 | inline hrtime_t ns_to_cycles(int cpu_speed_mhz, hrtime_t ns) /* nanoseconds -> cycle count at cpu_speed_mhz */
 83 | {
 84 |     return (ns*cpu_speed_mhz/1000); /* integer division drops any fractional cycle */
 85 | }
 86 | 
 87 | /* Busy-waits on the time-stamp counter until `ns` nanoseconds' worth of cycles
 88 |  * (at global_cpu_speed_mhz) have elapsed since entry. */
 89 | static inline
 90 | void
 91 | emulate_latency_ns(int ns)
 92 | {
 93 |     const hrtime_t t0 = asm_rdtsc();
 94 |     const hrtime_t budget = ns_to_cycles(global_cpu_speed_mhz, ns);
 95 |     hrtime_t now;
 96 | 
 97 |     for (;;) {
 98 |         /* Plain RDTSC does not wait for earlier instructions to complete.
 99 |          * That is wanted here: the emulated delay should overlap with the
100 |          * real latency of the instruction being emulated, so no
101 |          * serializing instruction is placed in front of the read. */
102 |         now = asm_rdtsc();
103 |         if (now - t0 >= budget) {
104 |             break;
105 |         }
106 |     }
107 | }
108 | 
109 | /* Flush addr's cache line, then spin for the rest of the configured write latency.
110 |  * Returns without flushing while the write latency is 0 (its value before init_pflush). */
111 | void
112 | pflush(uint64_t *addr)
113 | {
114 |     if (global_write_latency_ns == 0) {
115 |         return;
116 |     }
117 |     hrtime_t start = asm_rdtscp();
118 |     asm_clflush(addr);
119 |     hrtime_t elapsed = asm_rdtscp() - start;
120 |     if (global_cpu_speed_mhz <= 0) {
121 |         return; /* cycles cannot be converted to ns; avoid dividing by zero */
122 |     }
123 |     /* Subtract in signed 64-bit: int minus uint64_t is unsigned arithmetic, so a
124 |      * slow flush wrapped around and turned negative only through int truncation. */
125 |     int64_t to_insert_ns = (int64_t)global_write_latency_ns -
126 |                            (int64_t)cycles_to_ns(global_cpu_speed_mhz, elapsed);
127 |     if (to_insert_ns > 0) emulate_latency_ns((int)to_insert_ns);
128 | }
129 | 


--------------------------------------------------------------------------------
/src/lib/pflush.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __PFLUSH_H
15 | #define __PFLUSH_H
16 | 
17 | /**
18 |  * \file
19 |  * 
20 |  * \page pflush_api Persistent Memory API 
21 |  *
22 |  * Method to be used by client to inject a write latency.
23 |  */
24 | 
25 | #include <stdint.h>
26 | 
27 | #ifdef __cplusplus
28 | extern "C" {
29 | #endif
30 | 
31 | void init_pflush(int cpu_speed_mhz, int write_latency_ns);
32 | 
33 | /**
34 |  * \brief Flush the cacheline containing address addr.
35 |  */
36 | void pflush(uint64_t *addr);
37 | 
38 | #ifdef __cplusplus
39 | }
40 | #endif
41 | 
42 | #endif /* __PFLUSH_H */
43 | 


--------------------------------------------------------------------------------
/src/lib/pmalloc.c:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #include <numa.h>
15 | #include "topology.h"
16 | #include "pmalloc.h"
17 | #include "thread.h"
18 | #include "debug.h"
19 | 
20 | // pmalloc should be implemented as a separate library
21 | 
22 | // FIXME: pmalloc currently uses numa_alloc_onnode() which is slower than regular malloc.
23 | // Consider layering another malloc on top of an emulated nvram 
24 | 
25 | 
26 | /* Allocates `size` bytes on the NUMA node backing the calling thread's emulated
27 |  * NVRAM. A thread the emulator has no record of is registered first. Logs an
28 |  * error and returns NULL when no thread record can be obtained. */
29 | void* pmalloc(size_t size)
30 | {
31 |     thread_t* self = thread_self();
32 | 
33 |     if (!self) {
34 |         // FIXME: JVM for instance create threads using a mechanism not traced by this emulator
35 |         //        for now we make sure the current thread is registered right when it makes the
36 |         //        first explicit NVM allocation. A better solution is to trace the thread creation
37 |         //        done by JVM.
38 |         register_self();
39 |         self = thread_self();
40 |     }
41 |     if (!self) {
42 |         DBG_LOG(ERROR, "pmalloc called with NULL thread\n");
43 |         return NULL;
44 |     }
45 |     return numa_alloc_onnode(size, self->virtual_node->nvram_node->node_id);
46 | }
47 | 
48 | void *prealloc(void *old_addr, size_t old_size, size_t new_size) /* resize a region, presumably one obtained from pmalloc() */
49 | {
50 |     return numa_realloc(old_addr, old_size, new_size); /* forwarded unchanged; the old size is required as well as the new one */
51 | }
52 | 
53 | void pfree(void* start, size_t size) /* release a region; both arguments go straight to numa_free() */
54 | {
55 |     numa_free(start, size);
56 | }
57 | 


--------------------------------------------------------------------------------
/src/lib/pmalloc.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __PMALLOC_H
15 | #define __PMALLOC_H
16 | 
17 | /**
18 |  * \file
19 |  * 
20 |  * \page pmalloc_api Persistent Memory API 
21 |  *
22 |  * Methods to be used by clients to allocate and free emulated NVRAM.
23 |  */
24 | 
25 | #include <stddef.h>
26 | 
27 | #ifdef __cplusplus
28 | extern "C" {
29 | #endif
30 | 
31 | void *pmalloc(size_t size);
32 | void *prealloc(void *old_addr, size_t old_size, size_t new_size);
33 | void pfree(void *start, size_t size);
34 | 
35 | #ifdef __cplusplus
36 | }
37 | #endif
38 | 
39 | #endif /* __PMALLOC_H */
40 | 


--------------------------------------------------------------------------------
/src/lib/process_rank.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | /*
 15 |  * process_rank.c
 16 |  *
 17 |  *  Created on: Jun 16, 2015
 18 |  *      Author: root
 19 |  */
 20 | 
 21 | 
 22 | #include <unistd.h>
 23 | #include "model.h"
 24 | #include "error.h"
 25 | 
 26 | #define EMUL_LOCAL_PROCESSES_VAR "EMUL_LOCAL_PROCESSES"
 27 | 
 28 | #define EMUL_LOCK_FILE "/tmp/emul_lock_file"
 29 | #define EMUL_PROCESS_LOCAL_RANK_FILE "/tmp/emul_process_local_rank"
 30 | #define LOCKED_WAIT_US 1000
 31 | #define MAX_LOCKED_RETRIES 50
 32 | 
 33 | extern latency_model_t latency_model;
 34 | 
 35 | /* Claims a rank for this process among the emulated processes on this host.
 36 |  * EMUL_LOCAL_PROCESSES gives the process count; ranking is skipped (E_SUCCESS)
 37 |  * when it is unset, not a number, or below 2. The next rank lives in
 38 |  * EMUL_PROCESS_LOCAL_RANK_FILE, touched only while the exclusively created
 39 |  * EMUL_LOCK_FILE is held. Returns E_ERROR if the lock cannot be taken, the rank
 40 |  * file cannot be opened, read or written, or the rank limit is reached. */
 41 | int set_process_local_rank()
 42 | {
 43 |     FILE *flock = NULL;
 44 |     FILE *fcounter = NULL;
 45 |     int expired = 0;
 46 |     int process_id = 0;
 47 |     char *processes;
 48 |     int ret = E_SUCCESS;
 49 | #ifndef NDEBUG
 50 |     char hname[64];
 51 | #endif
 52 | 
 53 |     processes = getenv(EMUL_LOCAL_PROCESSES_VAR);
 54 |     if (!processes) {
 55 |         DBG_LOG(WARNING, "No %s variable set, skipping rank setting\n", EMUL_LOCAL_PROCESSES_VAR);
 56 |         return E_SUCCESS;
 57 |     }
 58 |     if (sscanf(processes, "%d", &latency_model.max_local_processe_ranks) != 1) {
 59 |         DBG_LOG(WARNING, "Ignoring %s variable with invalid value '%s'\n",
 60 |                 EMUL_LOCAL_PROCESSES_VAR, processes);
 61 |         return E_SUCCESS;
 62 |     }
 63 |     if (latency_model.max_local_processe_ranks < 2) {
 64 |         DBG_LOG(WARNING, "%s value is %d, skipping rank setting\n",
 65 |                 EMUL_LOCAL_PROCESSES_VAR, latency_model.max_local_processe_ranks);
 66 |         return E_SUCCESS;
 67 |     }
 68 | 
 69 |     DBG_LOG(DEBUG, "setting process local rank for %d local processes\n", latency_model.max_local_processe_ranks);
 70 | 
 71 |     // "wx" fails while the lock file exists, so retry until it can be created
 72 |     while (expired < MAX_LOCKED_RETRIES) {
 73 |         flock = fopen(EMUL_LOCK_FILE, "wx");
 74 |         if (flock) break;
 75 |         usleep(LOCKED_WAIT_US);
 76 |         expired++;
 77 |     }
 78 |     if (flock == NULL) {
 79 |         DBG_LOG(ERROR, "failed to set process local rank\n");
 80 |         return E_ERROR;
 81 |     }
 82 |     if (access(EMUL_PROCESS_LOCAL_RANK_FILE, R_OK | W_OK) < 0) {
 83 |         // No usable rank file yet: create it holding 1, the rank for the next
 84 |         // process. latency_model.process_local_rank is not assigned on this path.
 85 |         process_id = 1;
 86 |         fcounter = fopen(EMUL_PROCESS_LOCAL_RANK_FILE, "w");
 87 |         if (fcounter == NULL || fwrite(&process_id, sizeof(int), 1, fcounter) != 1) {
 88 |             DBG_LOG(ERROR, "failed to create process local rank file\n");
 89 |             ret = E_ERROR;
 90 |         }
 91 |     } else {
 92 |         // Rank file exists: its value is this process' rank; value + 1 is stored for the next one.
 93 |         fcounter = fopen(EMUL_PROCESS_LOCAL_RANK_FILE, "r+");
 94 |         if (fcounter == NULL || fread(&process_id, sizeof(int), 1, fcounter) != 1) {
 95 |             // Report the failure rather than abort(): aborting at this point
 96 |             // left the lock file behind and blocked every later process.
 97 |             DBG_LOG(ERROR, "failed to read process local rank file\n");
 98 |             ret = E_ERROR;
 99 |         } else {
100 |             DBG_LOG(DEBUG, "read from file current max rank %d\n", process_id);
101 |             latency_model.process_local_rank = process_id;
102 |             process_id++;
103 |             if (process_id >= latency_model.max_local_processe_ranks) {
104 |                 DBG_LOG(ERROR, "process rank %d exceeded limit of %d max emulated processes\n",
105 |                         process_id, latency_model.max_local_processe_ranks);
106 |                 ret = E_ERROR;
107 |             } else {
108 |                 DBG_LOG(DEBUG, "write to file new max rank %d\n", process_id);
109 |                 rewind(fcounter);
110 |                 if (fwrite(&process_id, sizeof(int), 1, fcounter) != 1) ret = E_ERROR;
111 |             }
112 |         }
113 |     }
114 |     if (fcounter) fclose(fcounter);
115 |     // close and delete lock file
116 |     fclose(flock);
117 |     remove(EMUL_LOCK_FILE);
118 | 
119 | #ifndef NDEBUG
120 |     gethostname(hname, sizeof(hname));
121 |     DBG_LOG(DEBUG, "process local rank is %d on system %s\n", latency_model.process_local_rank, hname);
122 | #endif
123 | 
124 |     return ret;
125 | }
126 | 
127 | /* Gives this process' rank back: decrements the counter kept in
128 |  * EMUL_PROCESS_LOCAL_RANK_FILE while EMUL_LOCK_FILE is held. Does nothing when
129 |  * fewer than 2 local processes are configured or no rank file is accessible.
130 |  * Returns E_ERROR if the lock cannot be taken or the rank file cannot be
131 |  * opened, read or written. */
132 | int unset_process_local_rank()
133 | {
134 |     FILE *flock = NULL;
135 |     FILE *fcounter = NULL;
136 |     int expired = 0;
137 |     int process_id = 0;
138 |     int ret = E_SUCCESS;
139 | 
140 |     if (latency_model.max_local_processe_ranks < 2) {
141 |         return E_SUCCESS;
142 |     }
143 |     DBG_LOG(DEBUG, "Unsetting process local rank\n");
144 | 
145 |     // "wx" fails while the lock file exists, so retry until it can be created
146 |     while (expired < MAX_LOCKED_RETRIES) {
147 |         flock = fopen(EMUL_LOCK_FILE, "wx");
148 |         if (flock) break;
149 |         usleep(LOCKED_WAIT_US);
150 |         expired++;
151 |     }
152 |     if (flock == NULL) {
153 |         DBG_LOG(ERROR, "failed to unset process local rank\n");
154 |         return E_ERROR;
155 |     }
156 |     // lock acquired; without an accessible rank file there is nothing to do
157 |     if (access(EMUL_PROCESS_LOCAL_RANK_FILE, R_OK | W_OK) == 0) {
158 |         fcounter = fopen(EMUL_PROCESS_LOCAL_RANK_FILE, "r+");
159 |         if (fcounter == NULL || fread(&process_id, sizeof(int), 1, fcounter) != 1) {
160 |             // Report the failure rather than abort(): aborting at this point
161 |             // left the lock file behind and blocked every later process.
162 |             DBG_LOG(ERROR, "failed to read process local rank file\n");
163 |             ret = E_ERROR;
164 |         } else {
165 |             char hname[64];
166 |             DBG_LOG(DEBUG, "Exiting process and reading current rank max %d\n", process_id);
167 |             if (process_id > 0) process_id--;
168 |             gethostname(hname, sizeof(hname));
169 |             DBG_LOG(DEBUG, "Exiting process and writing new rank max %d on %s\n", process_id, hname);
170 |             rewind(fcounter);
171 |             if (fwrite(&process_id, sizeof(int), 1, fcounter) != 1) ret = E_ERROR;
172 |         }
173 |         if (fcounter) fclose(fcounter);
174 |     }
175 | 
176 |     // close and delete lock file
177 |     fclose(flock);
178 |     remove(EMUL_LOCK_FILE);
179 | 
180 |     return ret;
181 | }
182 | 


--------------------------------------------------------------------------------
/src/lib/stat.c:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
  3 | This program is free software; you can redistribute it and/or modify
  4 | it under the terms of the GNU General Public License as published by
  5 | the Free Software Foundation; either version 2 of the License, or (at
  6 | your option) any later version. This program is distributed in the
  7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  9 | PURPOSE. See the GNU General Public License for more details. You
 10 | should have received a copy of the GNU General Public License along
 11 | with this program; if not, write to the Free Software Foundation,
 12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 13 | ***************************************************************************/
 14 | #include <stdio.h>
 15 | #include <math.h>
 16 | #include <sys/types.h>
 17 | #include <unistd.h>
 18 | 
 19 | #include "utlist.h"
 20 | #include "stat.h"
 21 | #include "thread.h"
 22 | #include "interpose.h"
 23 | #include "model.h"
 24 | 
 25 | thread_manager_t* get_thread_manager();
 26 | hrtime_t cycles_to_us(int cpu_speed_mhz, hrtime_t cycles);
 27 | 
 28 | #ifdef USE_STATISTICS
 29 | void stats_set_init_time(double init_time_us) { /* record the initialization duration printed by stats_report() */
 30 |     thread_manager_t* manager = get_thread_manager();
 31 | 
 32 |     __lib_pthread_mutex_lock(&manager->mutex);
 33 |     manager->stats.init_time_us = init_time_us; /* written while holding the manager mutex */
 34 |     __lib_pthread_mutex_unlock(&manager->mutex);
 35 | }
 36 | 
 37 | void stats_enable(config_t *cfg) { /* load statistics.enable and statistics.file from cfg */
 38 |     thread_manager_t* manager = get_thread_manager();
 39 | 
 40 |     __cconfig_lookup_bool(cfg, "statistics.enable", &manager->stats.enabled);
 41 |     if (__cconfig_lookup_string(cfg, "statistics.file", &manager->stats.output_file) != CONFIG_FALSE) return;
 42 |     /* no file configured: stats_report() then falls back to stdout */
 43 |     __lib_pthread_mutex_lock(&manager->mutex);
 44 |     manager->stats.output_file = NULL;
 45 |     __lib_pthread_mutex_unlock(&manager->mutex);
 46 | }
 47 | 
 48 | /* Current local time as text, without ctime()'s trailing newline. The string is
 49 |  * ctime()'s static buffer: do not free it; a later ctime() call overwrites it. */
 50 | static char *get_current_time() {
 51 |     time_t curtime = time(NULL);
 52 |     char *str_time = ctime(&curtime);
 53 | 
 54 |     if (str_time == NULL) return "unknown time"; /* ctime() can fail; never hand NULL to "%s" */
 55 |     str_time[strcspn(str_time, "\n")] = 0; /* unlike strlen()-1, safe for an empty string */
 56 |     return str_time;
 57 | }
 58 | 
 59 | static inline hrtime_t ns_to_cycles(int cpu_speed_mhz, int ns)
 60 | {   /* MHz is cycles per microsecond; widen first so MHz * ns cannot overflow int */
 61 |     return (hrtime_t)(((int64_t)cpu_speed_mhz * ns) / 1000);
 62 | }
 63 | 
 64 | extern __thread int tls_hw_local_latency;
 65 | extern __thread int tls_hw_remote_latency;
 66 | 
 67 | /* Writes one thread's statistics to out_file, one indented line per counter. */
 68 | static void show_thread_stats(thread_t *thread, FILE *out_file) {
 69 |     uint64_t shown;
 70 |     uint64_t access_cycles;
 71 |     int remote;
 72 | 
 73 |     fprintf(out_file, "\tThread id [%d]\n", thread->tid);
 74 |     fprintf(out_file, "\t\t: cpu id: %d\n", thread->cpu_id);
 75 |     fprintf(out_file, "\t\t: spawn timestamp: %lu\n", thread->stats.register_timestamp);
 76 |     fprintf(out_file, "\t\t: termination timestamp: %lu\n", thread->stats.unregister_timestamp);
 77 |     /* 0 is reported while no termination timestamp has been recorded */
 78 |     shown = thread->stats.unregister_timestamp > 0 ? (thread->stats.unregister_timestamp - thread->stats.register_timestamp) : 0;
 79 |     fprintf(out_file, "\t\t: execution time: %lu usecs\n", shown);
 80 |     fprintf(out_file, "\t\t: stall cycles: %lu\n", thread->stats.stall_cycles);
 81 | 
 82 |     /* the remote latency is used only when DRAM and NVRAM nodes differ and the remote-DRAM counter is set */
 83 |     remote = thread->virtual_node->dram_node != thread->virtual_node->nvram_node &&
 84 |                 latency_model.pmc_remote_dram;
 85 |     /* NVM accesses are reported as stall cycles divided by the cycles of one access */
 86 |     access_cycles = ns_to_cycles(thread->cpu_speed_mhz, remote ? tls_hw_remote_latency : tls_hw_local_latency);
 87 |     shown = access_cycles ? thread->stats.stall_cycles / access_cycles : 0;
 88 |     fprintf(out_file, "\t\t: NVM accesses: %lu\n", shown);
 89 | 
 90 |     fprintf(out_file, "\t\t: latency calculation overhead cycles: %lu\n", thread->stats.overhead_cycles);
 91 |     fprintf(out_file, "\t\t: injected delay cycles: %lu\n", thread->stats.delay_cycles);
 92 |     if (thread->cpu_speed_mhz) {
 93 |         fprintf(out_file, "\t\t: injected delay in usec: %lu\n", cycles_to_us(thread->cpu_speed_mhz, thread->stats.delay_cycles));
 94 |     }
 95 |     fprintf(out_file, "\t\t: longest epoch duration: %lu usec\n", thread->stats.longest_epoch_duration_us);
 96 |     shown = (thread->stats.shortest_epoch_duration_us == UINT64_MAX) ? 0 : thread->stats.shortest_epoch_duration_us;
 97 |     fprintf(out_file, "\t\t: shortest epoch duration: %lu usec\n", shown);
 98 |     /* with zero epochs the overall duration is printed unchanged */
 99 |     shown = thread->stats.epochs ? (thread->stats.overall_epoch_duration_us / thread->stats.epochs) :
100 |             thread->stats.overall_epoch_duration_us;
101 |     fprintf(out_file, "\t\t: average epoch duration: %lu usec\n", shown);
102 |     fprintf(out_file, "\t\t: number of epochs: %lu\n", thread->stats.epochs);
103 |     fprintf(out_file, "\t\t: epochs which didn't reach min duration: %lu\n", thread->stats.min_epoch_not_reached);
104 |     fprintf(out_file, "\t\t: static epochs requested: %lu\n", thread->stats.signals_sent);
105 | }
106 | 
107 | void stats_report() {
108 |     thread_t *thread;
109 |     FILE *out_file;
110 |     uint64_t running_threads = 0;
111 |     thread_manager_t* thread_manager = get_thread_manager();
112 |     uint64_t terminated_threads;
113 | 
114 |     if (!thread_manager) return;
115 |     if (!thread_manager->stats.enabled) return;
116 | 
117 |     if (thread_manager->stats.output_file) {
118 |         out_file = fopen(thread_manager->stats.output_file, "a");
119 |         if (!out_file) {
120 |             fprintf(stderr, "Failed to open statistics file for writing: %s\n", thread_manager->stats.output_file);
121 |             return;
122 |         }
123 |     } else {
124 |         out_file = stdout;
125 |     }
126 | 
127 |     __lib_pthread_mutex_lock(&thread_manager->mutex);
128 |     LL_FOREACH(thread_manager->thread_list, thread) {
129 |         running_threads++;
130 |     }
131 |     __lib_pthread_mutex_unlock(&thread_manager->mutex);
132 | 
133 |     fprintf(out_file, "\n\n===== STATISTICS (%s) =====\n\n", get_current_time());
134 |     if (!latency_model.inject_delay) {
135 |     	fprintf(out_file, "WARNING: delay injection is disabled\n");
136 |     }
137 |     fprintf(out_file, "PID: %d\n", getpid());
138 |     fprintf(out_file, "Initialization duration: %lu usec\n", thread_manager->stats.init_time_us);
139 |     fprintf(out_file, "Running threads: %lu\n", running_threads);
140 |     terminated_threads = thread_manager->stats.n_threads > 0 ? (thread_manager->stats.n_threads - running_threads) : 0;
141 |     fprintf(out_file, "Terminated threads: %lu\n", terminated_threads);
142 |     fprintf(out_file, "\n");
143 | 
144 |     fprintf(out_file, "== Running threads == \n");
145 | 
146 |     __lib_pthread_mutex_lock(&thread_manager->mutex);
147 |     LL_FOREACH(thread_manager->thread_list, thread) {
148 |     	show_thread_stats(thread, out_file);
149 |     }
150 |     __lib_pthread_mutex_unlock(&thread_manager->mutex);
151 | 
152 |     fprintf(out_file, "\n== Terminated threads == \n");
153 | 
154 |     __lib_pthread_mutex_lock(&thread_manager->mutex);
155 |     LL_FOREACH(thread_manager->stats.thread_list, thread) {
156 |     	show_thread_stats(thread, out_file);
157 |     }
158 |     __lib_pthread_mutex_unlock(&thread_manager->mutex);
159 | 
160 |     if (out_file != stdout) {
161 |         fclose(out_file);
162 |     }
163 | }
164 | #endif
165 | 
/*
 * Adds up the first n elements of array, in index order.
 * Returns 0 when n is zero or negative (the loop body never runs).
 */
double sum(double array[], int n)
{
    double total = 0;
    int idx = 0;

    while (idx < n) {
        total += array[idx];
        idx++;
    }
    return total;
}
176 | 
// Returns the sum of the element-wise products x[k] * y[k] for k in [0, n),
// accumulated in index order; yields 0 when n is zero or negative.
double sumxy(double x[], double y[], int n)
{
    double acc = 0;
    int k = 0;

    while (k < n) {
        acc += x[k] * y[k];
        k++;
    }
    return acc;
}
188 | 
189 | 
/*
 * Arithmetic mean of the first n elements of array: sum(array, n) / n.
 * There is no guard for n == 0; in that case the sum is 0 and the result is
 * the floating-point quotient 0.0 / 0 (NaN on IEEE-754 platforms).
 */
double avg(double array[], int n)
{
    return sum(array, n) / n;
}
197 | 
/*
 * Slope of the least-squares line through the n points (x[i], y[i]):
 *
 *     m = (n * Sxy - Sx * Sy) / (n * Sxx - Sx * Sx)
 *
 * where Sx, Sy are the sums of x and y, Sxy the sum of x[i]*y[i] and Sxx the
 * sum of x[i]*x[i]. The denominator is not checked for zero.
 */
double slope(double x[], double y[], int n)
{
    double s_xy = sumxy(x, y, n);
    double s_xx = sumxy(x, x, n);
    double s_x = sum(x, n);
    double s_y = sum(y, n);
    double numerator = n * s_xy - s_x * s_y;
    double denominator = n * s_xx - s_x * s_x;

    return numerator / denominator;
}
215 | 


--------------------------------------------------------------------------------
/src/lib/stat.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __STATISTICS_H
15 | #define __STATISTICS_H
16 | 
17 | //#include <sys/types.h>
18 | #include <stdint.h>
19 | #include "config.h"
20 | 
21 | #ifdef USE_STATISTICS
22 | struct thread_s;
23 | 
/* Process-wide statistics state; embedded in thread_manager_t as `stats`. */
typedef struct {
    /* non-zero when statistics are on; stats_report() returns early when it is 0 */
    int enabled;
    /* list that stats_report() prints under "Terminated threads" */
    struct thread_s* thread_list;
    /* thread count; stats_report() subtracts the number of running threads
     * from it to obtain the number of terminated threads */
    uint64_t n_threads;
    /* printed by stats_report() as "Initialization duration", in usec */
    uint64_t init_time_us;
    /* path the report is appended to; when NULL the report goes to stdout */
    char *output_file;
} stats_t;
31 | 
/* Per-thread counters; embedded in thread_t as `stats` when USE_STATISTICS is defined. */
typedef struct {
    /* divided by the per-access latency (in cycles) to report "NVM accesses" */
    uint64_t stall_cycles;
    /* reported as "latency calculation overhead cycles" */
    uint64_t overhead_cycles;
    /* reported as "injected delay cycles" (also converted to usec for the report) */
    uint64_t delay_cycles;
    /* reported as "static epochs requested" */
    uint64_t signals_sent;
    /* reported as "number of epochs"; also the divisor for the average epoch duration */
    uint64_t epochs;
    /* NOTE(review): presumably the time of the most recent epoch boundary -- units not visible here, confirm */
    double last_epoch_timestamp;
    /* UINT64_MAX is treated as "no value" by the report and printed as 0 */
    uint64_t shortest_epoch_duration_us;
    /* reported as "longest epoch duration", in usec */
    uint64_t longest_epoch_duration_us;
    /* accumulated epoch time in usec; divided by `epochs` for the reported average */
    uint64_t overall_epoch_duration_us;
    /* reported as "epochs which didn't reach min duration" */
    uint64_t min_epoch_not_reached;
    /* NOTE(review): presumably set when the thread registers / unregisters -- not used in the visible code, confirm */
    uint64_t register_timestamp;
    uint64_t unregister_timestamp;
} thread_stats_t;
46 | 
/* NOTE(review): definition not visible here; presumably enables statistics according to cfg -- confirm. */
void stats_enable(config_t *cfg);
/* NOTE(review): definition not visible here; presumably stores the value later printed as
 * "Initialization duration" (stats_t.init_time_us) -- confirm. */
void stats_set_init_time(double init_time_us);
/* Appends a statistics report to the configured output file, or writes it to stdout when
 * no file is configured. No-op when statistics are disabled or no thread manager exists. */
void stats_report();
50 | #endif
51 | 
/* Sum of the first n elements of array (0 when n <= 0). */
double sum(double array[], int n);
/* Sum of x[i] * y[i] over the first n elements (0 when n <= 0). */
double sumxy(double x[], double y[], int n);
/* sum(array, n) / n; n == 0 is not guarded against. */
double avg(double array[], int n);
/* Least-squares slope of the points (x[i], y[i]), i in [0, n); a zero denominator is not guarded against. */
double slope(double x[], double y[], int n);
56 | 
57 | #endif /* __STATISTICS_H */
58 | 


--------------------------------------------------------------------------------
/src/lib/thread.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __THREAD_H
15 | #define __THREAD_H
16 | 
17 | #include <sys/types.h>
18 | #include <stdint.h>
19 | #include <numa.h>
20 | #include <pthread.h>
21 | #include <libconfig.h>
22 | #include "topology.h"
23 | #include "cpu/cpu.h"
24 | #include "stat.h"
25 | 
26 | 
struct thread_manager_s; // forward declaration; the full definition appears later in this header

typedef uint64_t hrtime_t;

// TODO: Used by memlat benchmark, should be disabled on a release version
// (enables the extra stall_cycles field in thread_t)
#define MEMLAT_SUPPORT
33 | 
/* Per-thread bookkeeping record; records are chained into singly linked lists via `next`. */
typedef struct thread_s {
    // virtual node of this thread; the statistics report compares its
    // dram_node and nvram_node to choose between remote and local latency
    struct virtual_node_s* virtual_node;
    pthread_t pthread;
    pid_t tid;
    int cpu_id; // the processor the thread is bound on
    // used by the statistics report to convert between cycles and time
    int cpu_speed_mhz;
    struct thread_manager_s* thread_manager;
    // next record in the list this thread is currently linked on
    struct thread_s* next;
    // NOTE(review): presumably set while an epoch signal is pending -- confirm
    int signaled;
#ifdef MEMLAT_SUPPORT
	uint64_t stall_cycles;
#endif
    // with statistics compiled in, last_epoch_timestamp lives inside `stats`
#ifdef USE_STATISTICS
    thread_stats_t stats;
#else
    double last_epoch_timestamp;
#endif
} thread_t;
52 | 
/* Process-wide registry of threads plus epoch and placement settings. */
typedef struct thread_manager_s {
    // held by stats_report() while it walks thread_list and stats.thread_list
    pthread_mutex_t mutex;
    // head of the list of threads reported as "Running threads"
    thread_t* thread_list;
    int max_epoch_duration_us; // maximum epoch duration in microseconds
    int min_epoch_duration_us; // minimum epoch duration in microseconds
    int next_virtual_node_id; // used by the round-robin policy -- next virtual node to run on 
    int next_cpu_id; // used by the round-robin policy -- next cpu to run on
    struct virtual_topology_s* virtual_topology;   
#ifdef USE_STATISTICS
    // statistics state, including the list of terminated threads
    stats_t stats;
#endif
} thread_manager_t; 
65 | 
/*
 * NOTE(review): the implementations are not visible from this header; the
 * summaries below are inferred from names and signatures only -- confirm
 * against the implementation before relying on them.
 */
/* presumably creates the process-wide thread manager for the given virtual topology */
int init_thread_manager(config_t* cfg, struct virtual_topology_s* virtual_topology);
/* presumably adds the calling thread to the thread manager */
int register_self();
/* presumably removes the calling thread from the thread manager */
int unregister_self();
/* presumably returns the calling thread's thread_t record */
thread_t* thread_self();
/* presumably non-zero once the thread's current epoch has lasted at least min_epoch_duration_us */
int reached_min_epoch_duration(thread_t* thread);
/* presumably a pair that defers / re-allows the start of a new epoch for the caller */
void block_new_epoch();
void unblock_new_epoch();
73 | 
74 | #endif /* __THREAD_H */
75 | 


--------------------------------------------------------------------------------
/src/lib/topology.h:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #ifndef __TOPOLOGY_H
15 | #define __TOPOLOGY_H
16 | 
17 | #include <numa.h>
18 | #include "config.h"
19 | #include "cpu/cpu.h"
20 | #include "dev.h"
21 | 
22 | /* DOXYGEN Documentation : */
23 | 
24 | /**
25 |     \page virtual_topology Virtual topology
26 |  
27 |     The emulator constructs a topology of virtual nodes out of physical nodes
28 |     (i.e., NUMA sockets) that represents the arrangement of processors, DRAM, 
29 |     and NVRAM of the virtual machine that the emulator emulates. 
30 | 
31 |     Currently, the emulator supports a NUMA virtual topology where essentially
32 |     two physical sockets are fused into a single virtual node. Each virtual 
33 |     node comprises the processors from one socket only (active socket), and 
    DRAM from both physical sockets. The DRAM attached to the active socket
35 |     is used as the virtual node's locally attached DRAM and the DRAM of the other 
36 |     socket (passive) is used as the virtual node's locally attached NVRAM.
37 |     This topology allows us to emulate a machine that has both DRAM and NVRAM but
38 |     reduces the computation capacity of the machine to half.
39 |     
40 |     In the future we would like to support a topology that matches the shared NVRAM
41 |     storage of The Machine.
42 | 
43 |  */
44 |  
45 | 
46 | 
/* One physical node (NUMA socket) that virtual nodes are built from. */
typedef struct {
    int node_id;
    cpu_model_t* cpu_model;
    // NOTE(review): presumably the PCI registers of the node's memory controller -- confirm
    pci_regs_t  *mc_pci_regs;
    int num_cpus; // number of node's cpus
    struct bitmask* cpu_bitmask; // a bitmask of the node's CPUs 

    // Measured physical latency of this node. Its meaning depends on the role
    // the node plays in a virtual node:
    //   - DRAM node:  measured local latency to DRAM
    //   - NVRAM node: measured remote latency to the sibling NVRAM node
    // NOTE(review): units are not stated here (presumably ns) -- confirm
    int latency; 
} physical_node_t;
60 | 
/*
 * A virtual node pairs the physical node that provides DRAM (the active
 * socket in the description above) with the physical node whose memory
 * stands in for NVRAM (the passive socket).
 */
typedef struct virtual_node_s {
    int node_id;
    // the two pointers may refer to the same physical node; the statistics
    // report only uses the remote latency when they differ
    physical_node_t* dram_node;
    physical_node_t* nvram_node;
    //cpu_model_t* cpu_model;
} virtual_node_t;
67 | 
/* The complete set of virtual nodes of the emulated machine. */
typedef struct virtual_topology_s {
    virtual_node_t* virtual_nodes; // pointer to an array of virtual nodes
    int num_virtual_nodes; // number of entries in virtual_nodes
} virtual_topology_t;
72 | 
/*
 * NOTE(review): the implementations are not visible from this header; the
 * summaries below are inferred from names and signatures only -- confirm.
 */
/* presumably builds the virtual topology and returns it through virtual_topologyp */
int init_virtual_topology(config_t* cfg, cpu_model_t* cpu_model, virtual_topology_t** virtual_topologyp);
/* presumably the number of CPUs in the system */
int system_num_cpus();
/* presumably the first CPU set in bitmask */
int first_cpu(struct bitmask* bitmask);
/* presumably the next CPU set in bitmask after cpu_id */
int next_cpu(struct bitmask* bitmask, int cpu_id);
77 | 
78 | #endif /* __TOPOLOGY_H */
79 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build rules for the emulator's test programs.

# Headers: bundled googletest and the emulator library sources.
include_directories(${CMAKE_SOURCE_DIR}/third_party/gtest-1.7.0/include)
include_directories(${CMAKE_SOURCE_DIR}/src/lib)

# Debug info and warnings for every test target.
add_definitions(-g)
add_definitions(-Wall)
#add_definitions(-DNDEBUG)

# Interposition test: the only target linked against gtest and the only one
# registered with CTest below.
add_executable(test_interpose ${CMAKE_CURRENT_SOURCE_DIR}/test_interpose.cc)
target_link_libraries(test_interpose pthread gtest)

# Manual test programs linked directly against the emulator library.
add_executable(test_dev ${CMAKE_CURRENT_SOURCE_DIR}/test_dev.cc)
target_link_libraries(test_dev pthread nvmemul)

add_executable(test_thread ${CMAKE_CURRENT_SOURCE_DIR}/test_thread.cc)
target_link_libraries(test_thread nvmemul pthread)

add_executable(test_mutex ${CMAKE_CURRENT_SOURCE_DIR}/test_mutex.cc)
target_link_libraries(test_mutex nvmemul pthread)

# Endless memory workloads (they never exit on their own).
add_executable(test_nvm_remote_dram ${CMAKE_CURRENT_SOURCE_DIR}/test_nvm_remote_dram.c)
target_link_libraries(test_nvm_remote_dram nvmemul)

add_executable(test_nvm ${CMAKE_CURRENT_SOURCE_DIR}/test_nvm.c)
target_link_libraries(test_nvm nvmemul)

add_executable(test_multithread ${CMAKE_CURRENT_SOURCE_DIR}/test_multithread.c)
#target_link_libraries(test_multithread rt)
target_link_libraries(test_multithread nvmemul pthread)

add_test(NAME interpose COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_interpose)

# The interposition test runs with the emulator library preloaded.
# NOTE(review): the source tree lists src/lib, not src/emul -- confirm that
# this is where libnvmemul.so is actually built.
set(ENV_COMMON "LD_PRELOAD=${CMAKE_BINARY_DIR}/src/emul/libnvmemul.so")

# NOTE(review): confirm that ENUM_INI / emul.ini are the variable name and
# file the library really reads; the repository ships nvmemul.ini.
SET_PROPERTY(TEST interpose PROPERTY ENVIRONMENT ${ENV_COMMON} "ENUM_INI=emul.ini")
35 | 


--------------------------------------------------------------------------------
/test/test_dev.cc:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #include <pthread.h>
15 | #include <stdlib.h>
16 | #include <stdio.h>
17 | #include "gtest/gtest.h"
18 | #include "pmalloc.h"
19 | 
20 | int main(int argc, char** argv)
21 | {
22 | //    ::testing::InitGoogleTest(&argc, argv);
23 | //    return RUN_ALL_TESTS();
24 |     printf("PID: %d\n", getpid());
25 |     printf("malloc: %p\n", malloc(8));
26 |     printf("malloc: %p\n", malloc(8));
27 |     printf("pmalloc: %p\n", pmalloc(8));
28 | }
29 | 
30 | 


--------------------------------------------------------------------------------
/test/test_interpose.cc:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #include <pthread.h>
15 | #include <stdlib.h>
16 | #include <stdio.h>
17 | #include "gtest/gtest.h"
18 | 
19 | static int interpose_pthread_create_success = 0;
20 | 
21 | 
22 | // Ugly hack: we want to test whether interposition works. To do this we 
23 | // hook on the functions that the interposition code calls by redefining these
24 | // functions. As those functions are written in C, we need to make sure we force
25 | // the C++ compiler use C linkage.
26 | 
27 | #ifdef __cplusplus
28 | extern "C" {
29 | #endif
30 | 
31 | // this function is called when interposition of pthread_create is successful
32 | int register_thread(pthread_t thread)
33 | {
34 |     interpose_pthread_create_success = 1;
35 |     return 0;
36 | }
37 | 
38 | #ifdef __cplusplus
39 | }
40 | #endif
41 | 
// Thread body for the pthread_create interposition test: ignores its argument
// and finishes immediately with a NULL result.
void* interpose_pthread_create_start_routine(void* args)
{
    (void) args;
    void* const result = NULL;
    return result;
}
46 | 
47 | void interpose_pthread_create()
48 | {
49 |     pthread_t thread;  
50 |     
51 |     pthread_create (&thread, NULL, &interpose_pthread_create_start_routine, NULL);
52 | 
53 |     pthread_join(thread, NULL);
54 |               
55 | }
56 | 
// Thin wrapper: acquires the given mutex via pthread_mutex_lock and discards
// the return code.
void interpose_pthread_mutex_lock(pthread_mutex_t* lock)
{
    (void) pthread_mutex_lock(lock);
}
61 | 
// Thin wrapper: releases the given mutex via pthread_mutex_unlock and discards
// the return code.
void interpose_pthread_mutex_unlock(pthread_mutex_t* lock)
{
    (void) pthread_mutex_unlock(lock);
}
66 | 
// Checks that pthread_create is interposed: the flag starts at 0 and must be 1
// after a thread has been created, which only happens if register_thread()
// (defined in this file) was called in between.
TEST(Interpose, pthread_create)
{
    // nothing has created a thread through the hook yet
    EXPECT_EQ(0, interpose_pthread_create_success);
    interpose_pthread_create();
    // register_thread() sets the flag when it is invoked
    EXPECT_EQ(1, interpose_pthread_create_success);
}
73 | 
// Placeholder: contains no active assertions, so it always passes.
TEST(Interpose, pthread_mutex_lock)
{
    //EXPECT_EQ(1, 0);
}
78 | 
79 | 
80 | int main(int argc, char** argv)
81 | {
82 |     ::testing::InitGoogleTest(&argc, argv);
83 |     return RUN_ALL_TESTS();
84 | 
85 |     pthread_mutex_t lock;
86 |     pthread_mutex_init(&lock, NULL);
87 |     interpose_pthread_mutex_lock(&lock);
88 |     interpose_pthread_mutex_unlock(&lock);
89 | }
90 | 


--------------------------------------------------------------------------------
/test/test_mutex.cc:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #include <pthread.h>
15 | #include <stdlib.h>
16 | #include <stdint.h>
17 | #include <stdio.h>
18 | #include <stddef.h>
19 | #include "gtest/gtest.h"
20 | 
21 | #define MAX_NUM_THREADS 128
22 | 
23 | pthread_mutex_t mutex;
24 | 
25 | void* worker(void* args) 
26 | {
27 | //    int i;
28 | //    char* array = (char*) malloc(1024*1024);
29 | 
30 |     pthread_mutex_lock(&mutex);
31 | 
32 |     pthread_mutex_unlock(&mutex);
33 |     return NULL;
34 | }
35 | 
36 | 
37 | int main(int argc, char** argv)
38 | {
39 | 	pthread_t thread[MAX_NUM_THREADS];
40 | 	int thread_count = 4;
41 | 	int i;
42 | //    int sum;
43 | 
44 |     pthread_mutex_init(&mutex, NULL);
45 |     pthread_mutex_lock(&mutex);
46 |     pthread_mutex_unlock(&mutex);
47 | 	for (i = 0; i< thread_count; i++)	
48 | 		pthread_create(&thread[i], NULL, worker, NULL);
49 | 
50 | 	for(i = 0 ; i < thread_count ; i++)
51 | 		pthread_join(thread[i], NULL);
52 | }
53 | 


--------------------------------------------------------------------------------
/test/test_nvm.c:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | 
15 | #include <stdio.h>
16 | #include <unistd.h>
17 | 
18 | 
19 | #define BUF_SIZE (2048)
20 | 
21 | unsigned long mem[BUF_SIZE][BUF_SIZE];
22 | 
/*
 * Endless memory workload over the global mem[BUF_SIZE][BUF_SIZE] array.
 *
 * The array is first filled row by row, then read and rewritten forever in
 * column order, with a 1 ms sleep after every full sweep. This function never
 * returns.
 */
void iter()
{
	int i;
	int j;
	unsigned long k;

	/* initialization pass, row-major: mem[i][j] = i * j */
	for (i=0; i < BUF_SIZE; ++i) {
		for (j=0; j < BUF_SIZE; ++j) {
			mem[i][j] = i * j;
		}
	}

	k = 0;
	while(1) {
		for (i=0; i < BUF_SIZE; ++i) {
			/* empty asm statement; NOTE(review): presumably here to keep the
			 * compiler from restructuring the loop nest -- confirm */
			__asm__ __volatile__("");
			/* the inner loop walks down column i: consecutive accesses are one
			 * full row (BUF_SIZE elements) apart */
			for (j=0; j < BUF_SIZE; ++j) {
		        k += mem[j][i] + i*j;
		        mem[j][i] = k;
			}
		}
//		fprintf(stdout, "k is %lu\n", (unsigned long)k);
		usleep(1000);
	}
}
48 | 
/* Entry point: runs the memory workload. iter() loops forever, so the return
 * below is only there to satisfy the signature. */
int main(void)
{
    iter();

    return 0;
}
54 | 


--------------------------------------------------------------------------------
/test/test_nvm_remote_dram.c:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | 
15 | #include <stdio.h>
16 | #include <unistd.h>
17 | #include "pmalloc.h"
18 | 
19 | 
20 | #define BUF_SIZE (4 * 1024)
21 | 
22 | unsigned long **mem;
23 | 
/*
 * Endless memory workload over a BUF_SIZE x BUF_SIZE table allocated with
 * pmalloc(): one array of row pointers plus one separately allocated row per
 * entry. After initialization the table is read and rewritten forever in
 * column order. This function never returns.
 *
 * NOTE(review): the pmalloc() results are used without a failure check; the
 * failure value of pmalloc() is not visible from this file -- confirm and add
 * a check.
 */
void iter()
{
	int i;
	int j;
	unsigned long k;

	/* allocate the row-pointer array, then allocate and fill each row */
	mem = (unsigned long **) pmalloc(BUF_SIZE * sizeof(unsigned long *));
	for (i=0; i < BUF_SIZE; ++i) {
		mem[i] = (unsigned long *) pmalloc(BUF_SIZE * sizeof(unsigned long));
		for (j=0; j < BUF_SIZE; ++j) {
			mem[i][j] = i * j;
		}
	}

	k = 0;
	while(1) {
		for (i=0; i < BUF_SIZE; ++i) {
			/* empty asm statement; NOTE(review): presumably here to keep the
			 * compiler from restructuring the loop nest -- confirm */
			__asm__ __volatile__("");
			/* the inner loop touches element i of every row in turn, so each
			 * access goes to a different pmalloc'ed row */
			for (j=0; j < BUF_SIZE; ++j) {
		        k += mem[j][i] + i*j;
		        mem[j][i] = k;
			}
		}
//		usleep(1000);
	}

	/* NOTE(review): unreachable -- the loop above has no exit, so this cleanup
	 * never runs */
	for (i=0; i < BUF_SIZE; ++i) {
		pfree(mem[i], BUF_SIZE * sizeof(unsigned long));
	}
	pfree(mem, BUF_SIZE * sizeof(unsigned long *));
}
55 | 
/* Entry point: runs the pmalloc-backed memory workload. iter() loops forever,
 * so the return below is only there to satisfy the signature. */
int main(void)
{
    iter();

    return 0;
}
61 | 


--------------------------------------------------------------------------------
/test/test_thread.cc:
--------------------------------------------------------------------------------
 1 | /***************************************************************************
 2 | Copyright 2016 Hewlett Packard Enterprise Development LP.  
 3 | This program is free software; you can redistribute it and/or modify
 4 | it under the terms of the GNU General Public License as published by
 5 | the Free Software Foundation; either version 2 of the License, or (at
 6 | your option) any later version. This program is distributed in the
 7 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 8 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 9 | PURPOSE. See the GNU General Public License for more details. You
10 | should have received a copy of the GNU General Public License along
11 | with this program; if not, write to the Free Software Foundation,
12 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
13 | ***************************************************************************/
14 | #include <pthread.h>
15 | #include <stdlib.h>
16 | #include <stdint.h>
17 | #include <stdio.h>
18 | #include <stddef.h>
19 | #include "gtest/gtest.h"
20 | 
21 | #define MAX_NUM_THREADS 128
22 | 
// Thread body: allocates a 1 MiB buffer, increments every byte once, releases
// the buffer, prints "exiting" and returns NULL. The argument is not used.
//
// The buffer is zero-initialized so the increments never read indeterminate
// memory. An allocation failure is reported on stderr and the thread returns
// without touching the buffer.
void* worker(void* args)
{
    const size_t buffer_size = 1024 * 1024;
    size_t i;
    char* array = (char*) calloc(buffer_size, 1);

    (void) args;

    if (array == NULL) {
        fprintf(stderr, "worker: failed to allocate %lu bytes\n", (unsigned long) buffer_size);
        return NULL;
    }

    for (i = 0; i < buffer_size; i++) {
        array[i] += 1;
    }

    free(array);
    printf("exiting\n");
    return NULL;
}
37 | 
38 | 
39 | int main(int argc, char** argv)
40 | {
41 | 	pthread_t thread[MAX_NUM_THREADS];
42 | 	int thread_count = 4;
43 | 	int i;
44 | //    int sum;
45 | 
46 | 	for (i = 0; i< thread_count; i++)	
47 | 		pthread_create(&thread[i], NULL, worker, NULL);
48 | 
49 | 	for(i = 0 ; i < thread_count ; i++)
50 | 		pthread_join(thread[i], NULL);
51 | }
52 | 


--------------------------------------------------------------------------------