├── src
│   ├── func_list
│   ├── annotation.cpp
│   ├── annotation.h
│   ├── SConstruct
│   ├── AtomicTrace.py
│   ├── drain_buffer.cpp
│   ├── inc_sim.py
│   ├── inc_sim_cas.py
│   ├── inc.cpp
│   ├── inc_cas.cpp
│   └── merge.cpp
├── merge_test
│   ├── my_sort.sh
│   ├── inc_test.sh
│   ├── README.md
│   ├── Test.py
│   └── CompareMerge.py
├── trace
│   ├── makefile
│   ├── makefile.rules
│   └── trace.cpp
└── README.md

/src/func_list:
--------------------------------------------------------------------------------
1 | atomic_trace::special_malloc
2 | atomic_trace::special_free
3 | 
--------------------------------------------------------------------------------
/merge_test/my_sort.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | sort -k 1 -n memory_trace.out | sed '/thread_sync/d'
3 | 
--------------------------------------------------------------------------------
/merge_test/inc_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ~/pin/pin -t ~/work/atomic-memory-trace/trace/obj-intel64/trace.so -f ~/work/atomic-memory-trace/src/func_list -i 1 -r 1 -- ~/work/atomic-memory-trace/src/inc 5 1000
3 | 
--------------------------------------------------------------------------------
/merge_test/README.md:
--------------------------------------------------------------------------------
1 | Merge Test
2 | ----------
3 | 
4 | The provided merge utility is useful for streaming/piping a
5 | pintool-provided memory trace directly to a simulation application.
6 | 
7 | This merge test compares the output of the merge to the Linux
8 | "sort" utility (which I assume will be correct).
9 | A proper comparison requires that the Lamport timestamps be included
10 | in both the merge output and the sort output.
11 | Additionally, trace entries with equal timestamps may correctly
12 | appear in any order, and this order may differ between the merge
13 | and sort outputs. Therefore, we check that identical entries
14 | appear with the same timestamp.
15 | 
--------------------------------------------------------------------------------
/trace/makefile:
--------------------------------------------------------------------------------
1 | ##############################################################
2 | #
3 | # DO NOT EDIT THIS FILE!
4 | #
5 | ##############################################################
6 | 
7 | # If the tool is built out of the kit, PIN_ROOT must be specified in the make invocation and point to the kit root.
8 | ifdef PIN_ROOT
9 | CONFIG_ROOT := $(PIN_ROOT)/source/tools/Config
10 | else
11 | CONFIG_ROOT := ../Config
12 | endif
13 | include $(CONFIG_ROOT)/makefile.config
14 | include makefile.rules
15 | include $(TOOLS_ROOT)/Config/makefile.default.rules
16 | 
17 | ##############################################################
18 | #
19 | # DO NOT EDIT THIS FILE!
20 | # 21 | ############################################################## 22 | -------------------------------------------------------------------------------- /src/annotation.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | #include "annotation.h" 21 | 22 | // outside of header to make sure functions are not in-lined 23 | 24 | void* atomic_trace::special_malloc(size_t size) { 25 | return malloc(size); 26 | } 27 | 28 | void atomic_trace::special_free(void *addr) { 29 | free(addr); 30 | } 31 | 32 | void atomic_trace::register_thread(int64_t thread_num) { 33 | } 34 | 35 | void atomic_trace::start_roi() {} 36 | void atomic_trace::end_roi() {} 37 | -------------------------------------------------------------------------------- /src/annotation.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | 20 | #ifndef PMC_ATOMIC_TRACE_H 21 | #define PMC_ATOMIC_TRACE_H 22 | 23 | // provide annotation for persistent memory traces 24 | // need general trace annotation (threads, region of interest) 25 | // persistent memory regions (pers_malloc) 26 | // and persist barriers 27 | 28 | #include 29 | #include 30 | 31 | namespace atomic_trace { 32 | 33 | void* special_malloc(size_t size); 34 | void special_free(void *addr); 35 | void register_thread(int64_t thread_num); 36 | void start_roi(); 37 | void end_roi(); 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /src/SConstruct: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Copyright (c) 2013 Steven Pelley 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | # this software and associated documentation files (the "Software"), to deal in 6 | # the Software without restriction, including without limitation the rights to 7 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | # the Software, and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
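#
# usage, from the src/ directory: a plain "scons" builds everything declared
# below, "scons debug=1" adds -g -O0, and "scons define=NAME" (repeatable)
# passes extra preprocessor defines through CPPDEFINES.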
20 | 
21 | env = Environment()
22 | # for clang color if using gnu screen or terminal emulation
23 | import os
24 | env['ENV']['TERM'] = os.environ['TERM']
25 | 
26 | debug = ARGUMENTS.get('debug', 0)
27 | if int(debug):
28 |     env.Append(CCFLAGS = ['-g', '-O0'])
29 | 
30 | cppdefines = []
31 | for key, value in ARGLIST:
32 |     if key == 'define':
33 |         cppdefines.append(value)
34 | env.Append(CPPDEFINES=cppdefines)
35 | 
36 | env_annotation = env.Clone()
37 | annotation_lib = env_annotation.Library('annotation', 'annotation.cpp')
38 | 
39 | env_inc = env.Clone()
40 | env_inc.Append(CCFLAGS = '-pthread', LIBS=['pthread', annotation_lib])
41 | env_inc.Program('inc.cpp')
42 | 
43 | env_inc_cas = env.Clone()
44 | env_inc_cas.Append(CCFLAGS = '-pthread', LIBS=['pthread', annotation_lib])
45 | env_inc_cas.Program('inc_cas.cpp')
46 | 
47 | env_merge = env.Clone()
48 | env_merge.Program(['merge.cpp'])
49 | 
50 | env_drain_buffer = env.Clone()
51 | env_drain_buffer.Append(CCFLAGS = '-pthread', LIBS=['pthread'])
52 | env_drain_buffer.Program(['drain_buffer.cpp'])
53 | 
--------------------------------------------------------------------------------
/merge_test/Test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # repeatedly test the merge utility for a given pintool command
3 | # inputs:
4 | #   script that produces an output trace in memory_trace.out
5 | #     for now the script must use fully qualified names of files
6 | #   number of attempts
7 | #
8 | # for each output file that merge fails (either the merge process fails
9 | # or the output of merge and sort differs)
10 | # produce a directory with the original memory_trace.out,
11 | # sort.out, and merge.out
12 | 
13 | # return True if success, False if some failure
14 | # if it fails, produce a 'merge_results' file with some output related to the failure
15 | #
16 | # test file and merge utility must be fully qualified names
17 | def test1(test_file, sort_utility, merge_utility):
18 |     import subprocess
19 |     import shlex
20 | 
21 |     fout = open('command_out', 'w')
22 |     ret = subprocess.call(shlex.split(test_file), stdout=fout, stderr=subprocess.STDOUT)
23 |     assert ret == 0
24 |     fout.close()
25 | 
26 |     # memory_trace.out now exists
27 |     # first sort with the linux sort utility
28 |     fout = open('memory_trace.sort', 'w')
29 |     ret = subprocess.call(shlex.split(sort_utility), stdout=fout)
30 |     assert ret == 0
31 |     fout.close()
32 | 
33 |     # now run merge, capturing stderr in a file
34 |     fin = open('memory_trace.out')
35 |     fout = open('memory_trace.merge', 'w')
36 |     ferr = open('merge_err', 'w')
37 |     ret = subprocess.call(shlex.split(merge_utility + " -t"), stdin = fin, stdout=fout, stderr=ferr)
38 |     fin.close()
39 |     fout.close()
40 |     ferr.close()
41 | 
42 |     if ret != 0:
43 |         return False
44 | 
45 |     f1 = open('memory_trace.sort')
46 |     f2 = open('memory_trace.merge')
47 | 
48 |     eq_ret = CompareMerge.compare(f1, f2)
49 |     f1.close()
50 |     f2.close()
51 |     if not eq_ret[0]:
52 |         fout = open('merge_results', 'w')
53 |         fout.write("files differ:\n")
54 |         fout.write("file {}:\ttime: {}\tline {}\n".format('memory_trace.sort', eq_ret[1], eq_ret[3]))
55 |         fout.write("file {}:\ttime: {}\tline {}\n".format('memory_trace.merge', eq_ret[2], eq_ret[4]))
56 |         fout.close()
57 | 
58 |     return eq_ret[0]
59 | 
60 | if __name__=="__main__":
61 |     import CompareMerge
62 |     import sys
63 |     import os.path
64 |     import shutil
65 | 
66 |     num_test = int(sys.argv[1])
67 |     test_file = os.path.abspath(sys.argv[2])
68 |     sort_utility = os.path.abspath('my_sort.sh')
69 |     merge_utility = os.path.abspath('../src/merge')
70 | 
71 |     if
os.path.exists('compare_test'): 72 | shutil.rmtree('compare_test') 73 | os.mkdir('compare_test') 74 | os.chdir('compare_test') 75 | 76 | file_count = 1 77 | for i in range(num_test): 78 | dir_name = 'test{}'.format(file_count) 79 | os.mkdir(dir_name) 80 | os.chdir(dir_name) 81 | 82 | succ = test1(test_file, sort_utility, merge_utility) 83 | os.chdir('..') 84 | if not succ: 85 | file_count += 1 86 | else: 87 | shutil.rmtree(dir_name) 88 | fail_count = file_count - 1 89 | print "failures:\t{}".format(fail_count) 90 | 91 | -------------------------------------------------------------------------------- /merge_test/CompareMerge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # compare the 2 input files 3 | # if not identical find the first lines where they differ 4 | # as well as the lamport timestamp 5 | 6 | # return 7 | # (True,) if they are equal and 8 | # 9 | # ( 10 | # False, 11 | # f1 timestamp where first differ, 12 | # f2 timestamp where first differ, 13 | # f1 line, 14 | # f2 line 15 | # ) 16 | # otherwise 17 | def compare(f1, f2, failassert=False): 18 | # initialize by reading first lines 19 | f1_line = 1 20 | f2_line = 1 21 | 22 | f1_last_line = f1.readline() 23 | if f1_last_line != "": 24 | f1_time = get_time(f1_last_line) 25 | f2_last_line = f2.readline() 26 | if f2_last_line != "": 27 | f2_time = get_time(f2_last_line) 28 | 29 | f1_time = 0 30 | f2_time = 0 31 | 32 | while True: 33 | # read all lines from each file that have the next timestamp 34 | # check that the timestamp is the same for both files 35 | # and check that the set of lines with this timestamp are 36 | # identical (but different orders allowed) 37 | 38 | f1_cont = f1_last_line != "" 39 | f2_cont = f2_last_line != "" 40 | if f1_cont != f2_cont: 41 | fail(failassert) 42 | return (False, f1_time, f2_time, f1_line, f2_line,) 43 | elif (not f1_cont and not f2_cont): 44 | return (True,) 45 | # else both are continue and last_lines contain valid strings 46 | 47 | (bunch1, f1_last_line, f1_new_time, f1_line) = \ 48 | create_bunch(f1, f1_last_line, f1_line, failassert) 49 | 50 | (bunch2, f2_last_line, f2_new_time, f2_line) = \ 51 | create_bunch(f2, f2_last_line, f2_line, failassert) 52 | 53 | if (f1_new_time <= f1_time or 54 | f2_new_time <= f2_time or 55 | bunch1 != bunch2 or 56 | f1_new_time != f2_new_time 57 | ): 58 | fail(failassert) 59 | return (False, f1_time, f2_time, f1_line, f2_line,) 60 | f1_time = f1_new_time 61 | f2_time = f2_new_time 62 | 63 | assert False, "fell through without break?" 
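# For example, these two inputs compare equal, because entries that share a
# timestamp may legally appear in either order:
#   f1: "1\ta", "2\tb", "2\tc"
#   f2: "1\ta", "2\tc", "2\tb"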
64 | 
65 | # create the next bunch
66 | # return
67 | #   (tuple bunch, new last_line, bunch's time, new line_num,)
68 | #
69 | # if last_line == "" then we have reached the end of the file,
70 | # so return ((), "", 0, same line_num)
71 | def create_bunch(f, last_line, last_line_num, failassert):
72 |     if last_line == "":
73 |         return ((), "", 0, last_line_num)
74 |     bunch_time = get_time(last_line)
75 | 
76 |     line_num = last_line_num
77 |     bunch = [last_line]
78 |     while True: # do while
79 |         line_num += 1
80 |         last_line = f.readline()
81 |         if last_line == "":
82 |             break
83 |         new_time = get_time(last_line)
84 |         if new_time != bunch_time:
85 |             break
86 |         bunch.append(last_line)
87 |     return (tuple(sorted(bunch)), last_line, bunch_time, line_num)
88 | 
89 | def fail(failassert):
90 |     if failassert:
91 |         assert False
92 | 
93 | def get_time(s):
94 |     return int(s.split('\t', 1)[0])
95 | 
96 | if __name__=="__main__":
97 |     import sys
98 |     f1_name = sys.argv[1]
99 |     f1 = open(f1_name)
100 |     f2_name = sys.argv[2]
101 |     f2 = open(f2_name)
102 | 
103 |     out = compare(f1, f2, True)
104 | 
105 |     f1.close()
106 |     f2.close()
107 | 
108 |     if out[0]:
109 |         print "equal"
110 |         sys.exit(0)
111 |     else:
112 |         print "files differ:"
113 |         print "file {}:\ttime: {}\tline {}".format(f1_name, out[1], out[3])
114 |         print "file {}:\ttime: {}\tline {}".format(f2_name, out[2], out[4])
115 |         sys.exit(1)
116 | 
--------------------------------------------------------------------------------
/src/AtomicTrace.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2013 Steven Pelley
2 | #
3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
4 | # this software and associated documentation files (the "Software"), to deal in
5 | # the Software without restriction, including without limitation the rights to
6 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7 | # the Software, and to permit persons to whom the Software is furnished to do so,
8 | # subject to the following conditions:
9 | #
10 | # The above copyright notice and this permission notice shall be included in all
11 | # copies or substantial portions of the Software.
12 | #
13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 | 20 | # framework for running atomic memory trace simulations 21 | # takes in a trace file and a simulation object 22 | # simulation object provides callback function and state for the sim 23 | 24 | import re 25 | 26 | class Trace: 27 | 28 | # caller is responsible for initializing sim 29 | def __init__(self, trace_file, sim): 30 | self._sim = sim 31 | self._trace_file = trace_file 32 | 33 | def run(self): 34 | # need to exist before callback 35 | read_1_address = 0 36 | read_size = 0 37 | read_2_address = 0 38 | write_address = 0 39 | write_size = 0 40 | 41 | for i,l in enumerate(self._trace_file): 42 | l = l.strip() 43 | l_list = l.split('\t') 44 | threadid = int(l_list[0]) 45 | operation = l_list[1] 46 | 47 | # memory 48 | if operation == 'm': 49 | have_read_1 = len(l_list) > 2 and l_list[2] == 'r' 50 | have_read_2 = have_read_1 and len(l_list) > 5 and l_list[5] == 'r2' 51 | have_write = l_list[-3] == 'w' 52 | if have_read_1: 53 | read_1_address = int(l_list[3]) 54 | read_size = int(l_list[4]) 55 | if have_read_2: 56 | read_2_address = int(l_list[6]) 57 | if have_write: 58 | write_address = int(l_list[-2]) 59 | write_size = int(l_list[-1]) 60 | self._sim.memory_access( 61 | i 62 | , threadid 63 | , have_read_1 64 | , have_read_2 65 | , have_write 66 | , read_1_address 67 | , read_size 68 | , read_2_address 69 | , write_address 70 | , write_size 71 | ) 72 | 73 | # thread register 74 | elif operation == 'tr': 75 | self._sim.start_thread(i, threadid) 76 | 77 | # thread finish 78 | elif operation == 'tf': 79 | self._sim.finish_thread(i, threadid) 80 | 81 | # function call 82 | elif operation == 'fc': 83 | func_name = l_list[2] 84 | stack_pointer = int(l_list[3]) 85 | arg1 = int(l_list[4]) 86 | arg2 = int(l_list[5]) 87 | arg3 = int(l_list[6]) 88 | self._sim.function_call(i, func_name, threadid, stack_pointer, arg1, arg2, arg3) 89 | 90 | # function return 91 | elif operation == 'fr': 92 | func_name = l_list[2] 93 | stack_pointer = int(l_list[3]) 94 | return_value = int(l_list[4]) 95 | self._sim.function_return(i, func_name, threadid, stack_pointer, return_value) 96 | 97 | # start ROI 98 | elif operation == 'start_roi': 99 | self._sim.start_roi(i) 100 | 101 | # end ROI 102 | elif operation == 'end_roi': 103 | self._sim.end_roi(i) 104 | 105 | # change context 106 | elif operation == 'ctxt_change': 107 | self._sim.end_roi(i, threadid) 108 | 109 | -------------------------------------------------------------------------------- /src/drain_buffer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | // 20 | // drain_buffer.cpp 21 | // 22 | // the atomic tracer tends to block on writing to the file handle with 23 | // all other threads blocking on the file handle lock. 24 | // The result is that one thread tends to run at a time, interfering with 25 | // thread interleaving. Instead, we would like to buffer output to memory 26 | // and then completely block while draining the buffer. We can double 27 | // buffer so that one buffer fills while the next is draining 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | struct buffer_t { 37 | char *buf; 38 | int64_t capacity; 39 | int64_t end_cursor; // next location to insert 40 | 41 | char pad [64 - ( sizeof(char*) + 2*sizeof(int64_t) )]; 42 | }; 43 | 44 | // shared 45 | buffer_t bufs [2]; 46 | pthread_barrier_t *bar; 47 | bool eof; 48 | 49 | // read from std in to the buffer 50 | // until the buffer fills or reach EOF 51 | // use (iteration % 2) buffer 52 | void* reader(void *ptr) { 53 | setvbuf(stdin, (char*)NULL, _IOFBF, 1 << 24); 54 | for (int64_t i = 0; ; ++i) { 55 | int64_t idx = i % 2; 56 | buffer_t *buf = &bufs[idx]; 57 | buf->end_cursor = 0; 58 | 59 | while (buf->end_cursor < buf->capacity && !feof(stdin)) { 60 | int64_t size = fread(&buf->buf[buf->end_cursor], 1, buf->capacity-buf->end_cursor, stdin); 61 | buf->end_cursor += size; 62 | assert(buf->end_cursor <= buf->capacity); 63 | } 64 | 65 | pthread_barrier_wait(bar); 66 | eof = feof(stdin); 67 | pthread_barrier_wait(bar); 68 | if (eof) return NULL; 69 | } 70 | } 71 | 72 | // drain the buffer, writing to std out from buffer 73 | // use (iteration + 1) % 2 buffer 74 | void* writer(void *ptr) { 75 | setvbuf(stdout, (char*)NULL, _IOFBF, 1 << 24); 76 | for (int64_t i = 0; ; ++i) { 77 | int64_t idx = (i+1) % 2; 78 | buffer_t *buf = &bufs[idx]; 79 | 80 | int64_t start = 0; 81 | while (start < buf->end_cursor) { 82 | int64_t size = fwrite(&buf->buf[start], 1, buf->end_cursor-start, stdout); 83 | start += size; 84 | } 85 | assert(start == buf->end_cursor); 86 | if (eof) return NULL; 87 | 88 | // sync and make sure eof is consistent 89 | pthread_barrier_wait(bar); 90 | pthread_barrier_wait(bar); 91 | } 92 | } 93 | 94 | int main(int argc, char** argv) { 95 | if (argc != 2) { 96 | std::cout << "usage: drain_buffer " << std::endl; 97 | assert(false); 98 | } 99 | int64_t total_capacity = atol(argv[1]); 100 | int64_t buffer_capacity = total_capacity / 2; 101 | assert(buffer_capacity > 0); 102 | 103 | eof = false; 104 | bar = new pthread_barrier_t(); 105 | pthread_barrier_init(bar, NULL, 2); 106 | for (int64_t i = 0; i < 2; ++i) { 107 | bufs[i].buf = new char [buffer_capacity]; 108 | bufs[i].capacity = buffer_capacity; 109 | bufs[i].end_cursor = 0; 110 | } 111 | 112 | pthread_t read_thread, write_thread; 113 | assert(pthread_create(&read_thread, NULL, reader, NULL) == 0); 114 | assert(pthread_create(&write_thread, NULL, writer, NULL) == 0); 115 | 116 | assert(pthread_join(read_thread, NULL) == 0); 117 | assert(pthread_join(write_thread, NULL) == 0); 118 | 119 | delete bar; 120 | for (int64_t i = 0; i < 2; ++i) { 121 | delete [] bufs[i].buf; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/inc_sim.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Copyright (c) 2013 Steven Pelley 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | # this software and associated documentation files (the "Software"), to deal in 6 | # the Software without restriction, including without limitation the rights to 7 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | # the Software, and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | # analyse the inc trace. 22 | # assert: 23 | # exactly 8 bytes of persistent memory are accessed 24 | # each access to this persistent memory a read and write (atomic inc) 25 | # 26 | # compute and print: 27 | # for each registered thread the number of successful increments to counter 28 | # assuming counter is initialized to 0 compute the number of old value-even accesses 29 | # 30 | # print out any context changes as these may cause inconsistencies 31 | 32 | class my_sim: 33 | # constants 34 | set_read1_write = set(['read1', 'write']) 35 | 36 | def __init__(self): 37 | self._special_address = None 38 | self._special_size = 0 39 | self._special_value = 0 40 | 41 | self._active_threads = set() 42 | import collections 43 | # threadid to increment 44 | self._increments = collections.defaultdict(int) 45 | self._even_increments = collections.defaultdict(int) 46 | 47 | def memory_access(self, line_num, threadid, have_read_1, have_read_2, have_write, read_1_address, read_size, read_2_address, write_address, write_size): 48 | if not self._special_address: 49 | return 50 | 51 | # atomic inc to special address? 
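# i.e. a read-modify-write (a single read and write of the same address and
# size) whose write range overlaps the region returned by special_malloc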
52 | if ( 53 | # Read-Modify-Write (atomic inc) 54 | have_read_1 and not have_read_2 and have_write and 55 | read_1_address == write_address and 56 | read_size == write_size 57 | ) and ( 58 | # matches special address 59 | (write_address < self._special_address and write_address + write_size >= self._special_address) or 60 | (write_address >= self._special_address and write_address <= self._special_address + self._special_size) 61 | ): 62 | # check that we touch exactly the first 8 bytes 63 | assert write_address == self._special_address and write_size == 8 64 | if self._special_value % 2 == 0: 65 | self._even_increments[threadid] += 1 66 | self._increments[threadid] += 1 67 | self._special_value += 1 68 | 69 | def start_thread(self, line_num, threadid): 70 | assert threadid not in self._active_threads 71 | self._active_threads.add(threadid) 72 | 73 | def finish_thread(self, line_num, threadid): 74 | assert threadid in self._active_threads 75 | self._active_threads.remove(threadid) 76 | 77 | def function_call(self, line_num, name, threadid, stack_pointer, arg1, arg2, arg3): 78 | if name == "atomic_trace::special_malloc": 79 | assert not self._special_address 80 | assert self._special_size == 0 81 | self._special_size = arg1 82 | elif name == "atomic_trace::special_free": 83 | pass 84 | else: 85 | assert False 86 | 87 | def function_return(self, line_num, name, threadid, stack_pointer, return_value): 88 | if name == "atomic_trace::special_malloc": 89 | assert not self._special_address 90 | assert self._special_size >= 8 91 | self._special_address = return_value 92 | elif name == "atomic_trace::special_free": 93 | pass 94 | else: 95 | assert False 96 | 97 | def start_roi(self, line_num): 98 | pass 99 | 100 | def end_roi(self, line_num): 101 | pass 102 | 103 | def ctxt_change(self, line_num, threadid): 104 | print("context change! Line {}".format(line_num)) 105 | 106 | def main(): 107 | import argparse 108 | parser = argparse.ArgumentParser(description='test inc atomic trace simulation') 109 | parser.add_argument('--infile', default="") 110 | args = parser.parse_args() 111 | 112 | import sys 113 | if len(args.infile) == 0: 114 | fin = sys.stdin 115 | else: 116 | fin = open(args.infile) 117 | 118 | import AtomicTrace 119 | 120 | sim = my_sim() 121 | trace = AtomicTrace.Trace(fin, sim) 122 | trace.run() 123 | 124 | # recreate the program output 125 | print("final counter value: {}".format(sim._special_value)) 126 | for threadid in sorted(sim._increments.keys()): 127 | print("thread\t{}\t{}\t{}".format(threadid, sim._increments[threadid], sim._even_increments[threadid])) 128 | 129 | if __name__=="__main__": 130 | main() 131 | -------------------------------------------------------------------------------- /src/inc_sim_cas.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Copyright (c) 2013 Steven Pelley 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | # this software and associated documentation files (the "Software"), to deal in 6 | # the Software without restriction, including without limitation the rights to 7 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | # the Software, and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 
13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | # analyse the inc trace. 22 | # assert: 23 | # exactly 8 bytes of persistent memory are accessed 24 | # each access to this persistent memory is either a read or a read and write 25 | # (inc here is implemented with CAS) 26 | # 27 | # compute and print: 28 | # for each registered thread the number of writes to counter (should be number of incs) 29 | # for each registered thread the number of reads to counter 30 | # -- each inc will read once before the CAS, read as part of the successful CAS 31 | # and then once for every failure 32 | # 33 | # print out any context changes as these may cause inconsistencies 34 | 35 | class my_sim: 36 | def __init__(self): 37 | self._special_address = None 38 | self._special_size = 0 39 | self._special_value = 0 40 | 41 | self._active_threads = set() 42 | import collections 43 | # threadid to increment 44 | self._increments = collections.defaultdict(int) 45 | self._reads = collections.defaultdict(int) 46 | 47 | def memory_access(self, line_num, threadid, have_read_1, have_read_2, have_write, read_1_address, read_size, read_2_address, write_address, write_size): 48 | if not self._special_address: 49 | return 50 | 51 | read1 = have_read_1 and not have_read_2 and not have_write 52 | 53 | RMW = ( 54 | have_read_1 and not have_read_2 and have_write and 55 | read_1_address == write_address and 56 | read_size == write_size 57 | ) 58 | 59 | read1_special = ( have_read_1 and ( 60 | (read_1_address < self._special_address and read_1_address + read_size >= self._special_address) or 61 | (read_1_address >= self._special_address and read_1_address <= self._special_address + self._special_size) 62 | ) ) 63 | 64 | read_first_8 = have_read_1 and read_1_address == self._special_address and read_size == 8 65 | 66 | # atomic inc to special address? 
67 | if RMW and read1_special: 68 | # check that we touch exactly the first 8 bytes 69 | assert read_first_8 70 | self._increments[threadid] += 1 71 | self._special_value += 1 72 | self._reads[threadid] += 1 73 | elif read1 and read1_special: 74 | self._reads[threadid] += 1 75 | 76 | def start_thread(self, line_num, threadid): 77 | assert threadid not in self._active_threads 78 | self._active_threads.add(threadid) 79 | 80 | def finish_thread(self, line_num, threadid): 81 | assert threadid in self._active_threads 82 | self._active_threads.remove(threadid) 83 | 84 | def function_call(self, line_num, name, threadid, stack_pointer, arg1, arg2, arg3): 85 | if name == "atomic_trace::special_malloc": 86 | assert not self._special_address 87 | assert self._special_size == 0 88 | self._special_size = arg1 89 | elif name == "atomic_trace::special_free": 90 | pass 91 | else: 92 | assert False 93 | 94 | def function_return(self, line_num, name, threadid, stack_pointer, return_value): 95 | if name == "atomic_trace::special_malloc": 96 | assert not self._special_address 97 | assert self._special_size >= 8 98 | self._special_address = return_value 99 | elif name == "atomic_trace::special_free": 100 | pass 101 | else: 102 | assert False 103 | 104 | def start_roi(self, line_num): 105 | pass 106 | 107 | def end_roi(self, line_num): 108 | pass 109 | 110 | def ctxt_change(self, line_num, threadid): 111 | print("context change! Line {}".format(line_num)) 112 | 113 | def main(): 114 | import argparse 115 | parser = argparse.ArgumentParser(description='test inc atomic trace simulation') 116 | parser.add_argument('--infile', default="") 117 | args = parser.parse_args() 118 | 119 | import sys 120 | if len(args.infile) == 0: 121 | fin = sys.stdin 122 | else: 123 | fin = open(args.infile) 124 | 125 | import AtomicTrace 126 | 127 | sim = my_sim() 128 | trace = AtomicTrace.Trace(fin, sim) 129 | trace.run() 130 | 131 | # recreate the program output 132 | print("final counter value: {}".format(sim._special_value)) 133 | for threadid in sorted(sim._increments.keys()): 134 | print("thread\t{}\t{}".format(threadid, sim._increments[threadid])) 135 | 136 | print("fails: {}".format(sum(sim._reads.values())-(2*sim._special_value))) 137 | 138 | if __name__=="__main__": 139 | main() 140 | -------------------------------------------------------------------------------- /src/inc.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | // atomic inc 21 | // 22 | // given a single shared int variable have a number of threads: 23 | // atomic_inc the variable, possibly waiting a small amount of time afterwards 24 | // each thread should count the number of times the variable was even pre-inc 25 | // 26 | // all traced accesses to the counter should be atomic_fetchadd_long 27 | // Use the memory trace to determine how many pre-inc evens were observed from each thread 28 | // if number of pre-inc evens matches between the trace and the application output we have 29 | // strong confidence that the trace is correct and atomic 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include "annotation.h" 36 | 37 | static __inline u_long 38 | atomic_fetchadd_long(volatile u_long *p, u_long v) { 39 | __asm __volatile( 40 | " lock ; " 41 | " xaddq %0,%1 ; " 42 | "# atomic_fetchadd_long" 43 | : "+r" (v), /* 0 */ 44 | "+m" (*p) /* 1 */ 45 | : : "cc"); 46 | return (v); 47 | } 48 | 49 | struct thread_data_t { 50 | pthread_barrier_t *bar1; 51 | pthread_barrier_t *bar2; 52 | pthread_barrier_t *bar3; 53 | pthread_barrier_t *bar4; 54 | uint64_t *shared; 55 | int64_t delay; 56 | 57 | int64_t threadid; 58 | int64_t count; 59 | int64_t even_count; 60 | int64_t stop_count; 61 | }; 62 | 63 | struct counter_w { 64 | uint64_t counter; 65 | char padding [56]; 66 | }; 67 | 68 | void* thread_incs(void *ptr) { 69 | thread_data_t* tdata = reinterpret_cast(ptr); 70 | uint64_t old_val = 0; 71 | 72 | atomic_trace::register_thread(tdata->threadid); 73 | pthread_barrier_wait(tdata->bar1); 74 | // ROI begins here 75 | pthread_barrier_wait(tdata->bar2); 76 | 77 | do { 78 | old_val = __sync_fetch_and_add(tdata->shared, 1); 79 | if (old_val % 2 == 0) { // inc'ed on even 80 | ++tdata->even_count; 81 | } 82 | ++tdata->count; 83 | } while (old_val < tdata->stop_count); 84 | 85 | pthread_barrier_wait(tdata->bar3); 86 | // ROI ends here 87 | pthread_barrier_wait(tdata->bar4); 88 | return NULL; 89 | } 90 | 91 | int main(int argc, char** argv) { 92 | int64_t num_threads; 93 | int64_t to_insert_total; 94 | int64_t delay; 95 | 96 | assert(argc == 3); 97 | num_threads = atoi(argv[1]); 98 | to_insert_total = atoi(argv[2]); 99 | 100 | assert(num_threads > 0); 101 | 102 | // threads will each increment once beyond to_insert_total 103 | // so subtract the number of threads from the insert total 104 | assert(to_insert_total > num_threads); 105 | to_insert_total -= num_threads; 106 | 107 | counter_w *counter = reinterpret_cast(atomic_trace::special_malloc(sizeof(counter_w))); 108 | counter->counter = 0; 109 | 110 | pthread_barrier_t *bar1 = new pthread_barrier_t(); 111 | pthread_barrier_t *bar2 = new pthread_barrier_t(); 112 | pthread_barrier_t *bar3 = new pthread_barrier_t(); 113 | pthread_barrier_t *bar4 = new pthread_barrier_t(); 114 | pthread_barrier_init(bar1, NULL, num_threads+1); 115 | pthread_barrier_init(bar2, NULL, num_threads+1); 116 | pthread_barrier_init(bar3, NULL, num_threads+1); 117 | pthread_barrier_init(bar4, NULL, num_threads+1); 118 | thread_data_t *tdata = new thread_data_t[num_threads]; 119 | pthread_t *threads = new pthread_t[num_threads]; 120 | 121 | srand(time(NULL)); 122 | 123 | for (int64_t i = 0; i < num_threads; ++i) { 124 | tdata[i].threadid = i; 
125 | tdata[i].bar1 = bar1; 126 | tdata[i].bar2 = bar2; 127 | tdata[i].bar3 = bar3; 128 | tdata[i].bar4 = bar4; 129 | tdata[i].shared = &counter->counter; 130 | tdata[i].delay = delay; 131 | tdata[i].count = 0; 132 | tdata[i].even_count = 0; 133 | tdata[i].stop_count = to_insert_total; 134 | 135 | uint64_t ret = pthread_create(&threads[i], NULL, thread_incs, (void*) &tdata[i]); 136 | } 137 | 138 | pthread_barrier_wait(bar1); 139 | atomic_trace::start_roi(); 140 | pthread_barrier_wait(bar2); 141 | 142 | pthread_barrier_wait(bar3); 143 | atomic_trace::end_roi(); 144 | pthread_barrier_wait(bar4); 145 | 146 | for (int64_t i = 0; i < num_threads; ++i) { 147 | uint64_t ret = pthread_join(threads[i], NULL); 148 | } 149 | 150 | assert(counter->counter == to_insert_total+num_threads); 151 | 152 | // print out the count and even count for each thread 153 | for (int64_t i = 0; i < num_threads; ++i) { 154 | std::cout << "thread " << i << "\t" << tdata[i].count << "\t" << tdata[i].even_count << std::endl; 155 | } 156 | 157 | delete [] threads; 158 | delete [] tdata; 159 | delete bar1; 160 | delete bar2; 161 | delete bar3; 162 | delete bar4; 163 | 164 | atomic_trace::special_free(counter); 165 | } 166 | -------------------------------------------------------------------------------- /src/inc_cas.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | // atomic inc with CAS 21 | // 22 | // given a single shared int variable have a number of threads: 23 | // atomic_inc the variable, possibly waiting a small amount of time afterwards 24 | // each thread should count the number of times the variable was even pre-inc 25 | // as well as the number of failed CASes 26 | // 27 | // the number of simulated writes to the special memory should be equal to 28 | // the total number of increments. 
Verify that this is true when not 29 | // tracing CAS fails, and that it breaks when you do log CAS fails as writes 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include "annotation.h" 36 | 37 | struct thread_data_t { 38 | pthread_barrier_t *bar1; 39 | pthread_barrier_t *bar2; 40 | pthread_barrier_t *bar3; 41 | pthread_barrier_t *bar4; 42 | uint64_t *shared; 43 | int64_t delay; 44 | 45 | int64_t threadid; 46 | int64_t count; 47 | int64_t even_count; 48 | int64_t stop_count; 49 | int64_t fail_count; 50 | }; 51 | 52 | struct counter_w { 53 | uint64_t counter; 54 | char padding [56]; 55 | }; 56 | 57 | // returns the pre-inc value 58 | // increments *fails for every CAS failure 59 | uint64_t inc_with_CAS(uint64_t *counter, int64_t *fails) { 60 | bool first = true; 61 | bool succ = false; 62 | uint64_t orig = *counter; 63 | uint64_t read_val; 64 | while (!succ) { 65 | if (!first) { 66 | *fails += 1; 67 | } 68 | first = false; 69 | read_val = __sync_val_compare_and_swap(counter, orig, orig+1); 70 | succ = read_val == orig; 71 | orig = read_val; 72 | } 73 | return orig; 74 | } 75 | 76 | void* thread_incs(void *ptr) { 77 | thread_data_t* tdata = reinterpret_cast(ptr); 78 | uint64_t old_val = 0; 79 | 80 | atomic_trace::register_thread(tdata->threadid); 81 | pthread_barrier_wait(tdata->bar1); 82 | // ROI begins here 83 | pthread_barrier_wait(tdata->bar2); 84 | 85 | do { 86 | old_val = inc_with_CAS(tdata->shared, &tdata->fail_count); 87 | if (old_val % 2 == 0) { // inc'ed on even 88 | ++tdata->even_count; 89 | } 90 | ++tdata->count; 91 | } while (old_val < tdata->stop_count); 92 | 93 | pthread_barrier_wait(tdata->bar3); 94 | // ROI ends here 95 | pthread_barrier_wait(tdata->bar4); 96 | return NULL; 97 | } 98 | 99 | int main(int argc, char** argv) { 100 | int64_t num_threads; 101 | int64_t to_insert_total; 102 | int64_t delay; 103 | 104 | assert(argc == 3); 105 | num_threads = atoi(argv[1]); 106 | to_insert_total = atoi(argv[2]); 107 | 108 | assert(num_threads > 0); 109 | 110 | // threads will each increment once beyond to_insert_total 111 | // so subtract the number of threads from the insert total 112 | assert(to_insert_total > num_threads); 113 | to_insert_total -= num_threads; 114 | 115 | counter_w *counter = reinterpret_cast(atomic_trace::special_malloc(sizeof(counter_w))); 116 | counter->counter = 0; 117 | 118 | pthread_barrier_t *bar1 = new pthread_barrier_t(); 119 | pthread_barrier_t *bar2 = new pthread_barrier_t(); 120 | pthread_barrier_t *bar3 = new pthread_barrier_t(); 121 | pthread_barrier_t *bar4 = new pthread_barrier_t(); 122 | pthread_barrier_init(bar1, NULL, num_threads+1); 123 | pthread_barrier_init(bar2, NULL, num_threads+1); 124 | pthread_barrier_init(bar3, NULL, num_threads+1); 125 | pthread_barrier_init(bar4, NULL, num_threads+1); 126 | thread_data_t *tdata = new thread_data_t[num_threads]; 127 | pthread_t *threads = new pthread_t[num_threads]; 128 | 129 | srand(time(NULL)); 130 | 131 | for (int64_t i = 0; i < num_threads; ++i) { 132 | tdata[i].threadid = i; 133 | tdata[i].bar1 = bar1; 134 | tdata[i].bar2 = bar2; 135 | tdata[i].bar3 = bar3; 136 | tdata[i].bar4 = bar4; 137 | tdata[i].shared = &counter->counter; 138 | tdata[i].delay = delay; 139 | tdata[i].count = 0; 140 | tdata[i].even_count = 0; 141 | tdata[i].stop_count = to_insert_total; 142 | tdata[i].fail_count = 0; 143 | 144 | uint64_t ret = pthread_create(&threads[i], NULL, thread_incs, (void*) &tdata[i]); 145 | } 146 | 147 | pthread_barrier_wait(bar1); 148 | atomic_trace::start_roi(); 149 | 
pthread_barrier_wait(bar2); 150 | 151 | pthread_barrier_wait(bar3); 152 | atomic_trace::end_roi(); 153 | pthread_barrier_wait(bar4); 154 | 155 | for (int64_t i = 0; i < num_threads; ++i) { 156 | uint64_t ret = pthread_join(threads[i], NULL); 157 | } 158 | 159 | assert(counter->counter == to_insert_total+num_threads); 160 | 161 | // print out the count and even count for each thread 162 | int64_t cas_fails = 0; 163 | for (int64_t i = 0; i < num_threads; ++i) { 164 | std::cout << "thread " << i << "\t" << tdata[i].count << "\t" << tdata[i].even_count << "\t" << tdata[i].fail_count << std::endl; 165 | cas_fails += tdata[i].fail_count; 166 | } 167 | 168 | std::cout << "CAS fails: " << cas_fails << std::endl; 169 | 170 | delete [] threads; 171 | delete [] tdata; 172 | delete bar1; 173 | delete bar2; 174 | delete bar3; 175 | delete bar4; 176 | 177 | atomic_trace::special_free(counter); 178 | } 179 | -------------------------------------------------------------------------------- /trace/makefile.rules: -------------------------------------------------------------------------------- 1 | ############################################################## 2 | # 3 | # This file includes all the test targets as well as all the 4 | # non-default build rules and test recipes. 5 | # 6 | ############################################################## 7 | 8 | ###### Additional includes that are specific to this directory ###### 9 | 10 | # Placeholder for additional include files. 11 | 12 | 13 | ############################################################## 14 | # 15 | # Test targets 16 | # 17 | ############################################################## 18 | 19 | ###### Place all generic definitions here ###### 20 | 21 | # This defines tests which run tools of the same name. This is simply for convenience to avoid 22 | # defining the test name twice (once in TOOL_ROOTS and again in TEST_ROOTS). 23 | # Tests defined here should not be defined in TOOL_ROOTS and TEST_ROOTS. 24 | TEST_TOOL_ROOTS := trace 25 | 26 | # This defines the tests to be run that were not already defined in TEST_TOOL_ROOTS. 27 | TEST_ROOTS := 28 | 29 | # This defines a list of tests that should run in the "short" sanity. Tests in this list must also 30 | # appear either in the TEST_TOOL_ROOTS or the TEST_ROOTS list. 31 | # If the entire directory should be tested in sanity, assign TEST_TOOL_ROOTS and TEST_ROOTS to the 32 | # SANITY_SUBSET variable in the tests section below (see example in makefile.rules.tmpl). 33 | SANITY_SUBSET := 34 | 35 | # This defines the tools which will be run during the the tests, and were not already defined in 36 | # TEST_TOOL_ROOTS. 37 | TOOL_ROOTS := 38 | 39 | # This defines the static analysis tools which will be run during the the tests. They should not 40 | # be defined in TEST_TOOL_ROOTS. If a test with the same name exists, it should be defined in 41 | # TEST_ROOTS. 42 | # Note: Static analysis tools are in fact executables linked with the Pin Static Analysis Library. 43 | # This library provides a subset of the Pin APIs which allows the tool to perform static analysis 44 | # of an application or dll. Pin itself is not used when this tool runs. 45 | SA_TOOL_ROOTS := 46 | 47 | # This defines all the applications that will be run during the tests. 48 | APP_ROOTS := 49 | 50 | # This defines any additional object files that need to be compiled. 51 | OBJECT_ROOTS := 52 | 53 | # This defines any additional dlls (shared objects), other than the pintools, that need to be compiled. 
54 | DLL_ROOTS := 55 | 56 | # This defines any static libraries (archives), that need to be built. 57 | LIB_ROOTS := 58 | 59 | ###### Place architecture-specific definitions here ###### 60 | 61 | # Place ia32-specific definitions here if they apply to all supported operating systems. 62 | ifeq ($(TARGET),ia32) 63 | TEST_TOOL_ROOTS += 64 | TEST_ROOTS += 65 | SANITY_SUBSET += 66 | TOOL_ROOTS += 67 | SA_TOOL_ROOTS += 68 | APP_ROOTS += 69 | OBJECT_ROOTS += 70 | DLL_ROOTS += 71 | LIB_ROOTS += 72 | endif 73 | 74 | # Place intel64-specific definitions here if they apply to all supported operating systems. 75 | ifeq ($(TARGET),intel64) 76 | TEST_TOOL_ROOTS += 77 | TEST_ROOTS += 78 | SANITY_SUBSET += 79 | TOOL_ROOTS += 80 | SA_TOOL_ROOTS += 81 | APP_ROOTS += 82 | OBJECT_ROOTS += 83 | DLL_ROOTS += 84 | LIB_ROOTS += 85 | endif 86 | 87 | ###### Place probe mode tests here ###### 88 | 89 | ifeq ($(PROBE),1) 90 | TEST_TOOL_ROOTS += 91 | TEST_ROOTS += 92 | SANITY_SUBSET += 93 | TOOL_ROOTS += 94 | APP_ROOTS += 95 | OBJECT_ROOTS += 96 | DLL_ROOTS += 97 | LIB_ROOTS += 98 | endif 99 | 100 | ###### Place OS-specific definitions here ###### 101 | 102 | # Android 103 | ifeq ($(TARGET_OS),android) 104 | TEST_TOOL_ROOTS += 105 | TEST_ROOTS += 106 | SANITY_SUBSET += 107 | TOOL_ROOTS += 108 | SA_TOOL_ROOTS += 109 | APP_ROOTS += 110 | OBJECT_ROOTS += 111 | DLL_ROOTS += 112 | LIB_ROOTS += 113 | ifeq ($(TARGET),ia32) 114 | TEST_TOOL_ROOTS += 115 | TEST_ROOTS += 116 | SANITY_SUBSET += 117 | TOOL_ROOTS += 118 | SA_TOOL_ROOTS += 119 | APP_ROOTS += 120 | OBJECT_ROOTS += 121 | DLL_ROOTS += 122 | LIB_ROOTS += 123 | endif 124 | ifeq ($(TARGET),intel64) 125 | TEST_TOOL_ROOTS += 126 | TEST_ROOTS += 127 | SANITY_SUBSET += 128 | TOOL_ROOTS += 129 | SA_TOOL_ROOTS += 130 | APP_ROOTS += 131 | OBJECT_ROOTS += 132 | DLL_ROOTS += 133 | LIB_ROOTS += 134 | endif 135 | endif 136 | 137 | # Linux 138 | ifeq ($(TARGET_OS),linux) 139 | TEST_TOOL_ROOTS += 140 | TEST_ROOTS += 141 | SANITY_SUBSET += 142 | TOOL_ROOTS += 143 | SA_TOOL_ROOTS += 144 | APP_ROOTS += 145 | OBJECT_ROOTS += 146 | DLL_ROOTS += 147 | LIB_ROOTS += 148 | ifeq ($(TARGET),ia32) 149 | TEST_TOOL_ROOTS += 150 | TEST_ROOTS += 151 | SANITY_SUBSET += 152 | TOOL_ROOTS += 153 | SA_TOOL_ROOTS += 154 | APP_ROOTS += 155 | OBJECT_ROOTS += 156 | DLL_ROOTS += 157 | LIB_ROOTS += 158 | endif 159 | ifeq ($(TARGET),intel64) 160 | TEST_TOOL_ROOTS += 161 | TEST_ROOTS += 162 | SANITY_SUBSET += 163 | TOOL_ROOTS += 164 | SA_TOOL_ROOTS += 165 | APP_ROOTS += 166 | OBJECT_ROOTS += 167 | DLL_ROOTS += 168 | LIB_ROOTS += 169 | endif 170 | endif 171 | 172 | # Mac 173 | ifeq ($(TARGET_OS),mac) 174 | TEST_TOOL_ROOTS += 175 | TEST_ROOTS += 176 | SANITY_SUBSET += 177 | TOOL_ROOTS += 178 | SA_TOOL_ROOTS += 179 | APP_ROOTS += 180 | OBJECT_ROOTS += 181 | DLL_ROOTS += 182 | LIB_ROOTS += 183 | ifeq ($(TARGET),ia32) 184 | TEST_TOOL_ROOTS += 185 | TEST_ROOTS += 186 | SANITY_SUBSET += 187 | TOOL_ROOTS += 188 | SA_TOOL_ROOTS += 189 | APP_ROOTS += 190 | OBJECT_ROOTS += 191 | DLL_ROOTS += 192 | LIB_ROOTS += 193 | endif 194 | ifeq ($(TARGET),intel64) 195 | TEST_TOOL_ROOTS += 196 | TEST_ROOTS += 197 | SANITY_SUBSET += 198 | TOOL_ROOTS += 199 | SA_TOOL_ROOTS += 200 | APP_ROOTS += 201 | OBJECT_ROOTS += 202 | DLL_ROOTS += 203 | LIB_ROOTS += 204 | endif 205 | endif 206 | 207 | # Windows 208 | ifeq ($(TARGET_OS),windows) 209 | TEST_TOOL_ROOTS += 210 | TEST_ROOTS += 211 | SANITY_SUBSET += 212 | TOOL_ROOTS += 213 | SA_TOOL_ROOTS += 214 | APP_ROOTS += 215 | OBJECT_ROOTS += 216 | DLL_ROOTS += 217 | LIB_ROOTS += 218 | ifeq 
($(TARGET),ia32)
219 | TEST_TOOL_ROOTS +=
220 | TEST_ROOTS +=
221 | SANITY_SUBSET +=
222 | TOOL_ROOTS +=
223 | SA_TOOL_ROOTS +=
224 | APP_ROOTS +=
225 | OBJECT_ROOTS +=
226 | DLL_ROOTS +=
227 | LIB_ROOTS +=
228 | endif
229 | ifeq ($(TARGET),intel64)
230 | TEST_TOOL_ROOTS +=
231 | TEST_ROOTS +=
232 | SANITY_SUBSET +=
233 | TOOL_ROOTS +=
234 | SA_TOOL_ROOTS +=
235 | APP_ROOTS +=
236 | OBJECT_ROOTS +=
237 | DLL_ROOTS +=
238 | LIB_ROOTS +=
239 | endif
240 | endif
241 | 
242 | ###### Handle exceptions here ######
243 | 
244 | # If some tests need to be disabled, do this here
245 | 
246 | 
247 | ##############################################################
248 | #
249 | # Test recipes
250 | #
251 | ##############################################################
252 | 
253 | ###### Finalize sanity here ######
254 | 
255 | # If all tests in this directory should run in sanity, uncomment the following line.
256 | # SANITY_SUBSET := $(TEST_TOOL_ROOTS) $(TEST_ROOTS)
257 | 
258 | # This section contains recipes for tests other than the default.
259 | # See makefile.default.rules for the default test rules.
260 | # All tests in this section should adhere to the naming convention: .test
261 | 
262 | 
263 | ##############################################################
264 | #
265 | # Build rules
266 | #
267 | ##############################################################
268 | 
269 | # This section contains the build rules for all binaries that have special build rules.
270 | # See makefile.default.rules for the default build rules.
271 | 
272 | ###### Special tools' build rules ######
273 | 
274 | # placeholder for special tools' build rules
275 | 
276 | ###### Special applications' build rules ######
277 | 
278 | # placeholder for special applications' build rules
279 | 
280 | ###### Special objects' build rules ######
281 | 
282 | # placeholder for special objects' build rules
283 | 
284 | ###### Special dlls' build rules ######
285 | 
286 | # placeholder for special dlls' build rules
287 | 
288 | ###### Special libs' build rules ######
289 | 
290 | # placeholder for special libs' build rules
291 | 
292 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | atomic-memory-trace
2 | ===================
3 | 
4 | PIN-tool to produce multi-threaded atomic memory traces
5 | 
6 | PIN is a useful tool for instrumenting applications and easily producing memory
7 | access traces. However, tracing memory accesses from multiple threads suffers
8 | from the atomic instrumentation problem -- instructions responsible for
9 | tracing/logging an access happen separately from that access. Races between
10 | threads may result in a different order being traced than actually occurs.
11 | This tool provides atomic instrumentation by simulating cache coherence. In
12 | addition, the tool will trace thread start/end, an optional region of interest,
13 | and user-provided function calls and returns.
14 | 
15 | The primary alternative to this tool is architectural simulation. Most
16 | simulators are complicated to learn, complicated to use (getting OSes and
17 | workloads running properly may be difficult), and slow (most simulators are
18 | single threaded and cannot leverage multithreading to produce a faster
19 | trace/simulation).
20 | 
21 | This README documents the tracing pintool and example test case. This tool was
22 | developed using version 2.12-58423 of PIN on Ubuntu 12.04.
There are no plans
23 | to support operating systems other than Linux or systems other than x86_64.
24 | The pintool relies on the Boost libraries. This software comes with no support
25 | but may be useful to others. This project uses the MIT license.
26 | 
27 | Quick Start
28 | ===========
29 | 
30 | Build the pintool.
31 | Change into the trace directory and run:
32 | 
33 | ```
34 | % make PIN_ROOT=<path to the PIN kit>
35 | ```
36 | 
37 | Run the tool as any other pintool:
38 | 
39 | ```
40 | % pin -t trace/obj-intel64/trace.so -- <your application>
41 | ```
42 | 
43 | By default, output appears in the file memory_trace.out. All threads and
44 | memory accesses will be traced. The output appears with one event (thread
45 | start, function call, or memory access) per line, starting with an arbitrary
46 | timestamp (used to merge events later).
47 | 
48 | An easy way to produce useful output, sorting by timestamp and then stripping
49 | the timestamps away, is to use:
50 | 
51 | ```
52 | % sort -k 1 -n memory_trace.out | sed '/thread_sync/d' | awk 'BEGIN {OFS="\t"}; {$1="";sub("\t\t","")}1' > memory_trace.clean
53 | ```
54 | 
55 | memory_trace.clean will contain properly ordered events and accesses, with the
56 | sync entries removed.
57 | 
58 | Tool Options
59 | ============
60 | 
61 | * -o
62 | The output file name. Defaults to 'memory_trace.out'.
63 | * -r
64 | Do threads need to be registered? If 0/false, all memory accesses from all
65 | threads will be traced. If 1/true, only accesses from registered threads will
66 | be traced. See annotation's atomic_trace::register_thread(threadid).
67 | * -f
68 | File with the list of functions to trace.
69 | * -i
70 | Use Region of Interest? If ROI is used, memory tracing will only occur while
71 | the ROI is active. Thread start/stop tracing and function tracing will
72 | always occur.
73 | * -l
74 | Number of locks for simulated cache coherence. Increasing this number will
75 | use more memory and may hurt cache performance but will improve concurrency.
76 | If contention occurs for specific address locks (i.e. cache lines), try
77 | increasing this. A value of 1 serializes all memory accesses across threads.
78 | * -b
79 | Cache block size. By default 64 bytes.
80 | * -a
81 | Accesses per thread before flushing. Each thread keeps a local trace buffer
82 | that is occasionally flushed to the global file. More accesses per thread
83 | ensure that grabbing the global lock does not become the primary
84 | bottleneck.
85 | * -t
86 | Test. Turns off address locking, breaking atomicity. Activate this flag to
87 | see the instrumentation atomicity problem.
88 | * -d
89 | Allowable timestamp difference. If threads' timestamps diverge beyond
90 | this limit, threads will sync and flush the other threads. This makes merging
91 | the output significantly easier.
92 | * -c
93 | Trace failed Compare-And-Swaps. Default 0 (no). Generally every CAS
94 | instruction is considered a write, even when the instruction fails.
95 | Use this option to only log a CAS as a write when it succeeds.
96 | 
97 | Output Format
98 | =============
99 | 
100 | Each line contains one event as a tab-delimited list. Entries contain
101 | threadids, which may be -1 (if threads must be registered but a traced function
102 | is called from an unregistered thread), assigned by the pintool if threads are
103 | not required to be registered, or set by the registration function (described
104 | later). All entries start with a timestamp and threadid:
105 | 
106 | * memory: Each instruction may read two addresses and write one.
There are 107 | possible sub-entries for each of these accesses. The second read does not 108 | contain a size field, as it may only occur with the first read and has the same 109 | size (that is, r2's size is the same as r's). 110 | 111 | ``` 112 | m [r <address> <size>] [r2 <address>] [w <address> <size>] 113 | ``` 114 | 115 | * thread registered: 116 | 117 | ``` 118 | threadid tr 119 | ``` 120 | 121 | * thread finished: 122 | 123 | ``` 124 | threadid tf 125 | ``` 126 | 127 | * function call: All requested functions are traced, even if not on a registered 128 | thread. The first 3 arguments of the function are traced, as well as the stack 129 | pointer to match up calls and returns. 130 | 131 | ``` 132 | fc <function name> <stack pointer> <arg1> <arg2> <arg3> 133 | ``` 134 | 135 | * function return: 136 | 137 | ``` 138 | fr <function name> <stack pointer> <return value> 139 | ``` 140 | 141 | * start Region of Interest: 142 | 143 | ``` 144 | start_roi 145 | ``` 146 | 147 | * end Region of Interest: 148 | 149 | ``` 150 | end_roi 151 | ``` 152 | 153 | * context change: Context changes may interrupt the locking necessary to provide 154 | atomic tracing. On a context change, consider that the next access may not be 155 | traced atomically. -- It is unclear how the PIN internals work and if this is 156 | really a concern (I haven't observed any context changes yet). 157 | 158 | ``` 159 | ctxt_change 160 | ``` 161 | 162 | * thread sync: When threads flush other threads to keep all threads close in 163 | timestamp, the merging process must be made aware of this. 164 | 165 | ``` 166 | thread_sync 167 | ``` 168 | 169 | Function Tracing 170 | ================ 171 | 172 | In addition to memory accesses, many functions are traced. A few are specific 173 | to this tool, but any user-provided function can be traced. The provided 174 | src/annotation.cpp and src/annotation.h (creates libannotation) provide the 175 | header and library for these functions. In general it is easier to provide 176 | these as a library to ensure they are not in-lined. 177 | 178 | ``` 179 | atomic_trace::register_thread 180 | atomic_trace::start_roi 181 | atomic_trace::end_roi 182 | ``` 183 | 184 | These functions allow the pintool to highlight a region of interest (memory 185 | accesses outside of the region will not be traced) and name threads, useful to 186 | match trace threads to user threads. 187 |
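For example, an application might annotate itself along the lines of the following sketch (hypothetical code, not part of this repository; it assumes the program links against libannotation and is traced with -r 1 -i 1, and the worker/counter names are made up for illustration):

```
// Hypothetical usage sketch -- not part of this repository.
// Assumes the program links against libannotation and is traced with -r 1 -i 1.
#include <pthread.h>
#include <stdint.h>
#include "annotation.h"

static long counter = 0;

static void *worker(void *arg) {
    // Name this thread; with -r 1 only registered threads have memory traced.
    atomic_trace::register_thread((int64_t)(intptr_t)arg);
    for (int i = 0; i < 1000; ++i) {
        __sync_fetch_and_add(&counter, 1);  // traced while the ROI is active
    }
    return NULL;
}

int main() {
    pthread_t threads[4];
    atomic_trace::start_roi();              // with -i 1, memory tracing starts here
    for (intptr_t t = 0; t < 4; ++t)
        pthread_create(&threads[t], NULL, worker, (void *)t);
    for (int t = 0; t < 4; ++t)
        pthread_join(threads[t], NULL);
    atomic_trace::end_roi();                // memory tracing stops here
    return 0;
}
```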
188 | In addition, the pintool takes an "-f" argument that is a file with a list of 189 | functions (one per line) that will be traced. The functions should be listed 190 | in their undecorated form (as per pin, see above for examples). 191 | 192 | Merge Utility 193 | ============= 194 | 195 | src/merge.cpp provides a tool that takes a memory trace on stdin and pushes 196 | the merged (by timestamp) trace to the output, stripping out the timestamp from 197 | each entry as well as omitting sync entries. While not the most efficient (the 198 | sort utility is somewhat faster for file traces), it suffices and allows traces 199 | to be piped while using a small memory footprint. 200 | 201 | Merge is necessary because the sort utility cannot be used in a pipe; the 202 | entire file must first be available. When merging thread streams, no entry may 203 | be output until it is certain that no older entry can appear from any thread. The 204 | guarantee is provided when a thread submits a newer trace (because each thread 205 | trace is monotonically increasing), and the threads are kept close together via 206 | sync traces.
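The rule the merge applies can be sketched in a few lines (a simplified illustration only, not the actual implementation in src/merge.cpp; it assumes the trace has already been reduced to timestamp/threadid pairs and it ignores thread start/finish handling and unregistered threads):

```
// Sketch of the merge rule: an entry may be emitted once its timestamp is at
// or below the oldest "newest timestamp seen" over all known threads.
// src/merge.cpp applies the same rule but also buffers the full entry text,
// strips timestamps, and uses tr/tf entries to track which threads exist.
#include <stdint.h>
#include <functional>
#include <iostream>
#include <map>
#include <queue>
#include <utility>
#include <vector>

int main() {
    typedef std::pair<int64_t, int64_t> entry_t;            // (timestamp, threadid)
    std::map<int64_t, int64_t> newest;                       // newest timestamp per thread
    std::priority_queue<entry_t, std::vector<entry_t>,
                        std::greater<entry_t> > pending;     // min-heap by timestamp

    int64_t timestamp, threadid;
    while (std::cin >> timestamp >> threadid) {
        newest[threadid] = timestamp;                        // per-thread streams are monotonic
        pending.push(entry_t(timestamp, threadid));

        // safe horizon: every known thread has already produced something at
        // least this new, so nothing older can still arrive
        int64_t safe = newest.begin()->second;
        for (std::map<int64_t, int64_t>::const_iterator it = newest.begin();
             it != newest.end(); ++it)
            if (it->second < safe) safe = it->second;

        // emit, in timestamp order, everything at or below the horizon
        while (!pending.empty() && pending.top().first <= safe) {
            std::cout << pending.top().first << '\t' << pending.top().second << '\n';
            pending.pop();
        }
    }
    return 0;
}
```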
207 | 208 | This tool chain works as follows: create a special fifo file to connect pin to 209 | merge 210 | 211 | ``` 212 | % mkfifo mypipe 213 | ``` 214 | 215 | Pipe this file to merge, and the merge output to a simulator utility 216 | 217 | ``` 218 | % cat mypipe | ./merge | ./simulator 219 | ``` 220 | 221 | Start the pintool, directing the trace to the fifo file 222 | 223 | ``` 224 | % pin -t trace.so -o mypipe -- ./app 225 | ``` 226 | 227 | Make sure that names are fully qualified where necessary to reach pin, 228 | trace.so, mypipe, and your app; and that any other desired trace arguments are used. 229 | 230 | Alternatively, merge can be used to sort trace files, although the sort command 231 | above may be faster. 232 | 233 | Test - Atomic Increment 234 | ======================= 235 | 236 | src/inc.cpp provides a test case for instrumentation atomicity. Several 237 | threads use atomic_fetchadd_long to repeatedly increment a shared counter. 238 | Each thread counts the number of times it increments the counter and the number 239 | of times the counter was even before the increment. 240 | 241 | A trace simulation can reconstruct the operation of this program. All memory 242 | operations to the counter's address are atomic increments. Simply observing 243 | the order in which increments occur allows us to reconstruct the number of 244 | increments from each thread, as well as how many times each thread observes a 245 | previously-even number on increment. 246 | 247 | If the trace simulation matches the actual program, there is a good chance 248 | things are working. Additionally, using the "-t" option we can force the 249 | pintool to disable locking for testing. If this breaks the match, we have even more 250 | confidence that the tool works. 251 | 252 | Run as 253 | 254 | ``` 255 | % cat mypipe | ./merge | ./inc_sim.py 256 | ``` 257 | 258 | and 259 | 260 | ``` 261 | % pin -t trace.so -o mypipe -f func_list -- ./inc 262 | ``` 263 | 264 | and verify that the outputs of the simulation and the actual program are the 265 | same. Use the -t 1 flag for trace.so and see that the outputs differ. 266 | 267 | A similar set of tools (inc_cas.cpp, inc_sim_cas.py) tests the functionality 268 | of the -c option (whether or not to trace failed CAS as a write). 269 | 270 | Internals: Locking Protocol 271 | =========================== 272 | 273 | Some rules need to be followed to prevent deadlock. 274 | These rules adhere to the rules provided by the PIN manual. 275 | 276 | Locks: 277 | ------ 278 | 279 | * Address locks 280 | Responsible for providing instrumentation atomicity. Tracing an instruction 281 | will acquire whatever locks cover the accessed addresses, and these locks are 282 | released on the next pin function (so all pin analysis and callback 283 | functions first release all address locks). 284 | * Thread start/finish lock 285 | Acquired when a thread starts or finishes, or to block threads from 286 | starting or finishing. Necessary for actions that need to synchronize 287 | timestamps across all threads. Any thread that wishes to access another 288 | thread's data must hold this. 289 | * Thread locks 290 | Cover each thread's trace buffer and Lamport timestamp. Other threads may 291 | have to read/update another thread's timestamp or flush its buffer. 292 | * File lock 293 | Covers the trace file handle, thread count, last_flushed (really all shared 294 | global objects). 295 | 296 | Locking Rules 297 | ------------- 298 | 299 | Function callbacks must release all locks before returning.
300 | Analysis functions (for instructions and routines) may hold address locks beyond return. 301 | Address locks are released at the beginning of every analysis and callback 302 | routine, treating each routine as the "end" of an instruction analysis routine. 303 | 304 | Locks must always be acquired in the following order: 305 | 306 | 1. Address locks by index order of the address_lock_bank 307 | 2. Thread start/finish lock 308 | 3. Thread locks by pin THREADID order 309 | 4. The global lock 310 | 311 | This implies, for example, that one may not acquire **any** address locks while 312 | holding a thread lock. 313 | 314 | Internals: Lamport Clock 315 | ======================== 316 | 317 | Actual trace order is determined by a Lamport clock, and all trace entries use 318 | a Lamport timestamp. 319 | 320 | All address locks keep a timestamp. The time of an access = max(timestamp of 321 | all address locks acquired, thread timestamp) + 1. All address locks and the 322 | accessing thread must be updated to this access timestamp. 323 | 324 | Functions on registered threads increment that thread's timestamp. To enforce 325 | an order of functions from two threads there must be corresponding memory 326 | accesses (release and acquire, such as a lock). 327 | 328 | Functions from unregistered threads, roi traces, and the start and finish 329 | of threads synchronize all threads. This is done by acquiring the thread 330 | start/finish lock (so that new threads do not appear) and ALL thread locks, 331 | moving all threads up to the latest global timestamp + 1. 332 | 333 | Time separation: It's possible for 2 threads to diverge in time, requiring 334 | trace merging to use a huge amount of memory (imagine 2 threads, one sleeping 335 | and the other executing continuously -- we cannot merge the running 336 | thread's entries until we know the sleeping thread won't produce an older 337 | timestamp -- this only happens once we observe a new, large timestamp). Solve 338 | this by keeping track of the minimum "last flushed" thread timestamp. When a 339 | thread tries to flush and sees that any thread is too old, it will try to flush 340 | and update the timestamp for all too-old threads (those below some threshold), 341 | bounding how far apart threads can be in the trace file. Calculating the 342 | minimum last flushed timestamp requires keeping a last_flushed bimap under the 343 | global lock. A last_flushed_cache timestamp (also covered by the global file lock) 344 | makes this more efficient. 345 | -------------------------------------------------------------------------------- /src/merge.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software.
12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | // merge.cpp 21 | // merge the output of the atomic trace 22 | // by lamport timestamp, strip the timestamp, and output 23 | // the result. 24 | // 25 | // Break the input stream into thread components, 26 | // assert that the threads are monotonic increasing in timestamp. 27 | // 28 | // We can only move entries to the output once we are certain no 29 | // earlier timestamp is going to show up from another thread. 30 | // this happens once we have observed at least that timestamp 31 | // by every other thread. Entries from unregistered threads (-1) 32 | // must synchronize first, so it is not possible to pop a timestamp 33 | // from any registered thread and then later pop an earlier timestamp 34 | // from an unregistered thread. Assert this as well. 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include 47 | #include 48 | #include 49 | #include 50 | 51 | #if TRACE_DEBUG 52 | #define DO_DEBUG(S) S 53 | #else 54 | #define DO_DEBUG(S) /* */ 55 | #endif 56 | 57 | ///////////////////////////////////////////// 58 | // queue for each thread 59 | // tracks timestamp, string (trace line), and length of string for each 60 | // tracks whether the thread is finished 61 | ///////////////////////////////////////////// 62 | class thread_queue_t { 63 | public: 64 | 65 | thread_queue_t(); 66 | ~thread_queue_t(); 67 | 68 | // enqueue the string in buffer of length with timestamp 69 | // return if the queue had been empty 70 | bool enqueue(int64_t timestamp, const char *buffer, int64_t length); 71 | 72 | // pop an entry into buffer (must be at least 256 bytes) 73 | // and put the string's length into length and timestamp into timestamp 74 | // return true if had not been empty and data is valid 75 | bool dequeue(int64_t *timestamp, char *buffer, int64_t *length); 76 | 77 | // return next timestamp or -1 if empty 78 | int64_t peek(); 79 | 80 | void finish() {_thread_finished = true;} 81 | bool is_finished() {return _thread_finished;} 82 | 83 | private: 84 | static const int64_t _initial_capacity = 1024*1024; // 1mb per thread 85 | void _enlargen_char_buffer(); 86 | void _assert_repinv(); 87 | 88 | typedef std::pair time_size_t; 89 | typedef std::queue time_size_queue_t; 90 | 91 | // holds string traces associated with this thread 92 | char *_buffer; 93 | int64_t _buffer_capacity; 94 | int64_t _buffer_size; 95 | int64_t _buffer_next_insert; 96 | int64_t _buffer_next_pop; 97 | 98 | // holds (timestamp, size) for each entry in char_stream 99 | time_size_queue_t _time_size_queue; 100 | 101 | int64_t _max_timestamp; 102 | bool _thread_finished; 103 | }; 104 | 105 | thread_queue_t::thread_queue_t() 106 | : _buffer(new char[_initial_capacity]) 107 | , _buffer_capacity(_initial_capacity) 108 | , _buffer_size(0) 109 | , _buffer_next_insert(0) 110 | , _buffer_next_pop(0) 111 | , _time_size_queue() 112 | , _max_timestamp(0) 113 | , _thread_finished(false) 114 | {} 115 | 116 | thread_queue_t::~thread_queue_t() { 117 | 
delete [] _buffer; 118 | } 119 | 120 | bool thread_queue_t::enqueue(int64_t timestamp, const char *buffer, int64_t length) { 121 | DO_DEBUG(_assert_repinv()); 122 | assert(timestamp > _max_timestamp); 123 | assert(!_thread_finished); 124 | 125 | bool ret = _time_size_queue.empty(); 126 | _max_timestamp = timestamp; 127 | 128 | int64_t space_left = _buffer_capacity - _buffer_size; 129 | // if we won't have enough room move to a larger buffer 130 | if (length > space_left) { 131 | _enlargen_char_buffer(); 132 | } 133 | 134 | // may have to wrap, so do in 2 copies: 135 | // 1: from next_insert to end 136 | // 2: from front of buffer 137 | int64_t copy1_idx = _buffer_next_insert; 138 | int64_t copy1_len = std::min(_buffer_capacity - copy1_idx, length); 139 | int64_t copy2_len = length - copy1_len; 140 | memcpy(&_buffer[copy1_idx], buffer, copy1_len); 141 | memcpy(_buffer, &buffer[copy1_len], copy2_len); 142 | 143 | // update buffer counters 144 | _buffer_size += length; 145 | _buffer_next_insert = (_buffer_next_insert + length) % _buffer_capacity; 146 | 147 | _time_size_queue.push( time_size_t(timestamp, length) ); 148 | 149 | DO_DEBUG(_assert_repinv()); 150 | return ret; 151 | } 152 | 153 | bool thread_queue_t::dequeue(int64_t *timestamp, char *buffer, int64_t *length) { 154 | DO_DEBUG( _assert_repinv()); 155 | assert(timestamp); 156 | assert(buffer); 157 | assert(length); 158 | 159 | if (_time_size_queue.empty()) return false; 160 | 161 | time_size_t time_size = _time_size_queue.front(); 162 | _time_size_queue.pop(); 163 | *timestamp = time_size.first; 164 | *length = time_size.second; 165 | 166 | 167 | // copy from next_pop to the end 168 | // copy remaining from the beginning 169 | int64_t copy1_idx = _buffer_next_pop; 170 | int64_t copy1_len = std::min(*length, _buffer_capacity - copy1_idx); 171 | int64_t copy2_len = *length - copy1_len; 172 | memcpy(buffer, &_buffer[copy1_idx], copy1_len); 173 | memcpy(&buffer[copy1_len], _buffer, copy2_len); 174 | 175 | _buffer_size -= *length; 176 | _buffer_next_pop = (_buffer_next_pop + *length) % _buffer_capacity; 177 | DO_DEBUG(_assert_repinv()); 178 | 179 | return true; 180 | } 181 | 182 | int64_t thread_queue_t::peek() { 183 | if (_time_size_queue.empty()) return -1; 184 | return _time_size_queue.front().first; 185 | } 186 | 187 | // grow the buffer 188 | void thread_queue_t::_enlargen_char_buffer() { 189 | DO_DEBUG(_assert_repinv()); 190 | int64_t new_capacity = 4 * _buffer_capacity; 191 | char *new_buffer = new char[new_capacity]; 192 | 193 | int64_t copy1_len = std::min(_buffer_size, _buffer_capacity - _buffer_next_pop); 194 | int64_t copy2_len = _buffer_size - copy1_len; 195 | 196 | memcpy(new_buffer, &_buffer[_buffer_next_pop], copy1_len); 197 | memcpy(&new_buffer[copy1_len], _buffer, copy2_len); 198 | delete [] _buffer; 199 | _buffer = new_buffer; 200 | _buffer_next_pop = 0; 201 | _buffer_next_insert = _buffer_size; 202 | _buffer_capacity = new_capacity; 203 | DO_DEBUG(_assert_repinv()); 204 | } 205 | 206 | void thread_queue_t::_assert_repinv() { 207 | // check that the sum of sizes in _time_size_queue 208 | // match _buffer_size 209 | // queues do not support iteration so copy and pop 210 | time_size_queue_t copy_q(_time_size_queue); 211 | int64_t size = 0; 212 | while (!copy_q.empty()) { 213 | size += copy_q.front().second; 214 | copy_q.pop(); 215 | } 216 | assert(size == _buffer_size); 217 | } 218 | 219 | ///////////////////////////////////////////// 220 | // holds queue per active thread 221 | 
///////////////////////////////////////////// 222 | typedef std::pair threadid_queue_t; 223 | typedef std::map thread_queue_map_t; 224 | thread_queue_map_t thread_queue_map; 225 | 226 | ///////////////////////////////////////////// 227 | // utility class for priority queue that holds (time, threadid) tuples 228 | // and returns the lowest time 229 | ///////////////////////////////////////////// 230 | 231 | typedef std::pair time_threadid_t; 232 | class compare_first_greater_t { 233 | public: 234 | bool operator() (const time_threadid_t &x, time_threadid_t &y) const { 235 | return x.first > y.first; 236 | } 237 | }; 238 | 239 | ///////////////////////////////////////////// 240 | // returns the minimum trace read between active threads 241 | ///////////////////////////////////////////// 242 | 243 | std::priority_queue< 244 | time_threadid_t, 245 | std::vector, 246 | compare_first_greater_t 247 | > time_threadid_min; 248 | 249 | ///////////////////////////////////////////// 250 | // returns the minimum of each threads' greatest observed time 251 | // any trace less than this cannot yet be merged because a lower 252 | // timestamp can still appear 253 | ///////////////////////////////////////////// 254 | 255 | class safe_timestamp_t { 256 | public: 257 | safe_timestamp_t(); 258 | int64_t timestamp(); 259 | void set_time(int64_t threadid, int64_t timestamp); 260 | void finish_thread(int64_t threadid); 261 | 262 | private: 263 | int64_t _safe_timestamp_cache; 264 | typedef std::pair threadid_timestamp_t; 265 | typedef std::vector tt_list_t; 266 | tt_list_t _tt_list; 267 | 268 | // compare by threadid in a list of (threadid, timestamp) 269 | class tt_compare_t { 270 | public: 271 | int64_t arg; 272 | int64_t set_arg(int64_t new_arg) {arg = new_arg;} 273 | bool operator() (threadid_timestamp_t &tt){ 274 | return tt.first == arg; 275 | } 276 | }; 277 | 278 | void _update_cache(); 279 | } safe_timestamp; 280 | 281 | safe_timestamp_t::safe_timestamp_t() 282 | : _safe_timestamp_cache(0) 283 | , _tt_list() 284 | {} 285 | 286 | int64_t safe_timestamp_t::timestamp() { 287 | return _safe_timestamp_cache; 288 | } 289 | 290 | void safe_timestamp_t::set_time(int64_t threadid, int64_t timestamp) { 291 | static tt_compare_t tt_comp; 292 | tt_comp.set_arg(threadid); 293 | tt_list_t::iterator it = std::find_if(_tt_list.begin(), _tt_list.end(), tt_comp); 294 | bool need_update = false; 295 | if (it == _tt_list.end()) { 296 | _tt_list.push_back(threadid_timestamp_t(threadid, timestamp)); 297 | need_update = true; 298 | } else { 299 | need_update = it->second <= _safe_timestamp_cache; 300 | (*it) = threadid_timestamp_t(threadid, timestamp); 301 | } 302 | 303 | if (need_update) { 304 | _update_cache(); 305 | } 306 | } 307 | 308 | void safe_timestamp_t::finish_thread(int64_t threadid) { 309 | static tt_compare_t tt_comp; 310 | tt_comp.set_arg(threadid); 311 | tt_list_t::iterator it = std::find_if(_tt_list.begin(), _tt_list.end(), tt_comp); 312 | assert(it != _tt_list.end()); 313 | _tt_list.erase(it); 314 | _update_cache(); 315 | } 316 | 317 | void safe_timestamp_t::_update_cache() { 318 | tt_list_t::iterator it = _tt_list.begin(); 319 | 320 | if (it == _tt_list.end()) { 321 | return; 322 | } else { 323 | int64_t m = it->second; 324 | for (; it != _tt_list.end(); ++it) { 325 | m = std::min(m, it->second); 326 | } 327 | _safe_timestamp_cache = m; 328 | } 329 | } 330 | 331 | ///////////////////////////////////////////// 332 | // helper functions and main for merging 333 | ///////////////////////////////////////////// 
334 | 335 | // parse the line, determining if it is an important type of trace 336 | // pos1 is the position of the first tab (for cutting out the timestamp 337 | void parse_line(const char* buf, int64_t *pos1, int64_t *timestamp, int64_t *threadid, bool *is_thread_register, bool *is_thread_finish, bool *is_sync) { 338 | const char *tab1 = strchr(buf, '\t'); 339 | assert(tab1); 340 | *pos1 = tab1 - buf; 341 | const char *tab2 = strchr(&buf[*pos1+1], '\t'); 342 | assert(tab2); 343 | int64_t pos2 = tab2 - buf; 344 | 345 | *timestamp = atoll(buf); 346 | *threadid = atoll(tab1+1); 347 | 348 | // tr, tf, and thread_sync terminate the line 349 | *is_thread_register = strcmp("tr", tab2+1) == 0; 350 | *is_thread_finish = strcmp("tf", tab2+1) == 0; 351 | *is_sync = strcmp("thread_sync", tab2+1) == 0; 352 | } 353 | 354 | int64_t active_threads = 0; 355 | 356 | // merge as many as allowable 357 | // can merge so long as minimum timestamp entry is less/equal min of all 358 | // threads' max observed timestamp 359 | void merge() { 360 | static char buf[128]; // temp space used for merging 361 | bool keep_merging = true; 362 | while (keep_merging) { 363 | time_threadid_t min_entry = time_threadid_min.top(); 364 | 365 | bool any_threads_active = active_threads > 0; 366 | int64_t all_threads_observed = safe_timestamp.timestamp(); 367 | keep_merging = !any_threads_active || min_entry.first <= all_threads_observed; 368 | 369 | // put the minimum entry into the output and pop it 370 | if (keep_merging) { 371 | time_threadid_min.pop(); 372 | 373 | // pop and output from min_entry.second 374 | thread_queue_map_t::iterator it = thread_queue_map.find(min_entry.second); 375 | assert(it != thread_queue_map.end()); 376 | thread_queue_t *q = it->second; 377 | 378 | int64_t popped_time, popped_length; 379 | bool not_empty = q->dequeue(&popped_time, buf, &popped_length); 380 | assert(not_empty); 381 | assert(popped_time == min_entry.first); 382 | std::cout.write(buf, popped_length); 383 | std::cout << std::endl; 384 | 385 | int64_t next_time = q->peek(); 386 | if (next_time == -1 && q->is_finished()) { 387 | delete q; 388 | thread_queue_map.erase(it); 389 | } 390 | // peek the next item from that threadid 391 | if (next_time >= 0) { 392 | time_threadid_min.push(time_threadid_t(next_time, min_entry.second)); 393 | } 394 | } 395 | keep_merging = keep_merging && !time_threadid_min.empty(); 396 | } 397 | } 398 | 399 | int main(int argc, char **argv) { 400 | bool keep_timestamps = argc > 1 && std::string(argv[1]) == "-t"; 401 | std::string line; 402 | int64_t line_count = 0; 403 | char buf[128]; 404 | while (std::cin.getline(buf, 128)) { 405 | int64_t len = strlen(buf); 406 | ++line_count; 407 | 408 | int64_t pos1, timestamp, threadid; 409 | bool is_thread_register, is_thread_finish, is_sync; 410 | parse_line(buf, &pos1, ×tamp, &threadid, &is_thread_register, &is_thread_finish, &is_sync); 411 | 412 | if (is_thread_register) { 413 | ++active_threads; 414 | } else if (is_thread_finish) { 415 | --active_threads; 416 | safe_timestamp.finish_thread(threadid); 417 | } 418 | 419 | // update thread's max observed if registered thread 420 | if (!is_thread_finish && threadid >= 0) { 421 | safe_timestamp.set_time(threadid, timestamp); 422 | if (is_sync) continue; 423 | } 424 | 425 | // get/construct the threadid's queue to add this to 426 | thread_queue_t* thread_queue; 427 | thread_queue_map_t::iterator it = thread_queue_map.find(threadid); 428 | if (it == thread_queue_map.end()) { 429 | thread_queue = new thread_queue_t(); 430 | 
thread_queue_map.insert(it, threadid_queue_t(threadid, thread_queue)); 431 | } else { 432 | thread_queue = it->second; 433 | } 434 | 435 | // strip out the timestamp from the string as it is enqueued 436 | bool was_empty; 437 | if (keep_timestamps) { 438 | was_empty = thread_queue->enqueue(timestamp, buf, len); 439 | } else { 440 | was_empty = thread_queue->enqueue(timestamp, buf+pos1+1, len - (pos1+1)); 441 | } 442 | 443 | if (is_thread_finish) thread_queue->finish(); 444 | 445 | // if thread had been empty update heap of each thread's min available entry 446 | if (was_empty) { 447 | time_threadid_min.push(time_threadid_t(timestamp, threadid)); 448 | } 449 | 450 | // try to merge some entries 451 | merge(); 452 | } 453 | assert(time_threadid_min.empty()); // should be no more to merge 454 | } 455 | 456 | -------------------------------------------------------------------------------- /trace/trace.cpp: -------------------------------------------------------------------------------- 1 | //Copyright (c) 2013 Steven Pelley 2 | // 3 | //Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | //this software and associated documentation files (the "Software"), to deal in 5 | //the Software without restriction, including without limitation the rights to 6 | //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | //the Software, and to permit persons to whom the Software is furnished to do so, 8 | //subject to the following conditions: 9 | // 10 | //The above copyright notice and this permission notice shall be included in all 11 | //copies or substantial portions of the Software. 12 | // 13 | //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | // atomic memory trace pintool 21 | // see README for directions and implementation details. 22 | 23 | #include 24 | #include 25 | #include "pin.H" 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | /* ===================================================================== */ 39 | /* Global Variables */ 40 | /* ===================================================================== */ 41 | 42 | // helper class to define critical sections 43 | // allocate on the stack within a scope (curly braces) for the CS 44 | // constructor will acquire the lock, destructor (called when CS object 45 | // goes out of scope) releases the lock 46 | class pin_critical_section { 47 | public: 48 | pin_critical_section(PIN_MUTEX *lock) 49 | : _lock(lock), _paused(false), _pred(true) { 50 | PIN_MutexLock(_lock); 51 | } 52 | 53 | // predicated CS. 
Only do it predicate is true 54 | pin_critical_section(PIN_MUTEX *lock, bool predicate) 55 | : _lock(lock), _paused(false), _pred(predicate) { 56 | if (_pred) { 57 | PIN_MutexLock(_lock); 58 | } 59 | } 60 | 61 | ~pin_critical_section() { 62 | if (_pred && !_paused) { 63 | PIN_MutexUnlock(_lock); 64 | } 65 | } 66 | 67 | void pause() { 68 | if (_pred && !_paused) { 69 | PIN_MutexUnlock(_lock); 70 | } 71 | } 72 | 73 | // make sure not to violate the lock hierarchy if you use this! 74 | void restart() { 75 | if (_pred && _paused) { 76 | PIN_MutexLock(_lock); 77 | } 78 | } 79 | 80 | private: 81 | pin_critical_section(); 82 | PIN_MUTEX *_lock; 83 | bool _paused; 84 | bool _pred; 85 | }; 86 | 87 | bool in_roi; 88 | bool require_roi; 89 | bool log_cas_fails; 90 | bool register_threads = false; 91 | bool turn_off_locks = false; 92 | 93 | ///////////// 94 | // global lock covers the file buffer, thread count, etc 95 | // may not be held beyond analysis function/callback return 96 | // i.e., critical section must be contained in function 97 | ///////////// 98 | struct lock_wrapper_t { 99 | PIN_MUTEX lock; 100 | char padding [56]; 101 | }; 102 | 103 | // bimap of 104 | typedef boost::bimap< 105 | boost::bimaps::set_of, 106 | boost::bimaps::multiset_of 107 | > last_flushed_t; 108 | 109 | // file access 110 | lock_wrapper_t file_lock; 111 | std::ofstream trace_file; 112 | last_flushed_t last_flushed; // pin threadid to last_flushed 113 | int64_t last_flushed_cache; // occassionally computed min flushed 114 | ///////////// 115 | // end of global lock protection 116 | ///////////// 117 | 118 | // thread tracking 119 | lock_wrapper_t thread_start_fini_lock; 120 | int64_t num_threads = 0; 121 | set pin_threadid_set; 122 | 123 | TLS_KEY tls_key; 124 | 125 | int64_t num_locks = 0; 126 | int64_t block_size = 0; 127 | int64_t block_size_log = 0; 128 | int64_t accesses_flush = 0; 129 | int64_t timestamp_difference = 0; 130 | 131 | // address locks allows atomic tracing 132 | // may be held beyond analysis function return 133 | // may not be held beyond callback functino return 134 | // must be released on context change 135 | struct address_lock_t { 136 | PIN_MUTEX lock; 137 | int64_t lamport_timestamp; 138 | char padding [48]; 139 | }; 140 | 141 | address_lock_t *address_lock_bank; 142 | 143 | /* ===================================================================== */ 144 | /* TLS Variables */ 145 | /* ===================================================================== */ 146 | 147 | class thread_data_t; 148 | 149 | thread_data_t* get_tls(THREADID threadid) { 150 | thread_data_t *tdata = 151 | static_cast(PIN_GetThreadData(tls_key, threadid)); 152 | return tdata; 153 | } 154 | 155 | 156 | class thread_data_t { 157 | public: 158 | thread_data_t() 159 | : user_threadid(-1) 160 | , event_count(0) 161 | , memory_instruction_count(0) 162 | , index_lock_list() 163 | , buffered_entries(0) 164 | , lamport_timestamp(0) 165 | { 166 | PIN_MutexInit(&thread_lock); 167 | index_lock_list.reserve(32); 168 | } 169 | 170 | // must hold thread_start_fini_lock 171 | // will either hold no thread locks or all thread locks 172 | // 173 | // try to flush threads in flush_others 174 | // if their timestamp is still below the threshold 175 | // (threads may race and flush their own buffer first) 176 | void attempt_flush_others(int64_t this_time, bool hold_all_thread_locks) { 177 | vector::iterator it; 178 | for (it = flush_others.begin(); it != flush_others.end(); ++it) { 179 | int64_t pin_threadid = *it; 180 | set::iterator set_it 
= pin_threadid_set.find(pin_threadid); 181 | if (set_it != pin_threadid_set.end()) { 182 | thread_data_t *tdata = get_tls(pin_threadid); 183 | { 184 | pin_critical_section CS(&tdata->thread_lock, !hold_all_thread_locks); 185 | int64_t diff = this_time - tdata->lamport_timestamp; 186 | if (diff >= timestamp_difference * .75) { // sync it! 187 | if (tdata->user_threadid >= 0) { 188 | tdata->trace_stream << this_time << "\t" << tdata->user_threadid << "\tthread_sync" << "\n"; 189 | ++tdata->buffered_entries; 190 | } 191 | tdata->lamport_timestamp = this_time; 192 | 193 | if (tdata->buffered_entries > 0) { // unregistered thread might be empty 194 | pin_critical_section CS2(&file_lock.lock); 195 | trace_file << tdata->trace_stream.rdbuf(); 196 | assert(trace_file); 197 | tdata->buffered_entries = 0; 198 | tdata->trace_stream.str(std::string()); 199 | last_flushed.left.erase(pin_threadid); 200 | last_flushed.left.insert(std::pair(pin_threadid, this_time)); 201 | } 202 | } 203 | } 204 | } 205 | } 206 | } 207 | 208 | // after having written something to the string buffer: 209 | // update timestamp 210 | // increment entry count 211 | // flush the buffer if necessary 212 | // flush buffers of any thread lagging too far behind 213 | // 214 | // requires either: 215 | // thread_start_fini_lock not held, this thread's thread_lock held 216 | // or 217 | // thread_start_fini_lock and all thread_locks held 218 | // 219 | // if force always flush this thread's buffer 220 | void touch_buffer(THREADID pin_threadid, int64_t time, bool force, pin_critical_section *thread_cs, bool have_all_thread_locks) { 221 | lamport_timestamp = time; 222 | flush_others.clear(); 223 | 224 | if (++buffered_entries >= accesses_flush || force) { 225 | pin_critical_section CS(&file_lock.lock); 226 | trace_file << trace_stream.rdbuf(); 227 | assert(trace_file); 228 | buffered_entries = 0; 229 | // clear the trace_stream to use it as a queue 230 | trace_stream.str(std::string()); 231 | 232 | // bimap must erase and insert, no modifying 233 | last_flushed.left.erase(pin_threadid); 234 | last_flushed.left.insert(std::pair(pin_threadid, time)); 235 | 236 | // double check last_flushed_cache and possibly update it 237 | // check for any other threads that should be flushed 238 | if (time - last_flushed_cache > timestamp_difference) { 239 | last_flushed_t::right_const_iterator it = last_flushed.right.begin(); 240 | last_flushed_cache = it->first; 241 | // double check 242 | if (time - last_flushed_cache > timestamp_difference) { 243 | // flush any thread who differs by more than .75*timestamp_difference 244 | for ( ; it != last_flushed.right.end(); ++it) { 245 | int64_t diff = time - it->first; 246 | if (diff >= timestamp_difference * .75) { 247 | flush_others.push_back(it->second); 248 | } else { 249 | // cannot be end (because this thread has a high timestamp) 250 | last_flushed_cache = it->first; 251 | break; 252 | } 253 | } 254 | } 255 | } 256 | } 257 | 258 | // get all the thread locks in the proper order and check-and-flush 259 | // the other threads 260 | if (!flush_others.empty()) { 261 | if (!have_all_thread_locks) { 262 | thread_cs->pause(); 263 | { 264 | pin_critical_section CS(&thread_start_fini_lock.lock); 265 | attempt_flush_others(time, have_all_thread_locks); 266 | } 267 | thread_cs->restart(); 268 | } else { 269 | attempt_flush_others(time, have_all_thread_locks); 270 | } 271 | } 272 | } 273 | 274 | int64_t user_threadid; // -1 implies not a registered thread 275 | int64_t event_count; 276 | int64_t 
memory_instruction_count; 277 | 278 | // record which locks we hold (by index) 279 | vector index_lock_list; 280 | 281 | // TLS for memory access tracing 282 | bool trace_this_access; 283 | bool is_read; 284 | bool is_read2; 285 | bool is_write; 286 | int64_t read_size; 287 | int64_t write_size; 288 | uint64_t read1_address; 289 | uint64_t read2_address; 290 | uint64_t write_address; 291 | uint64_t max_locked_timestamp; 292 | 293 | vector flush_others; 294 | 295 | //////////////////// 296 | // the following are covered by this lock 297 | //////////////////// 298 | int8_t padding [64]; 299 | PIN_MUTEX thread_lock; 300 | 301 | stringstream trace_stream; 302 | int64_t buffered_entries; 303 | int64_t lamport_timestamp; 304 | 305 | int8_t padding2 [64]; 306 | //////////////////// 307 | // end thread lock protection 308 | //////////////////// 309 | }; 310 | 311 | /* ===================================================================== */ 312 | /* Commandline Switches */ 313 | /* ===================================================================== */ 314 | 315 | KNOB KnobOutputFile(KNOB_MODE_WRITEONCE, "pintool", 316 | "o", "memory_trace.out", "specify trace file name"); 317 | 318 | KNOB KnobRegisterThreads(KNOB_MODE_WRITEONCE, "pintool", 319 | "r", "0", "threads required to be registered?"); 320 | 321 | KNOB KnobFunctionsFile(KNOB_MODE_WRITEONCE, "pintool", 322 | "f", "trace_functions.in", "file with list of functions to trace"); 323 | 324 | KNOB KnobRequireROI(KNOB_MODE_WRITEONCE, "pintool", 325 | "i", "0", "require region of interest annotation?"); 326 | 327 | KNOB KnobNumAddressLocks (KNOB_MODE_WRITEONCE, "pintool", 328 | "l", "64", "number of locks for simulated cache coherence"); 329 | 330 | KNOB KnobBlockSize (KNOB_MODE_WRITEONCE, "pintool", 331 | "b", "64", "cache line/block size to simulate"); 332 | 333 | KNOB KnobAccessesBeforeFlush (KNOB_MODE_WRITEONCE, "pintool", 334 | "a", "64", "accesses per thread before flushing"); 335 | 336 | KNOB KnobTurnOff (KNOB_MODE_WRITEONCE, "pintool", 337 | "t", "0", "turn off address locking for test (should produce incorrect results)"); 338 | 339 | KNOB KnobTimestampDifference (KNOB_MODE_WRITEONCE, "pintool", 340 | "d", "1000", "How far 2 threads can differ in timestamp before a thread attempts to flush the other's buffer and update its timestamp"); 341 | 342 | KNOB KnobCASFailureWrites(KNOB_MODE_WRITEONCE, "pintool", 343 | "c", "0", "log Compare-And-Swap as a write even when compare fails"); 344 | 345 | /* ===================================================================== */ 346 | /* Helper routines */ 347 | /* ===================================================================== */ 348 | 349 | // requires thread_start_fini_lock already held 350 | // acquires thread_start_fini_lock then all thread locks 351 | // returns max timestamp of all threads 352 | int64_t acquire_all_thread_locks() { 353 | set::iterator it; 354 | int64_t max_time = 0; 355 | for (it = pin_threadid_set.begin(); it != pin_threadid_set.end(); ++it) { 356 | thread_data_t *tdata = get_tls(*it); 357 | PIN_MutexLock(&tdata->thread_lock); 358 | if (tdata->lamport_timestamp > max_time) max_time = tdata->lamport_timestamp; 359 | } 360 | return max_time; 361 | } 362 | 363 | // requires thread_start_fini_lock already held 364 | // release all locks, first setting a new timestamp 365 | void release_all_thread_locks(int64_t new_timestamp) { 366 | set::iterator it; 367 | for (it = pin_threadid_set.begin(); it != pin_threadid_set.end(); ++it) { 368 | thread_data_t *tdata = get_tls(*it); 369 | 
tdata->lamport_timestamp = new_timestamp; 370 | PIN_MutexUnlock(&tdata->thread_lock); 371 | } 372 | } 373 | 374 | // given memory address and size of access, return the first lock index 375 | // if to_lock is not null set it to the number of locks that must be acquired 376 | int64_t lock_index(uint64_t address, uint64_t size, int64_t *to_lock) { 377 | uint64_t removed_blocks = address >> block_size_log; 378 | uint64_t end_removed_blocks = (address+size-1) >> block_size_log; 379 | int64_t number_to_lock = end_removed_blocks - removed_blocks + 1; 380 | // can lock at most num_locks 381 | if (number_to_lock > num_locks) number_to_lock = num_locks; 382 | if (to_lock) *to_lock = number_to_lock; 383 | return removed_blocks % num_locks; 384 | } 385 | 386 | // determine all unique lock_address indices that we must lock 387 | // lock them in index order 388 | // place these indices in index_lock_list 389 | // NOTE: I assume that interrupts cannot occur when this is called, 390 | // as asynchronous interrupts are delayed until the end of the trace, 391 | // and synchronous interrupts from the instrumented program occur 392 | // with/after that instruction. 393 | // See: http://tech.groups.yahoo.com/group/pinheads/message/7742 394 | // 395 | // This allows ctxt_change handler to assume that index_lock_list 396 | // is always consistent 397 | // 398 | // returns the max of the lamport timestamps on acquired locks 399 | int64_t acquire_address_locks(THREADID pin_threadid) { 400 | thread_data_t* tdata = get_tls(pin_threadid); 401 | int64_t to_lock; 402 | if (tdata->is_read) { 403 | int64_t index = lock_index(tdata->read1_address, tdata->read_size, &to_lock); 404 | for (int64_t i = 0; i < to_lock; ++i) { 405 | tdata->index_lock_list.push_back(index); 406 | index = (index+1)%num_locks; 407 | } 408 | } 409 | if (tdata->is_read2) { 410 | int64_t index = lock_index(tdata->read2_address, tdata->read_size, &to_lock); 411 | for (int64_t i = 0; i < to_lock; ++i) { 412 | tdata->index_lock_list.push_back(index); 413 | index = (index+1)%num_locks; 414 | } 415 | } 416 | if (tdata->is_write) { 417 | int64_t index = lock_index(tdata->write_address, tdata->write_size, &to_lock); 418 | for (int64_t i = 0; i < to_lock; ++i) { 419 | tdata->index_lock_list.push_back(index); 420 | index = (index+1)%num_locks; 421 | } 422 | } 423 | 424 | // at this point index_lock_list unsorted and may contain duplicates 425 | std::sort(tdata->index_lock_list.begin(), tdata->index_lock_list.end()); 426 | vector::iterator it; 427 | it = std::unique(tdata->index_lock_list.begin(), tdata->index_lock_list.end()); 428 | tdata->index_lock_list.resize(std::distance(tdata->index_lock_list.begin(), it)); 429 | 430 | int64_t max_time = 0; 431 | for (it = tdata->index_lock_list.begin(); it != tdata->index_lock_list.end(); ++it) { 432 | PIN_MutexLock(&address_lock_bank[*it].lock); 433 | max_time = max_time > address_lock_bank[*it].lamport_timestamp ? 
max_time : address_lock_bank[*it].lamport_timestamp; 434 | } 435 | return max_time; 436 | } 437 | 438 | // Release all locks in index_lock_list 439 | // set their timestamp from the thread's 440 | // clear the list 441 | void release_address_locks(THREADID pin_threadid) { 442 | thread_data_t* tdata = get_tls(pin_threadid); 443 | vector::iterator it; 444 | for (it = tdata->index_lock_list.begin(); it != tdata->index_lock_list.end(); ++it) { 445 | int64_t idx = (*it); 446 | address_lock_bank[idx].lamport_timestamp = tdata->lamport_timestamp; 447 | PIN_MutexUnlock(&address_lock_bank[idx].lock); 448 | } 449 | tdata->index_lock_list.clear(); 450 | } 451 | 452 | /* ===================================================================== */ 453 | /* Analysis routines */ 454 | /* these functions (in particular the memory ones) may hold address */ 455 | /* locks beyond the duration of the call */ 456 | /* ===================================================================== */ 457 | 458 | ////////////// 459 | // memory access functions 460 | ////////////// 461 | 462 | void memory_access_header_a(THREADID pin_threadid) { 463 | release_address_locks(pin_threadid); 464 | thread_data_t* tdata = get_tls(pin_threadid); 465 | 466 | int64_t threadid = tdata->user_threadid; 467 | { 468 | pin_critical_section CS(&tdata->thread_lock); 469 | bool do_trace = in_roi || !require_roi; 470 | tdata->trace_this_access = do_trace && threadid >= 0; 471 | } 472 | tdata->is_read = false; 473 | tdata->is_read2 = false; 474 | tdata->is_write = false; 475 | } 476 | 477 | void memory_access_read1_a( 478 | THREADID pin_threadid 479 | , ADDRINT address 480 | , UINT32 size 481 | ) { 482 | thread_data_t* tdata = get_tls(pin_threadid); 483 | if (tdata->trace_this_access) { 484 | tdata->is_read = true; 485 | tdata->read1_address = address; 486 | tdata->read_size = size; 487 | } 488 | } 489 | 490 | void memory_access_read2_a( 491 | THREADID pin_threadid 492 | , ADDRINT address 493 | ) { 494 | thread_data_t* tdata = get_tls(pin_threadid); 495 | if (tdata->trace_this_access) { 496 | tdata->is_read2 = true; 497 | tdata->read2_address = address; 498 | } 499 | } 500 | 501 | void memory_access_write_a( 502 | THREADID pin_threadid 503 | , ADDRINT address 504 | , UINT32 size 505 | ) { 506 | thread_data_t* tdata = get_tls(pin_threadid); 507 | if (tdata->trace_this_access) { 508 | tdata->is_write = true; 509 | tdata->write_address = address; 510 | tdata->write_size = size; 511 | } 512 | } 513 | 514 | void memory_access_acquire_locks_a(THREADID pin_threadid) { 515 | thread_data_t* tdata = get_tls(pin_threadid); 516 | if (tdata->trace_this_access) { 517 | // locks released and timestamps updated at next instruction/function 518 | if (turn_off_locks) { 519 | tdata->max_locked_timestamp = 0; 520 | } else { 521 | tdata->max_locked_timestamp = acquire_address_locks(pin_threadid); 522 | } 523 | } 524 | } 525 | 526 | void memory_access_footer_a(THREADID pin_threadid) { 527 | thread_data_t* tdata = get_tls(pin_threadid); 528 | // acquire necessary locks (global or addresses) and trace 529 | if (tdata->trace_this_access) { 530 | int64_t threadid = tdata->user_threadid; 531 | { 532 | pin_critical_section CS(&tdata->thread_lock); 533 | if (*static_cast(&tdata->trace_this_access)) { 534 | int64_t new_time = tdata->max_locked_timestamp; 535 | if (tdata->lamport_timestamp > new_time) new_time = tdata->lamport_timestamp; 536 | ++new_time; 537 | 538 | tdata->trace_stream << new_time << '\t' << threadid << "\tm"; 539 | if (tdata->is_read) { 540 | 
tdata->trace_stream << "\tr" << 541 | "\t" << tdata->read1_address << 542 | "\t" << tdata->read_size; 543 | } 544 | if (tdata->is_read2) { 545 | tdata->trace_stream << "\tr2" << 546 | "\t" << tdata->read2_address; 547 | } 548 | if (tdata->is_write) { 549 | tdata->trace_stream << "\tw" << 550 | "\t" << tdata->write_address << 551 | "\t" << tdata->write_size; 552 | } 553 | tdata->trace_stream << "\n"; 554 | tdata->touch_buffer(pin_threadid, new_time, false, &CS, false); 555 | } 556 | } 557 | } 558 | } 559 | 560 | // for any atomic RMW that might fail 561 | // on failure set tdata->is_write to false 562 | unsigned int zf_bit = 1 << 6; 563 | void memory_access_CAS_footer_a(THREADID pin_threadid, ADDRINT flags_reg) { 564 | thread_data_t* tdata = get_tls(pin_threadid); 565 | if (tdata->trace_this_access) { 566 | // CAS succeeded if Zero Flag is set, failed otherwise 567 | // ZF is bit 6 of eflags 568 | int result = flags_reg & zf_bit; 569 | assert(tdata->is_write); 570 | if (!result) { 571 | tdata->is_write = false; 572 | } 573 | } 574 | } 575 | 576 | // instructions that do not access memory should 577 | // still attempt to release locks 578 | void memory_access_release_a(THREADID pin_threadid) { 579 | release_address_locks(pin_threadid); 580 | } 581 | 582 | ////////////// 583 | // end memory access functions 584 | ////////////// 585 | 586 | 587 | void function_call_a( 588 | CHAR *name 589 | , THREADID pin_threadid 590 | , ADDRINT stack_pointer 591 | , ADDRINT arg1 592 | , ADDRINT arg2 593 | , ADDRINT arg3 594 | ) { 595 | release_address_locks(pin_threadid); 596 | // always trace regardless of thread or ROI 597 | // (may need to trace experiment startup before ROI or registered threads) 598 | thread_data_t *tdata = get_tls(pin_threadid); 599 | int64_t threadid = tdata->user_threadid; 600 | bool registered = threadid >= 0; 601 | int64_t time = -1; 602 | 603 | { 604 | pin_critical_section CS(&thread_start_fini_lock.lock, !registered); 605 | if (!registered) { 606 | time = acquire_all_thread_locks(); 607 | } 608 | 609 | { 610 | pin_critical_section CS(&tdata->thread_lock, registered); 611 | time = threadid < 0 ? time : tdata->lamport_timestamp; 612 | ++time; 613 | tdata->lamport_timestamp = time; 614 | 615 | tdata->trace_stream << 616 | time << '\t' << threadid << "\tfc" << 617 | "\t" << name << 618 | "\t" << stack_pointer << 619 | "\t" << arg1 << 620 | "\t" << arg2 << 621 | "\t" << arg3 << 622 | "\n"; 623 | tdata->touch_buffer(pin_threadid, time, !registered, &CS, !registered); 624 | } 625 | 626 | if (!registered) { 627 | release_all_thread_locks(time); 628 | } 629 | } 630 | } 631 | 632 | void function_return_a( 633 | CHAR *name 634 | , THREADID pin_threadid 635 | , ADDRINT stack_pointer 636 | , ADDRINT return_value 637 | ) { 638 | release_address_locks(pin_threadid); 639 | thread_data_t *tdata = get_tls(pin_threadid); 640 | int64_t threadid = tdata->user_threadid; 641 | bool registered = threadid >= 0; 642 | // always trace regardless of thread or ROI 643 | // (may need to trace experiment startup before ROI or registered threads) 644 | int64_t time = -1; 645 | 646 | { 647 | pin_critical_section CS(&thread_start_fini_lock.lock, !registered); 648 | if (!registered) { 649 | time = acquire_all_thread_locks(); 650 | } 651 | 652 | { 653 | pin_critical_section CS(&tdata->thread_lock, registered); 654 | time = threadid < 0 ? 
time : tdata->lamport_timestamp; 655 | ++time; 656 | 657 | tdata->trace_stream << 658 | time << '\t' << threadid << "\tfr" << 659 | "\t" << name << 660 | "\t" << stack_pointer << 661 | "\t" << return_value << 662 | "\n"; 663 | tdata->touch_buffer(pin_threadid, time, !registered, &CS, !registered); 664 | } 665 | 666 | if (!registered) { 667 | release_all_thread_locks(time); 668 | } 669 | } 670 | } 671 | 672 | // Always synchronize threads, even if this occurs on registered thread 673 | void change_roi_a(THREADID pin_threadid, bool new_roi, CHAR *change_to) { 674 | release_address_locks(pin_threadid); 675 | thread_data_t *tdata = get_tls(pin_threadid); 676 | 677 | { 678 | pin_critical_section CS(&thread_start_fini_lock.lock); 679 | int64_t time = acquire_all_thread_locks() + 1; 680 | in_roi = new_roi; 681 | tdata->trace_stream << time << '\t' << tdata->user_threadid << 682 | '\t' << change_to << "_roi\n"; 683 | tdata->touch_buffer(pin_threadid, time, true, NULL, true); 684 | release_all_thread_locks(time); 685 | } 686 | } 687 | 688 | /* ===================================================================== */ 689 | /* Callback routines */ 690 | /* these may not hold locks beyond duration of call */ 691 | /* ===================================================================== */ 692 | 693 | ////////////// 694 | // thread start and end tracing 695 | // 696 | // for consistent merging must hold all thread locks while forcing 697 | // thread registration to global file 698 | ////////////// 699 | 700 | // helper 701 | // must hold thread_start_fini_lock and all thread locks 702 | void trace_start_thread(THREADID pin_threadid, int64_t time) { 703 | thread_data_t *tdata = get_tls(pin_threadid); 704 | tdata->trace_stream << time << '\t' << tdata->user_threadid << "\ttr\n"; 705 | tdata->touch_buffer(pin_threadid, time, true, NULL, true); // force flush for merging 706 | } 707 | 708 | void register_thread_a(THREADID pin_threadid, ADDRINT user_threadid) { 709 | release_address_locks(pin_threadid); 710 | thread_data_t *tdata = get_tls(pin_threadid); 711 | if (register_threads) { 712 | pin_critical_section CS(&thread_start_fini_lock.lock); 713 | int64_t new_time = acquire_all_thread_locks() + 1; 714 | 715 | tdata->user_threadid = user_threadid; 716 | trace_start_thread(pin_threadid, new_time); 717 | 718 | release_all_thread_locks(new_time); 719 | } 720 | } 721 | 722 | void thread_start_a(THREADID threadid, CONTEXT *ctxt, INT32 flags, VOID *v) { 723 | thread_data_t *tdata = new thread_data_t(); 724 | { 725 | pin_critical_section CS(&thread_start_fini_lock.lock); 726 | ++num_threads; 727 | PIN_SetThreadData(tls_key, tdata, threadid); 728 | 729 | if (!register_threads) { 730 | tdata->user_threadid = PIN_ThreadUid(); 731 | // synchronize threads -- already holding thread_s/f lock 732 | int64_t start_timestamp = acquire_all_thread_locks() + 1; 733 | trace_start_thread(threadid, start_timestamp); 734 | release_all_thread_locks(start_timestamp); 735 | } else { 736 | pin_critical_section CS2(&tdata->thread_lock); 737 | tdata->lamport_timestamp = 0; 738 | } 739 | 740 | pin_threadid_set.insert(threadid); 741 | { 742 | pin_critical_section CS3(&file_lock.lock); 743 | last_flushed.left.insert(std::pair(threadid, 0)); 744 | } 745 | } 746 | } 747 | 748 | void thread_fini_a(THREADID threadid, const CONTEXT *ctxt, INT32 code, VOID *v) { 749 | release_address_locks(threadid); 750 | thread_data_t *tdata = get_tls(threadid); 751 | bool registered_thread = tdata->user_threadid >= 0; 752 | { 753 | pin_critical_section 
CS(&thread_start_fini_lock.lock); 754 | if (registered_thread) { 755 | // synchronize threads -- already holding thread_s/f lock 756 | int64_t timestamp = acquire_all_thread_locks() + 1; 757 | tdata->trace_stream << timestamp << '\t' << tdata->user_threadid << "\ttf\n"; 758 | tdata->touch_buffer(threadid, timestamp, true, NULL, true); // force flush for merging 759 | release_all_thread_locks(timestamp); 760 | } 761 | 762 | pin_threadid_set.erase(threadid); 763 | // always flush -- might be an unregistered thread with function traces 764 | { 765 | pin_critical_section CS2(&tdata->thread_lock); 766 | if (tdata->buffered_entries > 0) { 767 | pin_critical_section CS3(&file_lock.lock); 768 | trace_file << tdata->trace_stream.rdbuf(); 769 | assert(trace_file); 770 | tdata->trace_stream.str(std::string()); 771 | last_flushed.left.erase(threadid); 772 | } 773 | } 774 | delete get_tls(threadid); 775 | } 776 | } 777 | 778 | ////////////// 779 | // end thread start and end tracing 780 | ////////////// 781 | 782 | void fini_a(INT32 code, VOID *v) { 783 | // We do not have access to a THREADID, leading me to believe this callback 784 | // occurs only after all threads (including main thread) have joined 785 | // no need to release locks 786 | { 787 | pin_critical_section CS(&file_lock.lock); 788 | trace_file.close(); 789 | } 790 | } 791 | 792 | // only record if on a registered thread 793 | void ctxt_change_release( 794 | THREADID pin_threadid 795 | , CONTEXT_CHANGE_REASON reason 796 | , const CONTEXT *from 797 | , CONTEXT *to 798 | , INT32 info 799 | , VOID *v 800 | ) { 801 | release_address_locks(pin_threadid); 802 | thread_data_t *tdata = get_tls(pin_threadid); 803 | int64_t threadid = tdata->user_threadid; 804 | { 805 | pin_critical_section CS(&tdata->thread_lock); 806 | int64_t time = tdata->lamport_timestamp + 1; 807 | tdata->trace_stream 808 | << time << '\t' << threadid << "\tctxt_change\n"; 809 | tdata->touch_buffer(pin_threadid, time, false, &CS, false); 810 | } 811 | } 812 | 813 | /* ===================================================================== */ 814 | /* Instrumentation routines */ 815 | /* ===================================================================== */ 816 | 817 | VOID Image(IMG img, VOID *v) 818 | { 819 | vector* trace_functions = reinterpret_cast< vector* >(v); 820 | for (SEC sec = IMG_SecHead(img); SEC_Valid(sec); sec = SEC_Next(sec)) { 821 | for (RTN rtn = SEC_RtnHead(sec); RTN_Valid(rtn); rtn = RTN_Next(rtn)) { 822 | string und_func_name = PIN_UndecorateSymbolName(RTN_Name(rtn), UNDECORATION_NAME_ONLY); 823 | 824 | if (und_func_name == "atomic_trace::register_thread") { 825 | RTN_Open(rtn); 826 | RTN_InsertCall(rtn, IPOINT_BEFORE, (AFUNPTR)register_thread_a, 827 | IARG_THREAD_ID, 828 | IARG_FUNCARG_ENTRYPOINT_VALUE, 0, // arg1 -- user threadid 829 | IARG_END); 830 | RTN_Close(rtn); 831 | } 832 | if (und_func_name == "atomic_trace::start_roi") { 833 | RTN_Open(rtn); 834 | RTN_InsertCall(rtn, IPOINT_BEFORE, (AFUNPTR)change_roi_a 835 | , IARG_THREAD_ID 836 | , IARG_BOOL, true 837 | , IARG_ADDRINT, "start" 838 | , IARG_END); 839 | RTN_Close(rtn); 840 | } 841 | if (und_func_name == "atomic_trace::end_roi") { 842 | RTN_Open(rtn); 843 | RTN_InsertCall(rtn, IPOINT_BEFORE, (AFUNPTR)change_roi_a 844 | , IARG_THREAD_ID 845 | , IARG_BOOL, false 846 | , IARG_ADDRINT, "end" 847 | , IARG_END); 848 | RTN_Close(rtn); 849 | } 850 | 851 | // try to find this function in our list 852 | vector::iterator it = find(trace_functions->begin(), trace_functions->end(), und_func_name); 853 | if 
(it != trace_functions->end()) { 854 | const char *func_name = it->c_str(); 855 | RTN_Open(rtn); 856 | // call traces name, new stack pointer (after call), values of first three arguments 857 | RTN_InsertCall(rtn, IPOINT_BEFORE, (AFUNPTR)function_call_a, 858 | IARG_ADDRINT, func_name, 859 | IARG_THREAD_ID, 860 | IARG_REG_VALUE, REG_STACK_PTR, 861 | IARG_FUNCARG_ENTRYPOINT_VALUE, 0, 862 | IARG_FUNCARG_ENTRYPOINT_VALUE, 1, 863 | IARG_FUNCARG_ENTRYPOINT_VALUE, 2, 864 | IARG_END); 865 | 866 | // return traces name, old stack pointer (before return), value of return 867 | RTN_InsertCall(rtn, IPOINT_AFTER, (AFUNPTR)function_return_a, 868 | IARG_ADDRINT, func_name, 869 | IARG_THREAD_ID, 870 | IARG_REG_VALUE, REG_STACK_PTR, 871 | IARG_FUNCRET_EXITPOINT_VALUE, 872 | IARG_END); 873 | RTN_Close(rtn); 874 | } 875 | } 876 | } 877 | } 878 | 879 | VOID Instruction(INS ins, void * v) { 880 | if (INS_IsMemoryRead(ins) || INS_IsMemoryWrite(ins)) { 881 | INS_InsertPredicatedCall( 882 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_header_a 883 | , IARG_THREAD_ID 884 | , IARG_END 885 | ); 886 | 887 | if (INS_IsMemoryRead(ins)) { 888 | INS_InsertPredicatedCall( 889 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_read1_a 890 | , IARG_THREAD_ID 891 | , IARG_MEMORYREAD_EA 892 | , IARG_MEMORYREAD_SIZE 893 | , IARG_END 894 | ); 895 | } 896 | 897 | if (INS_HasMemoryRead2(ins)) { 898 | INS_InsertPredicatedCall( 899 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_read2_a 900 | , IARG_THREAD_ID 901 | , IARG_MEMORYREAD2_EA 902 | , IARG_END 903 | ); 904 | } 905 | 906 | if (INS_IsMemoryWrite(ins)) { 907 | INS_InsertPredicatedCall( 908 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_write_a 909 | , IARG_THREAD_ID 910 | , IARG_MEMORYWRITE_EA 911 | , IARG_MEMORYWRITE_SIZE 912 | , IARG_END 913 | ); 914 | } 915 | 916 | INS_InsertPredicatedCall( 917 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_acquire_locks_a 918 | , IARG_THREAD_ID 919 | , IARG_END 920 | ); 921 | 922 | OPCODE op = INS_Opcode(ins); 923 | bool cmpxchg = op == XED_ICLASS_CMPXCHG || op == XED_ICLASS_CMPXCHG16B || op == XED_ICLASS_CMPXCHG8B; 924 | // if atomic cas test for CAS success and trace AFTER instruction 925 | if (!log_cas_fails && INS_IsAtomicUpdate(ins) && cmpxchg) { 926 | assert(INS_HasFallThrough(ins)); 927 | assert(INS_IsMemoryWrite(ins)); 928 | INS_InsertPredicatedCall( 929 | ins, IPOINT_AFTER, (AFUNPTR) memory_access_CAS_footer_a 930 | , IARG_THREAD_ID 931 | // must use REG_RFLAGS (although it is not documented) 932 | // REG_EFLAGS and REG_FLAGS produce failures 933 | // see http://tech.groups.yahoo.com/group/pinheads/message/6581 934 | , IARG_REG_VALUE, REG_RFLAGS 935 | , IARG_END 936 | ); 937 | 938 | INS_InsertPredicatedCall( 939 | ins, IPOINT_AFTER, (AFUNPTR) memory_access_footer_a 940 | , IARG_THREAD_ID 941 | , IARG_END 942 | ); 943 | } else { // if not atomic CAS trace BEFORE instruction 944 | INS_InsertPredicatedCall( 945 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_footer_a 946 | , IARG_THREAD_ID 947 | , IARG_END 948 | ); 949 | } 950 | } else { 951 | INS_InsertPredicatedCall( 952 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_release_a 953 | , IARG_THREAD_ID 954 | , IARG_END 955 | ); 956 | } 957 | } 958 | 959 | /* ===================================================================== */ 960 | /* Print Help Message */ 961 | /* ===================================================================== */ 962 | 963 | INT32 Usage() 964 | { 965 | cerr << "This tool produces a consistent memory access trace and persistence annotation." 
<< endl; 966 | cerr << endl << KNOB_BASE::StringKnobSummary() << endl; 967 | return -1; 968 | } 969 | 970 | /* ===================================================================== */ 971 | /* Helper functions for Main */ 972 | /* ===================================================================== */ 973 | 974 | vector* read_trace_functions(string file_name) { 975 | vector *vec = new vector; 976 | 977 | std::ifstream function_file(file_name.c_str()); 978 | string func; 979 | while (function_file.good()) { 980 | getline(function_file, func); 981 | boost::algorithm::trim(func); 982 | vec->push_back(func); 983 | } 984 | 985 | return vec; 986 | } 987 | 988 | bool is_power_2(int64_t num) { 989 | return ((num > 0) && !(num & (num - 1))); 990 | } 991 | 992 | int64_t binary_log(int64_t num) { 993 | if (num <= 0) return 0; 994 | uint64_t unum = num; 995 | int64_t l = 0; 996 | while (unum >>= 1) ++l; 997 | return l; 998 | } 999 | 1000 | /* ===================================================================== */ 1001 | /* Main */ 1002 | /* ===================================================================== */ 1003 | 1004 | int main(int argc, char *argv[]) 1005 | { 1006 | // Initialize pin & symbol manager 1007 | PIN_InitSymbols(); 1008 | if( PIN_Init(argc,argv) ) 1009 | { 1010 | return Usage(); 1011 | } 1012 | 1013 | register_threads = KnobRegisterThreads.Value(); 1014 | require_roi = KnobRequireROI.Value(); 1015 | log_cas_fails = KnobCASFailureWrites.Value(); 1016 | vector* trace_functions = read_trace_functions(KnobFunctionsFile.Value()); 1017 | 1018 | accesses_flush = KnobAccessesBeforeFlush.Value(); 1019 | 1020 | // these must both be a power of 2 1021 | num_locks = KnobNumAddressLocks.Value(); 1022 | assert(is_power_2(num_locks)); 1023 | block_size = KnobBlockSize.Value(); 1024 | assert(is_power_2(block_size)); 1025 | block_size_log = binary_log(block_size); 1026 | turn_off_locks = KnobTurnOff.Value(); 1027 | timestamp_difference = KnobTimestampDifference.Value(); 1028 | 1029 | address_lock_bank = new address_lock_t [num_locks]; 1030 | for (int64_t i = 0; i < num_locks; ++i) { 1031 | address_lock_bank[i].lamport_timestamp = 0; 1032 | PIN_MutexInit(&address_lock_bank[i].lock); 1033 | } 1034 | tls_key = PIN_CreateThreadDataKey(0); 1035 | PIN_MutexInit(&file_lock.lock); 1036 | PIN_MutexInit(&thread_start_fini_lock.lock); 1037 | 1038 | last_flushed_cache = 0; 1039 | 1040 | // Write to a file since cout and cerr maybe closed by the application 1041 | trace_file.open(KnobOutputFile.Value().c_str()); 1042 | trace_file << dec; 1043 | trace_file.setf(ios::showbase); 1044 | 1045 | // Register Image to be called to instrument functions. 1046 | IMG_AddInstrumentFunction(Image, trace_functions); 1047 | INS_AddInstrumentFunction(Instruction, 0); 1048 | 1049 | PIN_AddThreadStartFunction(thread_start_a, 0); 1050 | PIN_AddThreadFiniFunction(thread_fini_a, 0); 1051 | PIN_AddFiniFunction(fini_a, 0); 1052 | PIN_AddContextChangeFunction(ctxt_change_release, 0); 1053 | 1054 | // Never returns 1055 | PIN_StartProgram(); 1056 | 1057 | return 0; 1058 | } 1059 | 1060 | --------------------------------------------------------------------------------