├── src
│   ├── func_list
│   ├── annotation.cpp
│   ├── annotation.h
│   ├── SConstruct
│   ├── AtomicTrace.py
│   ├── drain_buffer.cpp
│   ├── inc_sim.py
│   ├── inc_sim_cas.py
│   ├── inc.cpp
│   ├── inc_cas.cpp
│   └── merge.cpp
├── merge_test
│   ├── my_sort.sh
│   ├── inc_test.sh
│   ├── README.md
│   ├── Test.py
│   └── CompareMerge.py
├── trace
│   ├── makefile
│   ├── makefile.rules
│   └── trace.cpp
└── README.md

/src/func_list:
--------------------------------------------------------------------------------
1 | atomic_trace::special_malloc
2 | atomic_trace::special_free
3 | 
--------------------------------------------------------------------------------
/merge_test/my_sort.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | sort -k 1 -n memory_trace.out | sed '/thread_sync/d'
3 | 
--------------------------------------------------------------------------------
/merge_test/inc_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ~/pin/pin -t ~/work/atomic-memory-trace/trace/obj-intel64/trace.so -f ~/work/atomic-memory-trace/src/func_list -i 1 -r 1 -- ~/work/atomic-memory-trace/src/inc 5 1000
3 | 
--------------------------------------------------------------------------------
/merge_test/README.md:
--------------------------------------------------------------------------------
1 | Merge Test
2 | ----------
3 | 
4 | The provided merge utility is useful for streaming/piping a
5 | pintool-provided memory trace directly to a simulation application.
6 | 
7 | This merge test compares the output of the merge to the Linux
8 | "sort" utility (which I assume will be correct).
9 | A proper comparison requires that the Lamport timestamps be included
10 | in both the merge output and the sort output.
11 | Additionally, trace entries with equal timestamps may correctly
12 | appear in any order, and this order may differ between the merge
13 | and sort outputs. Therefore, we check that identical entries
14 | appear with the same timestamp.
15 | 
--------------------------------------------------------------------------------
/trace/makefile:
--------------------------------------------------------------------------------
1 | ##############################################################
2 | #
3 | # DO NOT EDIT THIS FILE!
4 | #
5 | ##############################################################
6 | 
7 | # If the tool is built out of the kit, PIN_ROOT must be specified in the make invocation and point to the kit root.
8 | ifdef PIN_ROOT
9 | CONFIG_ROOT := $(PIN_ROOT)/source/tools/Config
10 | else
11 | CONFIG_ROOT := ../Config
12 | endif
13 | include $(CONFIG_ROOT)/makefile.config
14 | include makefile.rules
15 | include $(TOOLS_ROOT)/Config/makefile.default.rules
16 | 
17 | ##############################################################
18 | #
19 | # DO NOT EDIT THIS FILE!
20 | # 21 | ############################################################## 22 | -------------------------------------------------------------------------------- /src/annotation.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | #include "annotation.h" 21 | 22 | // outside of header to make sure functions are not in-lined 23 | 24 | void* atomic_trace::special_malloc(size_t size) { 25 | return malloc(size); 26 | } 27 | 28 | void atomic_trace::special_free(void *addr) { 29 | free(addr); 30 | } 31 | 32 | void atomic_trace::register_thread(int64_t thread_num) { 33 | } 34 | 35 | void atomic_trace::start_roi() {} 36 | void atomic_trace::end_roi() {} 37 | -------------------------------------------------------------------------------- /src/annotation.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | 20 | #ifndef PMC_ATOMIC_TRACE_H 21 | #define PMC_ATOMIC_TRACE_H 22 | 23 | // provide annotation for persistent memory traces 24 | // need general trace annotation (threads, region of interest) 25 | // persistent memory regions (pers_malloc) 26 | // and persist barriers 27 | 28 | #include 29 | #include 30 | 31 | namespace atomic_trace { 32 | 33 | void* special_malloc(size_t size); 34 | void special_free(void *addr); 35 | void register_thread(int64_t thread_num); 36 | void start_roi(); 37 | void end_roi(); 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /src/SConstruct: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Copyright (c) 2013 Steven Pelley 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | # this software and associated documentation files (the "Software"), to deal in 6 | # the Software without restriction, including without limitation the rights to 7 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | # the Software, and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
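#
# usage, from the src/ directory: a plain "scons" builds everything declared
# below, "scons debug=1" adds -g -O0, and "scons define=NAME" (repeatable)
# passes extra preprocessor defines through CPPDEFINES.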
20 | 
21 | env = Environment()
22 | # for clang color if using gnu screen or terminal emulation
23 | import os
24 | env['ENV']['TERM'] = os.environ['TERM']
25 | 
26 | debug = ARGUMENTS.get('debug', 0)
27 | if int(debug):
28 |     env.Append(CCFLAGS = ['-g', '-O0'])
29 | 
30 | cppdefines = []
31 | for key, value in ARGLIST:
32 |     if key == 'define':
33 |         cppdefines.append(value)
34 | env.Append(CPPDEFINES=cppdefines)
35 | 
36 | env_annotation = env.Clone()
37 | annotation_lib = env_annotation.Library('annotation', 'annotation.cpp')
38 | 
39 | env_inc = env.Clone()
40 | env_inc.Append(CCFLAGS = '-pthread', LIBS=['pthread', annotation_lib])
41 | env_inc.Program('inc.cpp')
42 | 
43 | env_inc_cas = env.Clone()
44 | env_inc_cas.Append(CCFLAGS = '-pthread', LIBS=['pthread', annotation_lib])
45 | env_inc_cas.Program('inc_cas.cpp')
46 | 
47 | env_merge = env.Clone()
48 | env_merge.Program(['merge.cpp'])
49 | 
50 | env_drain_buffer = env.Clone()
51 | env_drain_buffer.Append(CCFLAGS = '-pthread', LIBS=['pthread'])
52 | env_drain_buffer.Program(['drain_buffer.cpp'])
53 | 
--------------------------------------------------------------------------------
/merge_test/Test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # repeatedly test the merge utility for a given pintool command
3 | # inputs:
4 | #   script that produces an output trace in memory_trace.out
5 | #     for now the script must use fully qualified names of files
6 | #   number of attempts
7 | #
8 | # for each output file that merge fails (either the merge process fails
9 | # or the output of merge and sort differs)
10 | # produce a directory with the original memory_trace.out,
11 | # sort.out, and merge.out
12 | 
13 | # return True if success, False if some failure
14 | # if it fails, produce a 'merge_results' file with some output related to the failure
15 | #
16 | # test file and merge utility must be fully qualified names
17 | def test1(test_file, sort_utility, merge_utility):
18 |     import subprocess
19 |     import shlex
20 | 
21 |     fout = open('command_out', 'w')
22 |     ret = subprocess.call(shlex.split(test_file), stdout=fout, stderr=subprocess.STDOUT)
23 |     assert ret == 0
24 |     fout.close()
25 | 
26 |     # memory_trace.out now exists
27 |     # first sort with the linux sort utility
28 |     fout = open('memory_trace.sort', 'w')
29 |     ret = subprocess.call(shlex.split(sort_utility), stdout=fout)
30 |     assert ret == 0
31 |     fout.close()
32 | 
33 |     # now run merge, capturing stderr in a file
34 |     fin = open('memory_trace.out')
35 |     fout = open('memory_trace.merge', 'w')
36 |     ferr = open('merge_err', 'w')
37 |     ret = subprocess.call(shlex.split(merge_utility + " -t"), stdin = fin, stdout=fout, stderr=ferr)
38 |     fin.close()
39 |     fout.close()
40 |     ferr.close()
41 | 
42 |     if ret != 0:
43 |         return False
44 | 
45 |     f1 = open('memory_trace.sort')
46 |     f2 = open('memory_trace.merge')
47 | 
48 |     eq_ret = CompareMerge.compare(f1, f2)
49 |     f1.close()
50 |     f2.close()
51 |     if not eq_ret[0]:
52 |         fout = open('merge_results', 'w')
53 |         fout.write("files differ:\n")
54 |         fout.write("file {}:\ttime: {}\tline {}\n".format('memory_trace.sort', eq_ret[1], eq_ret[3]))
55 |         fout.write("file {}:\ttime: {}\tline {}\n".format('memory_trace.merge', eq_ret[2], eq_ret[4]))
56 |         fout.close()
57 | 
58 |     return eq_ret[0]
59 | 
60 | if __name__=="__main__":
61 |     import CompareMerge
62 |     import sys
63 |     import os.path
64 |     import shutil
65 | 
66 |     num_test = int(sys.argv[1])
67 |     test_file = os.path.abspath(sys.argv[2])
68 |     sort_utility = os.path.abspath('my_sort.sh')
69 |     merge_utility = os.path.abspath('../src/merge')
70 | 
71 |     if
os.path.exists('compare_test'): 72 | shutil.rmtree('compare_test') 73 | os.mkdir('compare_test') 74 | os.chdir('compare_test') 75 | 76 | file_count = 1 77 | for i in range(num_test): 78 | dir_name = 'test{}'.format(file_count) 79 | os.mkdir(dir_name) 80 | os.chdir(dir_name) 81 | 82 | succ = test1(test_file, sort_utility, merge_utility) 83 | os.chdir('..') 84 | if not succ: 85 | file_count += 1 86 | else: 87 | shutil.rmtree(dir_name) 88 | fail_count = file_count - 1 89 | print "failures:\t{}".format(fail_count) 90 | 91 | -------------------------------------------------------------------------------- /merge_test/CompareMerge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # compare the 2 input files 3 | # if not identical find the first lines where they differ 4 | # as well as the lamport timestamp 5 | 6 | # return 7 | # (True,) if they are equal and 8 | # 9 | # ( 10 | # False, 11 | # f1 timestamp where first differ, 12 | # f2 timestamp where first differ, 13 | # f1 line, 14 | # f2 line 15 | # ) 16 | # otherwise 17 | def compare(f1, f2, failassert=False): 18 | # initialize by reading first lines 19 | f1_line = 1 20 | f2_line = 1 21 | 22 | f1_last_line = f1.readline() 23 | if f1_last_line != "": 24 | f1_time = get_time(f1_last_line) 25 | f2_last_line = f2.readline() 26 | if f2_last_line != "": 27 | f2_time = get_time(f2_last_line) 28 | 29 | f1_time = 0 30 | f2_time = 0 31 | 32 | while True: 33 | # read all lines from each file that have the next timestamp 34 | # check that the timestamp is the same for both files 35 | # and check that the set of lines with this timestamp are 36 | # identical (but different orders allowed) 37 | 38 | f1_cont = f1_last_line != "" 39 | f2_cont = f2_last_line != "" 40 | if f1_cont != f2_cont: 41 | fail(failassert) 42 | return (False, f1_time, f2_time, f1_line, f2_line,) 43 | elif (not f1_cont and not f2_cont): 44 | return (True,) 45 | # else both are continue and last_lines contain valid strings 46 | 47 | (bunch1, f1_last_line, f1_new_time, f1_line) = \ 48 | create_bunch(f1, f1_last_line, f1_line, failassert) 49 | 50 | (bunch2, f2_last_line, f2_new_time, f2_line) = \ 51 | create_bunch(f2, f2_last_line, f2_line, failassert) 52 | 53 | if (f1_new_time <= f1_time or 54 | f2_new_time <= f2_time or 55 | bunch1 != bunch2 or 56 | f1_new_time != f2_new_time 57 | ): 58 | fail(failassert) 59 | return (False, f1_time, f2_time, f1_line, f2_line,) 60 | f1_time = f1_new_time 61 | f2_time = f2_new_time 62 | 63 | assert False, "fell through without break?" 
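# For example, these two inputs compare equal, because entries that share a
# timestamp may legally appear in either order:
#   f1: "1\ta", "2\tb", "2\tc"
#   f2: "1\ta", "2\tc", "2\tb"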
64 | 
65 | # create the next bunch
66 | # return
67 | #   (tuple bunch, new last_line, bunch's time, new line_num,)
68 | #
69 | # if last_line == "" then we have reached the end of the file,
70 | # so return ((), "", 0, same line_num)
71 | def create_bunch(f, last_line, last_line_num, failassert):
72 |     if last_line == "":
73 |         return ((), "", 0, last_line_num)
74 |     bunch_time = get_time(last_line)
75 | 
76 |     line_num = last_line_num
77 |     bunch = [last_line]
78 |     while True: # do while
79 |         line_num += 1
80 |         last_line = f.readline()
81 |         if last_line == "":
82 |             break
83 |         new_time = get_time(last_line)
84 |         if new_time != bunch_time:
85 |             break
86 |         bunch.append(last_line)
87 |     return (tuple(sorted(bunch)), last_line, bunch_time, line_num)
88 | 
89 | def fail(failassert):
90 |     if failassert:
91 |         assert False
92 | 
93 | def get_time(s):
94 |     return int(s.split('\t', 1)[0])
95 | 
96 | if __name__=="__main__":
97 |     import sys
98 |     f1_name = sys.argv[1]
99 |     f1 = open(f1_name)
100 |     f2_name = sys.argv[2]
101 |     f2 = open(f2_name)
102 | 
103 |     out = compare(f1, f2, True)
104 | 
105 |     f1.close()
106 |     f2.close()
107 | 
108 |     if out[0]:
109 |         print "equal"
110 |         sys.exit(0)
111 |     else:
112 |         print "files differ:"
113 |         print "file {}:\ttime: {}\tline {}".format(f1_name, out[1], out[3])
114 |         print "file {}:\ttime: {}\tline {}".format(f2_name, out[2], out[4])
115 |         sys.exit(1)
116 | 
--------------------------------------------------------------------------------
/src/AtomicTrace.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2013 Steven Pelley
2 | #
3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
4 | # this software and associated documentation files (the "Software"), to deal in
5 | # the Software without restriction, including without limitation the rights to
6 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7 | # the Software, and to permit persons to whom the Software is furnished to do so,
8 | # subject to the following conditions:
9 | #
10 | # The above copyright notice and this permission notice shall be included in all
11 | # copies or substantial portions of the Software.
12 | #
13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 | 20 | # framework for running atomic memory trace simulations 21 | # takes in a trace file and a simulation object 22 | # simulation object provides callback function and state for the sim 23 | 24 | import re 25 | 26 | class Trace: 27 | 28 | # caller is responsible for initializing sim 29 | def __init__(self, trace_file, sim): 30 | self._sim = sim 31 | self._trace_file = trace_file 32 | 33 | def run(self): 34 | # need to exist before callback 35 | read_1_address = 0 36 | read_size = 0 37 | read_2_address = 0 38 | write_address = 0 39 | write_size = 0 40 | 41 | for i,l in enumerate(self._trace_file): 42 | l = l.strip() 43 | l_list = l.split('\t') 44 | threadid = int(l_list[0]) 45 | operation = l_list[1] 46 | 47 | # memory 48 | if operation == 'm': 49 | have_read_1 = len(l_list) > 2 and l_list[2] == 'r' 50 | have_read_2 = have_read_1 and len(l_list) > 5 and l_list[5] == 'r2' 51 | have_write = l_list[-3] == 'w' 52 | if have_read_1: 53 | read_1_address = int(l_list[3]) 54 | read_size = int(l_list[4]) 55 | if have_read_2: 56 | read_2_address = int(l_list[6]) 57 | if have_write: 58 | write_address = int(l_list[-2]) 59 | write_size = int(l_list[-1]) 60 | self._sim.memory_access( 61 | i 62 | , threadid 63 | , have_read_1 64 | , have_read_2 65 | , have_write 66 | , read_1_address 67 | , read_size 68 | , read_2_address 69 | , write_address 70 | , write_size 71 | ) 72 | 73 | # thread register 74 | elif operation == 'tr': 75 | self._sim.start_thread(i, threadid) 76 | 77 | # thread finish 78 | elif operation == 'tf': 79 | self._sim.finish_thread(i, threadid) 80 | 81 | # function call 82 | elif operation == 'fc': 83 | func_name = l_list[2] 84 | stack_pointer = int(l_list[3]) 85 | arg1 = int(l_list[4]) 86 | arg2 = int(l_list[5]) 87 | arg3 = int(l_list[6]) 88 | self._sim.function_call(i, func_name, threadid, stack_pointer, arg1, arg2, arg3) 89 | 90 | # function return 91 | elif operation == 'fr': 92 | func_name = l_list[2] 93 | stack_pointer = int(l_list[3]) 94 | return_value = int(l_list[4]) 95 | self._sim.function_return(i, func_name, threadid, stack_pointer, return_value) 96 | 97 | # start ROI 98 | elif operation == 'start_roi': 99 | self._sim.start_roi(i) 100 | 101 | # end ROI 102 | elif operation == 'end_roi': 103 | self._sim.end_roi(i) 104 | 105 | # change context 106 | elif operation == 'ctxt_change': 107 | self._sim.end_roi(i, threadid) 108 | 109 | -------------------------------------------------------------------------------- /src/drain_buffer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | // 20 | // drain_buffer.cpp 21 | // 22 | // the atomic tracer tends to block on writing to the file handle with 23 | // all other threads blocking on the file handle lock. 24 | // The result is that one thread tends to run at a time, interfering with 25 | // thread interleaving. Instead, we would like to buffer output to memory 26 | // and then completely block while draining the buffer. We can double 27 | // buffer so that one buffer fills while the next is draining 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | struct buffer_t { 37 | char *buf; 38 | int64_t capacity; 39 | int64_t end_cursor; // next location to insert 40 | 41 | char pad [64 - ( sizeof(char*) + 2*sizeof(int64_t) )]; 42 | }; 43 | 44 | // shared 45 | buffer_t bufs [2]; 46 | pthread_barrier_t *bar; 47 | bool eof; 48 | 49 | // read from std in to the buffer 50 | // until the buffer fills or reach EOF 51 | // use (iteration % 2) buffer 52 | void* reader(void *ptr) { 53 | setvbuf(stdin, (char*)NULL, _IOFBF, 1 << 24); 54 | for (int64_t i = 0; ; ++i) { 55 | int64_t idx = i % 2; 56 | buffer_t *buf = &bufs[idx]; 57 | buf->end_cursor = 0; 58 | 59 | while (buf->end_cursor < buf->capacity && !feof(stdin)) { 60 | int64_t size = fread(&buf->buf[buf->end_cursor], 1, buf->capacity-buf->end_cursor, stdin); 61 | buf->end_cursor += size; 62 | assert(buf->end_cursor <= buf->capacity); 63 | } 64 | 65 | pthread_barrier_wait(bar); 66 | eof = feof(stdin); 67 | pthread_barrier_wait(bar); 68 | if (eof) return NULL; 69 | } 70 | } 71 | 72 | // drain the buffer, writing to std out from buffer 73 | // use (iteration + 1) % 2 buffer 74 | void* writer(void *ptr) { 75 | setvbuf(stdout, (char*)NULL, _IOFBF, 1 << 24); 76 | for (int64_t i = 0; ; ++i) { 77 | int64_t idx = (i+1) % 2; 78 | buffer_t *buf = &bufs[idx]; 79 | 80 | int64_t start = 0; 81 | while (start < buf->end_cursor) { 82 | int64_t size = fwrite(&buf->buf[start], 1, buf->end_cursor-start, stdout); 83 | start += size; 84 | } 85 | assert(start == buf->end_cursor); 86 | if (eof) return NULL; 87 | 88 | // sync and make sure eof is consistent 89 | pthread_barrier_wait(bar); 90 | pthread_barrier_wait(bar); 91 | } 92 | } 93 | 94 | int main(int argc, char** argv) { 95 | if (argc != 2) { 96 | std::cout << "usage: drain_buffer " << std::endl; 97 | assert(false); 98 | } 99 | int64_t total_capacity = atol(argv[1]); 100 | int64_t buffer_capacity = total_capacity / 2; 101 | assert(buffer_capacity > 0); 102 | 103 | eof = false; 104 | bar = new pthread_barrier_t(); 105 | pthread_barrier_init(bar, NULL, 2); 106 | for (int64_t i = 0; i < 2; ++i) { 107 | bufs[i].buf = new char [buffer_capacity]; 108 | bufs[i].capacity = buffer_capacity; 109 | bufs[i].end_cursor = 0; 110 | } 111 | 112 | pthread_t read_thread, write_thread; 113 | assert(pthread_create(&read_thread, NULL, reader, NULL) == 0); 114 | assert(pthread_create(&write_thread, NULL, writer, NULL) == 0); 115 | 116 | assert(pthread_join(read_thread, NULL) == 0); 117 | assert(pthread_join(write_thread, NULL) == 0); 118 | 119 | delete bar; 120 | for (int64_t i = 0; i < 2; ++i) { 121 | delete [] bufs[i].buf; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/inc_sim.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Copyright (c) 2013 Steven Pelley 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | # this software and associated documentation files (the "Software"), to deal in 6 | # the Software without restriction, including without limitation the rights to 7 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | # the Software, and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | # analyse the inc trace. 22 | # assert: 23 | # exactly 8 bytes of persistent memory are accessed 24 | # each access to this persistent memory a read and write (atomic inc) 25 | # 26 | # compute and print: 27 | # for each registered thread the number of successful increments to counter 28 | # assuming counter is initialized to 0 compute the number of old value-even accesses 29 | # 30 | # print out any context changes as these may cause inconsistencies 31 | 32 | class my_sim: 33 | # constants 34 | set_read1_write = set(['read1', 'write']) 35 | 36 | def __init__(self): 37 | self._special_address = None 38 | self._special_size = 0 39 | self._special_value = 0 40 | 41 | self._active_threads = set() 42 | import collections 43 | # threadid to increment 44 | self._increments = collections.defaultdict(int) 45 | self._even_increments = collections.defaultdict(int) 46 | 47 | def memory_access(self, line_num, threadid, have_read_1, have_read_2, have_write, read_1_address, read_size, read_2_address, write_address, write_size): 48 | if not self._special_address: 49 | return 50 | 51 | # atomic inc to special address? 
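# i.e. a read-modify-write (a single read and write of the same address and
# size) whose write range overlaps the region returned by special_malloc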
52 | if ( 53 | # Read-Modify-Write (atomic inc) 54 | have_read_1 and not have_read_2 and have_write and 55 | read_1_address == write_address and 56 | read_size == write_size 57 | ) and ( 58 | # matches special address 59 | (write_address < self._special_address and write_address + write_size >= self._special_address) or 60 | (write_address >= self._special_address and write_address <= self._special_address + self._special_size) 61 | ): 62 | # check that we touch exactly the first 8 bytes 63 | assert write_address == self._special_address and write_size == 8 64 | if self._special_value % 2 == 0: 65 | self._even_increments[threadid] += 1 66 | self._increments[threadid] += 1 67 | self._special_value += 1 68 | 69 | def start_thread(self, line_num, threadid): 70 | assert threadid not in self._active_threads 71 | self._active_threads.add(threadid) 72 | 73 | def finish_thread(self, line_num, threadid): 74 | assert threadid in self._active_threads 75 | self._active_threads.remove(threadid) 76 | 77 | def function_call(self, line_num, name, threadid, stack_pointer, arg1, arg2, arg3): 78 | if name == "atomic_trace::special_malloc": 79 | assert not self._special_address 80 | assert self._special_size == 0 81 | self._special_size = arg1 82 | elif name == "atomic_trace::special_free": 83 | pass 84 | else: 85 | assert False 86 | 87 | def function_return(self, line_num, name, threadid, stack_pointer, return_value): 88 | if name == "atomic_trace::special_malloc": 89 | assert not self._special_address 90 | assert self._special_size >= 8 91 | self._special_address = return_value 92 | elif name == "atomic_trace::special_free": 93 | pass 94 | else: 95 | assert False 96 | 97 | def start_roi(self, line_num): 98 | pass 99 | 100 | def end_roi(self, line_num): 101 | pass 102 | 103 | def ctxt_change(self, line_num, threadid): 104 | print("context change! Line {}".format(line_num)) 105 | 106 | def main(): 107 | import argparse 108 | parser = argparse.ArgumentParser(description='test inc atomic trace simulation') 109 | parser.add_argument('--infile', default="") 110 | args = parser.parse_args() 111 | 112 | import sys 113 | if len(args.infile) == 0: 114 | fin = sys.stdin 115 | else: 116 | fin = open(args.infile) 117 | 118 | import AtomicTrace 119 | 120 | sim = my_sim() 121 | trace = AtomicTrace.Trace(fin, sim) 122 | trace.run() 123 | 124 | # recreate the program output 125 | print("final counter value: {}".format(sim._special_value)) 126 | for threadid in sorted(sim._increments.keys()): 127 | print("thread\t{}\t{}\t{}".format(threadid, sim._increments[threadid], sim._even_increments[threadid])) 128 | 129 | if __name__=="__main__": 130 | main() 131 | -------------------------------------------------------------------------------- /src/inc_sim_cas.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Copyright (c) 2013 Steven Pelley 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | # this software and associated documentation files (the "Software"), to deal in 6 | # the Software without restriction, including without limitation the rights to 7 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | # the Software, and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 
13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | # analyse the inc trace. 22 | # assert: 23 | # exactly 8 bytes of persistent memory are accessed 24 | # each access to this persistent memory is either a read or a read and write 25 | # (inc here is implemented with CAS) 26 | # 27 | # compute and print: 28 | # for each registered thread the number of writes to counter (should be number of incs) 29 | # for each registered thread the number of reads to counter 30 | # -- each inc will read once before the CAS, read as part of the successful CAS 31 | # and then once for every failure 32 | # 33 | # print out any context changes as these may cause inconsistencies 34 | 35 | class my_sim: 36 | def __init__(self): 37 | self._special_address = None 38 | self._special_size = 0 39 | self._special_value = 0 40 | 41 | self._active_threads = set() 42 | import collections 43 | # threadid to increment 44 | self._increments = collections.defaultdict(int) 45 | self._reads = collections.defaultdict(int) 46 | 47 | def memory_access(self, line_num, threadid, have_read_1, have_read_2, have_write, read_1_address, read_size, read_2_address, write_address, write_size): 48 | if not self._special_address: 49 | return 50 | 51 | read1 = have_read_1 and not have_read_2 and not have_write 52 | 53 | RMW = ( 54 | have_read_1 and not have_read_2 and have_write and 55 | read_1_address == write_address and 56 | read_size == write_size 57 | ) 58 | 59 | read1_special = ( have_read_1 and ( 60 | (read_1_address < self._special_address and read_1_address + read_size >= self._special_address) or 61 | (read_1_address >= self._special_address and read_1_address <= self._special_address + self._special_size) 62 | ) ) 63 | 64 | read_first_8 = have_read_1 and read_1_address == self._special_address and read_size == 8 65 | 66 | # atomic inc to special address? 
67 | if RMW and read1_special: 68 | # check that we touch exactly the first 8 bytes 69 | assert read_first_8 70 | self._increments[threadid] += 1 71 | self._special_value += 1 72 | self._reads[threadid] += 1 73 | elif read1 and read1_special: 74 | self._reads[threadid] += 1 75 | 76 | def start_thread(self, line_num, threadid): 77 | assert threadid not in self._active_threads 78 | self._active_threads.add(threadid) 79 | 80 | def finish_thread(self, line_num, threadid): 81 | assert threadid in self._active_threads 82 | self._active_threads.remove(threadid) 83 | 84 | def function_call(self, line_num, name, threadid, stack_pointer, arg1, arg2, arg3): 85 | if name == "atomic_trace::special_malloc": 86 | assert not self._special_address 87 | assert self._special_size == 0 88 | self._special_size = arg1 89 | elif name == "atomic_trace::special_free": 90 | pass 91 | else: 92 | assert False 93 | 94 | def function_return(self, line_num, name, threadid, stack_pointer, return_value): 95 | if name == "atomic_trace::special_malloc": 96 | assert not self._special_address 97 | assert self._special_size >= 8 98 | self._special_address = return_value 99 | elif name == "atomic_trace::special_free": 100 | pass 101 | else: 102 | assert False 103 | 104 | def start_roi(self, line_num): 105 | pass 106 | 107 | def end_roi(self, line_num): 108 | pass 109 | 110 | def ctxt_change(self, line_num, threadid): 111 | print("context change! Line {}".format(line_num)) 112 | 113 | def main(): 114 | import argparse 115 | parser = argparse.ArgumentParser(description='test inc atomic trace simulation') 116 | parser.add_argument('--infile', default="") 117 | args = parser.parse_args() 118 | 119 | import sys 120 | if len(args.infile) == 0: 121 | fin = sys.stdin 122 | else: 123 | fin = open(args.infile) 124 | 125 | import AtomicTrace 126 | 127 | sim = my_sim() 128 | trace = AtomicTrace.Trace(fin, sim) 129 | trace.run() 130 | 131 | # recreate the program output 132 | print("final counter value: {}".format(sim._special_value)) 133 | for threadid in sorted(sim._increments.keys()): 134 | print("thread\t{}\t{}".format(threadid, sim._increments[threadid])) 135 | 136 | print("fails: {}".format(sum(sim._reads.values())-(2*sim._special_value))) 137 | 138 | if __name__=="__main__": 139 | main() 140 | -------------------------------------------------------------------------------- /src/inc.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | // atomic inc 21 | // 22 | // given a single shared int variable have a number of threads: 23 | // atomic_inc the variable, possibly waiting a small amount of time afterwards 24 | // each thread should count the number of times the variable was even pre-inc 25 | // 26 | // all traced accesses to the counter should be atomic_fetchadd_long 27 | // Use the memory trace to determine how many pre-inc evens were observed from each thread 28 | // if number of pre-inc evens matches between the trace and the application output we have 29 | // strong confidence that the trace is correct and atomic 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include "annotation.h" 36 | 37 | static __inline u_long 38 | atomic_fetchadd_long(volatile u_long *p, u_long v) { 39 | __asm __volatile( 40 | " lock ; " 41 | " xaddq %0,%1 ; " 42 | "# atomic_fetchadd_long" 43 | : "+r" (v), /* 0 */ 44 | "+m" (*p) /* 1 */ 45 | : : "cc"); 46 | return (v); 47 | } 48 | 49 | struct thread_data_t { 50 | pthread_barrier_t *bar1; 51 | pthread_barrier_t *bar2; 52 | pthread_barrier_t *bar3; 53 | pthread_barrier_t *bar4; 54 | uint64_t *shared; 55 | int64_t delay; 56 | 57 | int64_t threadid; 58 | int64_t count; 59 | int64_t even_count; 60 | int64_t stop_count; 61 | }; 62 | 63 | struct counter_w { 64 | uint64_t counter; 65 | char padding [56]; 66 | }; 67 | 68 | void* thread_incs(void *ptr) { 69 | thread_data_t* tdata = reinterpret_cast(ptr); 70 | uint64_t old_val = 0; 71 | 72 | atomic_trace::register_thread(tdata->threadid); 73 | pthread_barrier_wait(tdata->bar1); 74 | // ROI begins here 75 | pthread_barrier_wait(tdata->bar2); 76 | 77 | do { 78 | old_val = __sync_fetch_and_add(tdata->shared, 1); 79 | if (old_val % 2 == 0) { // inc'ed on even 80 | ++tdata->even_count; 81 | } 82 | ++tdata->count; 83 | } while (old_val < tdata->stop_count); 84 | 85 | pthread_barrier_wait(tdata->bar3); 86 | // ROI ends here 87 | pthread_barrier_wait(tdata->bar4); 88 | return NULL; 89 | } 90 | 91 | int main(int argc, char** argv) { 92 | int64_t num_threads; 93 | int64_t to_insert_total; 94 | int64_t delay; 95 | 96 | assert(argc == 3); 97 | num_threads = atoi(argv[1]); 98 | to_insert_total = atoi(argv[2]); 99 | 100 | assert(num_threads > 0); 101 | 102 | // threads will each increment once beyond to_insert_total 103 | // so subtract the number of threads from the insert total 104 | assert(to_insert_total > num_threads); 105 | to_insert_total -= num_threads; 106 | 107 | counter_w *counter = reinterpret_cast(atomic_trace::special_malloc(sizeof(counter_w))); 108 | counter->counter = 0; 109 | 110 | pthread_barrier_t *bar1 = new pthread_barrier_t(); 111 | pthread_barrier_t *bar2 = new pthread_barrier_t(); 112 | pthread_barrier_t *bar3 = new pthread_barrier_t(); 113 | pthread_barrier_t *bar4 = new pthread_barrier_t(); 114 | pthread_barrier_init(bar1, NULL, num_threads+1); 115 | pthread_barrier_init(bar2, NULL, num_threads+1); 116 | pthread_barrier_init(bar3, NULL, num_threads+1); 117 | pthread_barrier_init(bar4, NULL, num_threads+1); 118 | thread_data_t *tdata = new thread_data_t[num_threads]; 119 | pthread_t *threads = new pthread_t[num_threads]; 120 | 121 | srand(time(NULL)); 122 | 123 | for (int64_t i = 0; i < num_threads; ++i) { 124 | tdata[i].threadid = i; 
125 | tdata[i].bar1 = bar1; 126 | tdata[i].bar2 = bar2; 127 | tdata[i].bar3 = bar3; 128 | tdata[i].bar4 = bar4; 129 | tdata[i].shared = &counter->counter; 130 | tdata[i].delay = delay; 131 | tdata[i].count = 0; 132 | tdata[i].even_count = 0; 133 | tdata[i].stop_count = to_insert_total; 134 | 135 | uint64_t ret = pthread_create(&threads[i], NULL, thread_incs, (void*) &tdata[i]); 136 | } 137 | 138 | pthread_barrier_wait(bar1); 139 | atomic_trace::start_roi(); 140 | pthread_barrier_wait(bar2); 141 | 142 | pthread_barrier_wait(bar3); 143 | atomic_trace::end_roi(); 144 | pthread_barrier_wait(bar4); 145 | 146 | for (int64_t i = 0; i < num_threads; ++i) { 147 | uint64_t ret = pthread_join(threads[i], NULL); 148 | } 149 | 150 | assert(counter->counter == to_insert_total+num_threads); 151 | 152 | // print out the count and even count for each thread 153 | for (int64_t i = 0; i < num_threads; ++i) { 154 | std::cout << "thread " << i << "\t" << tdata[i].count << "\t" << tdata[i].even_count << std::endl; 155 | } 156 | 157 | delete [] threads; 158 | delete [] tdata; 159 | delete bar1; 160 | delete bar2; 161 | delete bar3; 162 | delete bar4; 163 | 164 | atomic_trace::special_free(counter); 165 | } 166 | -------------------------------------------------------------------------------- /src/inc_cas.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | // atomic inc with CAS 21 | // 22 | // given a single shared int variable have a number of threads: 23 | // atomic_inc the variable, possibly waiting a small amount of time afterwards 24 | // each thread should count the number of times the variable was even pre-inc 25 | // as well as the number of failed CASes 26 | // 27 | // the number of simulated writes to the special memory should be equal to 28 | // the total number of increments. 
Verify that this is true when not 29 | // tracing CAS fails, and that it breaks when you do log CAS fails as writes 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include "annotation.h" 36 | 37 | struct thread_data_t { 38 | pthread_barrier_t *bar1; 39 | pthread_barrier_t *bar2; 40 | pthread_barrier_t *bar3; 41 | pthread_barrier_t *bar4; 42 | uint64_t *shared; 43 | int64_t delay; 44 | 45 | int64_t threadid; 46 | int64_t count; 47 | int64_t even_count; 48 | int64_t stop_count; 49 | int64_t fail_count; 50 | }; 51 | 52 | struct counter_w { 53 | uint64_t counter; 54 | char padding [56]; 55 | }; 56 | 57 | // returns the pre-inc value 58 | // increments *fails for every CAS failure 59 | uint64_t inc_with_CAS(uint64_t *counter, int64_t *fails) { 60 | bool first = true; 61 | bool succ = false; 62 | uint64_t orig = *counter; 63 | uint64_t read_val; 64 | while (!succ) { 65 | if (!first) { 66 | *fails += 1; 67 | } 68 | first = false; 69 | read_val = __sync_val_compare_and_swap(counter, orig, orig+1); 70 | succ = read_val == orig; 71 | orig = read_val; 72 | } 73 | return orig; 74 | } 75 | 76 | void* thread_incs(void *ptr) { 77 | thread_data_t* tdata = reinterpret_cast(ptr); 78 | uint64_t old_val = 0; 79 | 80 | atomic_trace::register_thread(tdata->threadid); 81 | pthread_barrier_wait(tdata->bar1); 82 | // ROI begins here 83 | pthread_barrier_wait(tdata->bar2); 84 | 85 | do { 86 | old_val = inc_with_CAS(tdata->shared, &tdata->fail_count); 87 | if (old_val % 2 == 0) { // inc'ed on even 88 | ++tdata->even_count; 89 | } 90 | ++tdata->count; 91 | } while (old_val < tdata->stop_count); 92 | 93 | pthread_barrier_wait(tdata->bar3); 94 | // ROI ends here 95 | pthread_barrier_wait(tdata->bar4); 96 | return NULL; 97 | } 98 | 99 | int main(int argc, char** argv) { 100 | int64_t num_threads; 101 | int64_t to_insert_total; 102 | int64_t delay; 103 | 104 | assert(argc == 3); 105 | num_threads = atoi(argv[1]); 106 | to_insert_total = atoi(argv[2]); 107 | 108 | assert(num_threads > 0); 109 | 110 | // threads will each increment once beyond to_insert_total 111 | // so subtract the number of threads from the insert total 112 | assert(to_insert_total > num_threads); 113 | to_insert_total -= num_threads; 114 | 115 | counter_w *counter = reinterpret_cast(atomic_trace::special_malloc(sizeof(counter_w))); 116 | counter->counter = 0; 117 | 118 | pthread_barrier_t *bar1 = new pthread_barrier_t(); 119 | pthread_barrier_t *bar2 = new pthread_barrier_t(); 120 | pthread_barrier_t *bar3 = new pthread_barrier_t(); 121 | pthread_barrier_t *bar4 = new pthread_barrier_t(); 122 | pthread_barrier_init(bar1, NULL, num_threads+1); 123 | pthread_barrier_init(bar2, NULL, num_threads+1); 124 | pthread_barrier_init(bar3, NULL, num_threads+1); 125 | pthread_barrier_init(bar4, NULL, num_threads+1); 126 | thread_data_t *tdata = new thread_data_t[num_threads]; 127 | pthread_t *threads = new pthread_t[num_threads]; 128 | 129 | srand(time(NULL)); 130 | 131 | for (int64_t i = 0; i < num_threads; ++i) { 132 | tdata[i].threadid = i; 133 | tdata[i].bar1 = bar1; 134 | tdata[i].bar2 = bar2; 135 | tdata[i].bar3 = bar3; 136 | tdata[i].bar4 = bar4; 137 | tdata[i].shared = &counter->counter; 138 | tdata[i].delay = delay; 139 | tdata[i].count = 0; 140 | tdata[i].even_count = 0; 141 | tdata[i].stop_count = to_insert_total; 142 | tdata[i].fail_count = 0; 143 | 144 | uint64_t ret = pthread_create(&threads[i], NULL, thread_incs, (void*) &tdata[i]); 145 | } 146 | 147 | pthread_barrier_wait(bar1); 148 | atomic_trace::start_roi(); 149 | 
pthread_barrier_wait(bar2); 150 | 151 | pthread_barrier_wait(bar3); 152 | atomic_trace::end_roi(); 153 | pthread_barrier_wait(bar4); 154 | 155 | for (int64_t i = 0; i < num_threads; ++i) { 156 | uint64_t ret = pthread_join(threads[i], NULL); 157 | } 158 | 159 | assert(counter->counter == to_insert_total+num_threads); 160 | 161 | // print out the count and even count for each thread 162 | int64_t cas_fails = 0; 163 | for (int64_t i = 0; i < num_threads; ++i) { 164 | std::cout << "thread " << i << "\t" << tdata[i].count << "\t" << tdata[i].even_count << "\t" << tdata[i].fail_count << std::endl; 165 | cas_fails += tdata[i].fail_count; 166 | } 167 | 168 | std::cout << "CAS fails: " << cas_fails << std::endl; 169 | 170 | delete [] threads; 171 | delete [] tdata; 172 | delete bar1; 173 | delete bar2; 174 | delete bar3; 175 | delete bar4; 176 | 177 | atomic_trace::special_free(counter); 178 | } 179 | -------------------------------------------------------------------------------- /trace/makefile.rules: -------------------------------------------------------------------------------- 1 | ############################################################## 2 | # 3 | # This file includes all the test targets as well as all the 4 | # non-default build rules and test recipes. 5 | # 6 | ############################################################## 7 | 8 | ###### Additional includes that are specific to this directory ###### 9 | 10 | # Placeholder for additional include files. 11 | 12 | 13 | ############################################################## 14 | # 15 | # Test targets 16 | # 17 | ############################################################## 18 | 19 | ###### Place all generic definitions here ###### 20 | 21 | # This defines tests which run tools of the same name. This is simply for convenience to avoid 22 | # defining the test name twice (once in TOOL_ROOTS and again in TEST_ROOTS). 23 | # Tests defined here should not be defined in TOOL_ROOTS and TEST_ROOTS. 24 | TEST_TOOL_ROOTS := trace 25 | 26 | # This defines the tests to be run that were not already defined in TEST_TOOL_ROOTS. 27 | TEST_ROOTS := 28 | 29 | # This defines a list of tests that should run in the "short" sanity. Tests in this list must also 30 | # appear either in the TEST_TOOL_ROOTS or the TEST_ROOTS list. 31 | # If the entire directory should be tested in sanity, assign TEST_TOOL_ROOTS and TEST_ROOTS to the 32 | # SANITY_SUBSET variable in the tests section below (see example in makefile.rules.tmpl). 33 | SANITY_SUBSET := 34 | 35 | # This defines the tools which will be run during the the tests, and were not already defined in 36 | # TEST_TOOL_ROOTS. 37 | TOOL_ROOTS := 38 | 39 | # This defines the static analysis tools which will be run during the the tests. They should not 40 | # be defined in TEST_TOOL_ROOTS. If a test with the same name exists, it should be defined in 41 | # TEST_ROOTS. 42 | # Note: Static analysis tools are in fact executables linked with the Pin Static Analysis Library. 43 | # This library provides a subset of the Pin APIs which allows the tool to perform static analysis 44 | # of an application or dll. Pin itself is not used when this tool runs. 45 | SA_TOOL_ROOTS := 46 | 47 | # This defines all the applications that will be run during the tests. 48 | APP_ROOTS := 49 | 50 | # This defines any additional object files that need to be compiled. 51 | OBJECT_ROOTS := 52 | 53 | # This defines any additional dlls (shared objects), other than the pintools, that need to be compiled. 
54 | DLL_ROOTS := 55 | 56 | # This defines any static libraries (archives), that need to be built. 57 | LIB_ROOTS := 58 | 59 | ###### Place architecture-specific definitions here ###### 60 | 61 | # Place ia32-specific definitions here if they apply to all supported operating systems. 62 | ifeq ($(TARGET),ia32) 63 | TEST_TOOL_ROOTS += 64 | TEST_ROOTS += 65 | SANITY_SUBSET += 66 | TOOL_ROOTS += 67 | SA_TOOL_ROOTS += 68 | APP_ROOTS += 69 | OBJECT_ROOTS += 70 | DLL_ROOTS += 71 | LIB_ROOTS += 72 | endif 73 | 74 | # Place intel64-specific definitions here if they apply to all supported operating systems. 75 | ifeq ($(TARGET),intel64) 76 | TEST_TOOL_ROOTS += 77 | TEST_ROOTS += 78 | SANITY_SUBSET += 79 | TOOL_ROOTS += 80 | SA_TOOL_ROOTS += 81 | APP_ROOTS += 82 | OBJECT_ROOTS += 83 | DLL_ROOTS += 84 | LIB_ROOTS += 85 | endif 86 | 87 | ###### Place probe mode tests here ###### 88 | 89 | ifeq ($(PROBE),1) 90 | TEST_TOOL_ROOTS += 91 | TEST_ROOTS += 92 | SANITY_SUBSET += 93 | TOOL_ROOTS += 94 | APP_ROOTS += 95 | OBJECT_ROOTS += 96 | DLL_ROOTS += 97 | LIB_ROOTS += 98 | endif 99 | 100 | ###### Place OS-specific definitions here ###### 101 | 102 | # Android 103 | ifeq ($(TARGET_OS),android) 104 | TEST_TOOL_ROOTS += 105 | TEST_ROOTS += 106 | SANITY_SUBSET += 107 | TOOL_ROOTS += 108 | SA_TOOL_ROOTS += 109 | APP_ROOTS += 110 | OBJECT_ROOTS += 111 | DLL_ROOTS += 112 | LIB_ROOTS += 113 | ifeq ($(TARGET),ia32) 114 | TEST_TOOL_ROOTS += 115 | TEST_ROOTS += 116 | SANITY_SUBSET += 117 | TOOL_ROOTS += 118 | SA_TOOL_ROOTS += 119 | APP_ROOTS += 120 | OBJECT_ROOTS += 121 | DLL_ROOTS += 122 | LIB_ROOTS += 123 | endif 124 | ifeq ($(TARGET),intel64) 125 | TEST_TOOL_ROOTS += 126 | TEST_ROOTS += 127 | SANITY_SUBSET += 128 | TOOL_ROOTS += 129 | SA_TOOL_ROOTS += 130 | APP_ROOTS += 131 | OBJECT_ROOTS += 132 | DLL_ROOTS += 133 | LIB_ROOTS += 134 | endif 135 | endif 136 | 137 | # Linux 138 | ifeq ($(TARGET_OS),linux) 139 | TEST_TOOL_ROOTS += 140 | TEST_ROOTS += 141 | SANITY_SUBSET += 142 | TOOL_ROOTS += 143 | SA_TOOL_ROOTS += 144 | APP_ROOTS += 145 | OBJECT_ROOTS += 146 | DLL_ROOTS += 147 | LIB_ROOTS += 148 | ifeq ($(TARGET),ia32) 149 | TEST_TOOL_ROOTS += 150 | TEST_ROOTS += 151 | SANITY_SUBSET += 152 | TOOL_ROOTS += 153 | SA_TOOL_ROOTS += 154 | APP_ROOTS += 155 | OBJECT_ROOTS += 156 | DLL_ROOTS += 157 | LIB_ROOTS += 158 | endif 159 | ifeq ($(TARGET),intel64) 160 | TEST_TOOL_ROOTS += 161 | TEST_ROOTS += 162 | SANITY_SUBSET += 163 | TOOL_ROOTS += 164 | SA_TOOL_ROOTS += 165 | APP_ROOTS += 166 | OBJECT_ROOTS += 167 | DLL_ROOTS += 168 | LIB_ROOTS += 169 | endif 170 | endif 171 | 172 | # Mac 173 | ifeq ($(TARGET_OS),mac) 174 | TEST_TOOL_ROOTS += 175 | TEST_ROOTS += 176 | SANITY_SUBSET += 177 | TOOL_ROOTS += 178 | SA_TOOL_ROOTS += 179 | APP_ROOTS += 180 | OBJECT_ROOTS += 181 | DLL_ROOTS += 182 | LIB_ROOTS += 183 | ifeq ($(TARGET),ia32) 184 | TEST_TOOL_ROOTS += 185 | TEST_ROOTS += 186 | SANITY_SUBSET += 187 | TOOL_ROOTS += 188 | SA_TOOL_ROOTS += 189 | APP_ROOTS += 190 | OBJECT_ROOTS += 191 | DLL_ROOTS += 192 | LIB_ROOTS += 193 | endif 194 | ifeq ($(TARGET),intel64) 195 | TEST_TOOL_ROOTS += 196 | TEST_ROOTS += 197 | SANITY_SUBSET += 198 | TOOL_ROOTS += 199 | SA_TOOL_ROOTS += 200 | APP_ROOTS += 201 | OBJECT_ROOTS += 202 | DLL_ROOTS += 203 | LIB_ROOTS += 204 | endif 205 | endif 206 | 207 | # Windows 208 | ifeq ($(TARGET_OS),windows) 209 | TEST_TOOL_ROOTS += 210 | TEST_ROOTS += 211 | SANITY_SUBSET += 212 | TOOL_ROOTS += 213 | SA_TOOL_ROOTS += 214 | APP_ROOTS += 215 | OBJECT_ROOTS += 216 | DLL_ROOTS += 217 | LIB_ROOTS += 218 | ifeq 
($(TARGET),ia32)
219 | TEST_TOOL_ROOTS +=
220 | TEST_ROOTS +=
221 | SANITY_SUBSET +=
222 | TOOL_ROOTS +=
223 | SA_TOOL_ROOTS +=
224 | APP_ROOTS +=
225 | OBJECT_ROOTS +=
226 | DLL_ROOTS +=
227 | LIB_ROOTS +=
228 | endif
229 | ifeq ($(TARGET),intel64)
230 | TEST_TOOL_ROOTS +=
231 | TEST_ROOTS +=
232 | SANITY_SUBSET +=
233 | TOOL_ROOTS +=
234 | SA_TOOL_ROOTS +=
235 | APP_ROOTS +=
236 | OBJECT_ROOTS +=
237 | DLL_ROOTS +=
238 | LIB_ROOTS +=
239 | endif
240 | endif
241 | 
242 | ###### Handle exceptions here ######
243 | 
244 | # If some tests need to be disabled, do this here
245 | 
246 | 
247 | ##############################################################
248 | #
249 | # Test recipes
250 | #
251 | ##############################################################
252 | 
253 | ###### Finalize sanity here ######
254 | 
255 | # If all tests in this directory should run in sanity, uncomment the following line.
256 | # SANITY_SUBSET := $(TEST_TOOL_ROOTS) $(TEST_ROOTS)
257 | 
258 | # This section contains recipes for tests other than the default.
259 | # See makefile.default.rules for the default test rules.
260 | # All tests in this section should adhere to the naming convention: .test
261 | 
262 | 
263 | ##############################################################
264 | #
265 | # Build rules
266 | #
267 | ##############################################################
268 | 
269 | # This section contains the build rules for all binaries that have special build rules.
270 | # See makefile.default.rules for the default build rules.
271 | 
272 | ###### Special tools' build rules ######
273 | 
274 | # placeholder for special tools' build rules
275 | 
276 | ###### Special applications' build rules ######
277 | 
278 | # placeholder for special applications' build rules
279 | 
280 | ###### Special objects' build rules ######
281 | 
282 | # placeholder for special objects' build rules
283 | 
284 | ###### Special dlls' build rules ######
285 | 
286 | # placeholder for special dlls' build rules
287 | 
288 | ###### Special libs' build rules ######
289 | 
290 | # placeholder for special libs' build rules
291 | 
292 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | atomic-memory-trace
2 | ===================
3 | 
4 | PIN-tool to produce multi-threaded atomic memory traces
5 | 
6 | PIN is a useful tool for instrumenting applications and easily producing memory
7 | access traces. However, tracing memory accesses from multiple threads suffers
8 | from the atomic instrumentation problem -- instructions responsible for
9 | tracing/logging an access happen separately from that access. Races between
10 | threads may result in a different order being traced than actually occurs.
11 | This tool provides atomic instrumentation by simulating cache coherence. In
12 | addition, the tool will trace thread start/end, an optional region of interest,
13 | and user-provided function calls and returns.
14 | 
15 | The primary alternative to this tool is architectural simulation. Most
16 | simulators are complicated to learn, complicated to use (getting OSes and
17 | workloads running properly may be difficult), and slow (most simulators are
18 | single threaded and cannot leverage multithreading to produce a faster
19 | trace/simulation).
20 | 
21 | This README documents the tracing pintool and example test case. This tool was
22 | developed using version 2.12-58423 of PIN on Ubuntu 12.04.
There are no plans
23 | to support operating systems other than Linux or systems other than x86_64.
24 | The pintool relies on the Boost libraries. This software comes with no support
25 | but may be useful to others. This project uses the MIT license.
26 | 
27 | Quick Start
28 | ===========
29 | 
30 | Build the pintool.
31 | Change into the trace directory and run:
32 | 
33 | ```
34 | % make PIN_ROOT=<path to the PIN kit>
35 | ```
36 | 
37 | Run the tool as any other pintool:
38 | 
39 | ```
40 | % pin -t trace/obj-intel64/trace.so -- <your application>
41 | ```
42 | 
43 | By default, output appears in the file memory_trace.out. All threads and
44 | memory accesses will be traced. The output appears with one event (thread
45 | start, function call, or memory access) per line, starting with an arbitrary
46 | timestamp (used to merge events later).
47 | 
48 | An easy way to produce useful output, sorting by timestamp and then stripping
49 | the timestamps away, is to use:
50 | 
51 | ```
52 | % sort -k 1 -n memory_trace.out | sed '/thread_sync/d' | awk 'BEGIN {OFS="\t"}; {$1="";sub("\t\t","")}1' > memory_trace.clean
53 | ```
54 | 
55 | memory_trace.clean will contain properly ordered events and accesses, with the
56 | sync entries removed.
57 | 
58 | Tool Options
59 | ============
60 | 
61 | * -o
62 | The output file name. Defaults to 'memory_trace.out'.
63 | * -r
64 | Do threads need to be registered? If 0/false, all memory accesses from all
65 | threads will be traced. If 1/true, only accesses from registered threads will
66 | be traced. See annotation's atomic_trace::register_thread(threadid).
67 | * -f
68 | File with the list of functions to trace.
69 | * -i
70 | Use Region of Interest? If ROI is used, memory tracing will only occur while
71 | the ROI is active. Thread start/stop tracing and function tracing will
72 | always occur.
73 | * -l
74 | Number of locks for simulated cache coherence. Increasing this number will
75 | use more memory and may hurt cache performance but will improve concurrency.
76 | If contention occurs for specific address locks (i.e. cache lines), try
77 | increasing this. A value of 1 serializes all memory accesses across threads.
78 | * -b
79 | Cache block size. By default 64 bytes.
80 | * -a
81 | Accesses per thread before flushing. Each thread keeps a local trace buffer
82 | that is occasionally flushed to the global file. More accesses per thread
83 | ensure that grabbing the global lock does not become the primary
84 | bottleneck.
85 | * -t
86 | Test. Turns off address locking, breaking atomicity. Activate this flag to
87 | see the instrumentation atomicity problem.
88 | * -d
89 | Allowable timestamp difference. If threads' timestamps diverge beyond
90 | this limit, threads will sync and flush the other threads. This makes merging
91 | the output significantly easier.
92 | * -c
93 | Trace failed Compare-And-Swaps. Default 0 (no). Generally every CAS
94 | instruction is considered a write, even when the instruction fails.
95 | Use this option to only log a CAS as a write when it succeeds.
96 | 
97 | Output Format
98 | =============
99 | 
100 | Each line contains one event as a tab-delimited list. Entries contain
101 | threadids, which may be -1 (if threads must be registered but a traced function
102 | is called from an unregistered thread), assigned by the pintool if threads are
103 | not required to be registered, or set by the registration function (described
104 | later). All entries start with a timestamp and threadid:
105 | 
106 | * memory: Each instruction may read two addresses and write one.
There are 107 | possible sub-entries for each of these accesses. The second read does not 108 | contain a size field, as it may only occur with the first read and has the same 109 | size (that is, r2's size is the same as r's). 110 | 111 | ``` 112 | m [r <address> <size>] [r2 <address>] [w <address> <size>] 113 | ``` 114 | 115 | * thread registered: 116 | 117 | ``` 118 | threadid tr 119 | ``` 120 | 121 | * thread finished: 122 | 123 | ``` 124 | threadid tf 125 | ``` 126 | 127 | * function call: All requested functions are traced, even if not on a registered 128 | thread. The first 3 arguments of the function are traced, as well as the stack 129 | pointer to match up calls and returns. 130 | 131 | ``` 132 | fc <function name> <stack pointer> <arg1> <arg2> <arg3> 133 | ``` 134 | 135 | * function return: 136 | 137 | ``` 138 | fr <function name> <stack pointer> <return value> 139 | ``` 140 | 141 | * start Region of Interest: 142 | 143 | ``` 144 | start_roi 145 | ``` 146 | 147 | * end Region of Interest: 148 | 149 | ``` 150 | end_roi 151 | ``` 152 | 153 | * context change: Context changes may interrupt the locking necessary to provide 154 | atomic tracing. On a context change, consider that the next access may not be 155 | traced atomically. -- It is unclear how the PIN internals work and if this is 156 | really a concern (I haven't observed any context changes yet). 157 | 158 | ``` 159 | ctxt_change 160 | ``` 161 | 162 | * thread sync: When threads flush other threads to keep all threads close in 163 | timestamp, the merging process must be made aware of this. 164 | 165 | ``` 166 | thread_sync 167 | ``` 168 | 169 | Function Tracing 170 | ================ 171 | 172 | In addition to memory accesses, many functions are traced. A few are specific 173 | to this tool, but any user-provided function can be traced. The provided 174 | src/annotation.cpp and src/annotation.h (creates libannotation) provide the 175 | header and library for these functions. In general it is easier to provide 176 | these as a library to ensure they are not in-lined. 177 | 178 | ``` 179 | atomic_trace::register_thread 180 | atomic_trace::start_roi 181 | atomic_trace::end_roi 182 | ``` 183 | 184 | These functions allow the pintool to highlight a region of interest (memory 185 | accesses outside of the region will not be traced) and name threads, useful to 186 | match trace threads to user threads. 187 |
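For example, an application might annotate itself along the lines of the following sketch (hypothetical code, not part of this repository; it assumes the program links against libannotation and is traced with -r 1 -i 1, and the worker/counter names are made up for illustration):

```
// Hypothetical usage sketch -- not part of this repository.
// Assumes the program links against libannotation and is traced with -r 1 -i 1.
#include <pthread.h>
#include <stdint.h>
#include "annotation.h"

static long counter = 0;

static void *worker(void *arg) {
    // Name this thread; with -r 1 only registered threads have memory traced.
    atomic_trace::register_thread((int64_t)(intptr_t)arg);
    for (int i = 0; i < 1000; ++i) {
        __sync_fetch_and_add(&counter, 1);  // traced while the ROI is active
    }
    return NULL;
}

int main() {
    pthread_t threads[4];
    atomic_trace::start_roi();              // with -i 1, memory tracing starts here
    for (intptr_t t = 0; t < 4; ++t)
        pthread_create(&threads[t], NULL, worker, (void *)t);
    for (int t = 0; t < 4; ++t)
        pthread_join(threads[t], NULL);
    atomic_trace::end_roi();                // memory tracing stops here
    return 0;
}
```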
188 | In addition, the pintool takes an "-f" argument that is a file with a list of 189 | functions (one per line) that will be traced. The functions should be listed 190 | in their undecorated form (as per pin, see above for examples). 191 | 192 | Merge Utility 193 | ============= 194 | 195 | src/merge.cpp provides a tool that takes a memory trace on stdin and pushes 196 | the merged (by timestamp) trace to the output, stripping out the timestamp from 197 | each entry as well as omitting sync entries. While not the most efficient (the 198 | sort utility is somewhat faster for file traces), it suffices and allows traces 199 | to be piped while using a small memory footprint. 200 | 201 | Merge is necessary because the sort utility cannot be used in a pipe; the 202 | entire file must first be available. When merging thread streams, no entry may 203 | be output until it is certain that no older entry can appear from any thread. The 204 | guarantee is provided when a thread submits a newer trace (because each thread 205 | trace is monotonically increasing), and the threads are kept close together via 206 | sync traces.
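The rule the merge applies can be sketched in a few lines (a simplified illustration only, not the actual implementation in src/merge.cpp; it assumes the trace has already been reduced to timestamp/threadid pairs and it ignores thread start/finish handling and unregistered threads):

```
// Sketch of the merge rule: an entry may be emitted once its timestamp is at
// or below the oldest "newest timestamp seen" over all known threads.
// src/merge.cpp applies the same rule but also buffers the full entry text,
// strips timestamps, and uses tr/tf entries to track which threads exist.
#include <stdint.h>
#include <functional>
#include <iostream>
#include <map>
#include <queue>
#include <utility>
#include <vector>

int main() {
    typedef std::pair<int64_t, int64_t> entry_t;            // (timestamp, threadid)
    std::map<int64_t, int64_t> newest;                       // newest timestamp per thread
    std::priority_queue<entry_t, std::vector<entry_t>,
                        std::greater<entry_t> > pending;     // min-heap by timestamp

    int64_t timestamp, threadid;
    while (std::cin >> timestamp >> threadid) {
        newest[threadid] = timestamp;                        // per-thread streams are monotonic
        pending.push(entry_t(timestamp, threadid));

        // safe horizon: every known thread has already produced something at
        // least this new, so nothing older can still arrive
        int64_t safe = newest.begin()->second;
        for (std::map<int64_t, int64_t>::const_iterator it = newest.begin();
             it != newest.end(); ++it)
            if (it->second < safe) safe = it->second;

        // emit, in timestamp order, everything at or below the horizon
        while (!pending.empty() && pending.top().first <= safe) {
            std::cout << pending.top().first << '\t' << pending.top().second << '\n';
            pending.pop();
        }
    }
    return 0;
}
```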
207 | 208 | This tool chain works as follows: create a special fifo file to connect pin to 209 | merge 210 | 211 | ``` 212 | % mkfifo mypipe 213 | ``` 214 | 215 | Pipe this file to merge, and the merge output to a simulator utility 216 | 217 | ``` 218 | % cat mypipe | ./merge | ./simulator 219 | ``` 220 | 221 | Start the pintool, directing the trace to the fifo file 222 | 223 | ``` 224 | % pin -t trace.so -o mypipe -- ./app 225 | ``` 226 | 227 | Make sure that names are fully qualified where necessary to reach pin, 228 | trace.so, mypipe, and your app; and that any other desired trace arguments are used. 229 | 230 | Alternatively, merge can be used to sort trace files, although the sort command 231 | above may be faster. 232 | 233 | Test - Atomic Increment 234 | ======================= 235 | 236 | src/inc.cpp provides a test case for instrumentation atomicity. Several 237 | threads use atomic_fetchadd_long to repeatedly increment a shared counter. 238 | Each thread counts the number of times it increments the counter and the number 239 | of times the counter was even before the increment. 240 | 241 | A trace simulation can reconstruct the operation of this program. All memory 242 | operations to the counter's address are atomic increments. Simply observing 243 | the order in which increments occur allows us to reconstruct the number of 244 | increments from each thread, as well as how many times each thread observes a 245 | previously-even number on increment. 246 | 247 | If the trace simulation matches the actual program, there is a good chance 248 | things are working. Additionally, using the "-t" option we can force the 249 | pintool to disable locking for testing. If this breaks the match, we have even more 250 | confidence that the tool works. 251 | 252 | Run as 253 | 254 | ``` 255 | % cat mypipe | ./merge | ./inc_sim.py 256 | ``` 257 | 258 | and 259 | 260 | ``` 261 | % pin -t trace.so -o mypipe -f func_list -- ./inc 262 | ``` 263 | 264 | and verify that the outputs of the simulation and the actual program are the 265 | same. Use the -t 1 flag for trace.so and see that the outputs differ. 266 | 267 | A similar set of tools (inc_cas.cpp, inc_sim_cas.py) tests the functionality 268 | of the -c option (whether or not to trace failed CAS as a write). 269 | 270 | Internals: Locking Protocol 271 | =========================== 272 | 273 | Some rules need to be followed to prevent deadlock. 274 | These rules adhere to the rules provided by the PIN manual. 275 | 276 | Locks: 277 | ------ 278 | 279 | * Address locks 280 | Responsible for providing instrumentation atomicity. Tracing an instruction 281 | will acquire whatever locks cover the accessed addresses, and these locks are 282 | released on the next pin function (so all pin analysis and callback 283 | functions first release all address locks). 284 | * Thread start/finish lock 285 | Acquired when a thread starts or finishes, or to block threads from 286 | starting or finishing. Necessary for actions that need to synchronize 287 | timestamps across all threads. Any thread that wishes to access another 288 | thread's data must hold this. 289 | * Thread locks 290 | Cover each thread's trace buffer and Lamport timestamp. Other threads may 291 | have to read/update another thread's timestamp or flush its buffer. 292 | * File lock 293 | Covers the trace file handle, thread count, last_flushed (really all shared 294 | global objects). 295 | 296 | Locking Rules 297 | ------------- 298 | 299 | Function callbacks must release all locks before returning.
300 | Analysis functions (for instructions and routines) may hold address locks beyond return. 301 | Address locks are released at the beginning of every analysis and callback 302 | routine, treating each routine as the "end" of an instruction analysis routine. 303 | 304 | Locks must always be acquired in the following order: 305 | 306 | 1. Address locks by index order of the address_lock_bank 307 | 2. Thread start/finish lock 308 | 3. Thread locks by pin THREADID order 309 | 4. The global lock 310 | 311 | This implies, for example, that one may not acquire **any** address locks while 312 | holding a thread lock. 313 | 314 | Internals: Lamport Clock 315 | ======================== 316 | 317 | Actual trace order is determined by a Lamport clock, and all trace entries use 318 | a Lamport timestamp. 319 | 320 | All address locks keep a timestamp. The time of an access = max(timestamp of 321 | all address locks acquired, thread timestamp) + 1. All address locks and the 322 | accessing thread must be updated to this access timestamp. 323 | 324 | Functions on registered threads increment that thread's timestamp. To enforce 325 | an order of functions from two threads there must be corresponding memory 326 | accesses (release and acquire, such as a lock). 327 | 328 | Functions from unregistered threads, roi traces, and the start and finish 329 | of threads synchronize all threads. This is done by acquiring the thread 330 | start/finish lock (so that new threads do not appear) and ALL thread locks, 331 | moving all threads up to the latest global timestamp + 1. 332 | 333 | Time separation: It's possible for 2 threads to diverge in time, requiring 334 | trace merging to use a huge amount of memory (imagine 2 threads, one sleeping 335 | and the other executing continuously -- we cannot merge the running 336 | thread's entries until we know the sleeping thread won't produce an older 337 | timestamp -- this only happens once we observe a new, large timestamp). Solve 338 | this by keeping track of the minimum "last flushed" thread timestamp. When a 339 | thread tries to flush and sees that any thread is too old, it will try to flush 340 | and update the timestamp for all too-old threads (those below some threshold), 341 | bounding how far apart threads can be in the trace file. Calculating the 342 | minimum last flushed timestamp requires keeping a last_flushed bimap under the 343 | global lock. A last_flushed_cache timestamp (also covered by the global file lock) 344 | makes this more efficient. 345 | -------------------------------------------------------------------------------- /src/merge.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Steven Pelley 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | // this software and associated documentation files (the "Software"), to deal in 5 | // the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | // the Software, and to permit persons to whom the Software is furnished to do so, 8 | // subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in all 11 | // copies or substantial portions of the Software.
12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | // merge.cpp 21 | // merge the output of the atomic trace 22 | // by lamport timestamp, strip the timestamp, and output 23 | // the result. 24 | // 25 | // Break the input stream into thread components, 26 | // assert that the threads are monotonic increasing in timestamp. 27 | // 28 | // We can only move entries to the output once we are certain no 29 | // earlier timestamp is going to show up from another thread. 30 | // this happens once we have observed at least that timestamp 31 | // by every other thread. Entries from unregistered threads (-1) 32 | // must synchronize first, so it is not possible to pop a timestamp 33 | // from any registered thread and then later pop an earlier timestamp 34 | // from an unregistered thread. Assert this as well. 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include 47 | #include 48 | #include 49 | #include 50 | 51 | #if TRACE_DEBUG 52 | #define DO_DEBUG(S) S 53 | #else 54 | #define DO_DEBUG(S) /* */ 55 | #endif 56 | 57 | ///////////////////////////////////////////// 58 | // queue for each thread 59 | // tracks timestamp, string (trace line), and length of string for each 60 | // tracks whether the thread is finished 61 | ///////////////////////////////////////////// 62 | class thread_queue_t { 63 | public: 64 | 65 | thread_queue_t(); 66 | ~thread_queue_t(); 67 | 68 | // enqueue the string in buffer of length with timestamp 69 | // return if the queue had been empty 70 | bool enqueue(int64_t timestamp, const char *buffer, int64_t length); 71 | 72 | // pop an entry into buffer (must be at least 256 bytes) 73 | // and put the string's length into length and timestamp into timestamp 74 | // return true if had not been empty and data is valid 75 | bool dequeue(int64_t *timestamp, char *buffer, int64_t *length); 76 | 77 | // return next timestamp or -1 if empty 78 | int64_t peek(); 79 | 80 | void finish() {_thread_finished = true;} 81 | bool is_finished() {return _thread_finished;} 82 | 83 | private: 84 | static const int64_t _initial_capacity = 1024*1024; // 1mb per thread 85 | void _enlargen_char_buffer(); 86 | void _assert_repinv(); 87 | 88 | typedef std::pair time_size_t; 89 | typedef std::queue time_size_queue_t; 90 | 91 | // holds string traces associated with this thread 92 | char *_buffer; 93 | int64_t _buffer_capacity; 94 | int64_t _buffer_size; 95 | int64_t _buffer_next_insert; 96 | int64_t _buffer_next_pop; 97 | 98 | // holds (timestamp, size) for each entry in char_stream 99 | time_size_queue_t _time_size_queue; 100 | 101 | int64_t _max_timestamp; 102 | bool _thread_finished; 103 | }; 104 | 105 | thread_queue_t::thread_queue_t() 106 | : _buffer(new char[_initial_capacity]) 107 | , _buffer_capacity(_initial_capacity) 108 | , _buffer_size(0) 109 | , _buffer_next_insert(0) 110 | , _buffer_next_pop(0) 111 | , _time_size_queue() 112 | , _max_timestamp(0) 113 | , _thread_finished(false) 114 | {} 115 | 116 | thread_queue_t::~thread_queue_t() { 117 | 
delete [] _buffer; 118 | } 119 | 120 | bool thread_queue_t::enqueue(int64_t timestamp, const char *buffer, int64_t length) { 121 | DO_DEBUG(_assert_repinv()); 122 | assert(timestamp > _max_timestamp); 123 | assert(!_thread_finished); 124 | 125 | bool ret = _time_size_queue.empty(); 126 | _max_timestamp = timestamp; 127 | 128 | int64_t space_left = _buffer_capacity - _buffer_size; 129 | // if we won't have enough room move to a larger buffer 130 | if (length > space_left) { 131 | _enlargen_char_buffer(); 132 | } 133 | 134 | // may have to wrap, so do in 2 copies: 135 | // 1: from next_insert to end 136 | // 2: from front of buffer 137 | int64_t copy1_idx = _buffer_next_insert; 138 | int64_t copy1_len = std::min(_buffer_capacity - copy1_idx, length); 139 | int64_t copy2_len = length - copy1_len; 140 | memcpy(&_buffer[copy1_idx], buffer, copy1_len); 141 | memcpy(_buffer, &buffer[copy1_len], copy2_len); 142 | 143 | // update buffer counters 144 | _buffer_size += length; 145 | _buffer_next_insert = (_buffer_next_insert + length) % _buffer_capacity; 146 | 147 | _time_size_queue.push( time_size_t(timestamp, length) ); 148 | 149 | DO_DEBUG(_assert_repinv()); 150 | return ret; 151 | } 152 | 153 | bool thread_queue_t::dequeue(int64_t *timestamp, char *buffer, int64_t *length) { 154 | DO_DEBUG( _assert_repinv()); 155 | assert(timestamp); 156 | assert(buffer); 157 | assert(length); 158 | 159 | if (_time_size_queue.empty()) return false; 160 | 161 | time_size_t time_size = _time_size_queue.front(); 162 | _time_size_queue.pop(); 163 | *timestamp = time_size.first; 164 | *length = time_size.second; 165 | 166 | 167 | // copy from next_pop to the end 168 | // copy remaining from the beginning 169 | int64_t copy1_idx = _buffer_next_pop; 170 | int64_t copy1_len = std::min(*length, _buffer_capacity - copy1_idx); 171 | int64_t copy2_len = *length - copy1_len; 172 | memcpy(buffer, &_buffer[copy1_idx], copy1_len); 173 | memcpy(&buffer[copy1_len], _buffer, copy2_len); 174 | 175 | _buffer_size -= *length; 176 | _buffer_next_pop = (_buffer_next_pop + *length) % _buffer_capacity; 177 | DO_DEBUG(_assert_repinv()); 178 | 179 | return true; 180 | } 181 | 182 | int64_t thread_queue_t::peek() { 183 | if (_time_size_queue.empty()) return -1; 184 | return _time_size_queue.front().first; 185 | } 186 | 187 | // grow the buffer 188 | void thread_queue_t::_enlargen_char_buffer() { 189 | DO_DEBUG(_assert_repinv()); 190 | int64_t new_capacity = 4 * _buffer_capacity; 191 | char *new_buffer = new char[new_capacity]; 192 | 193 | int64_t copy1_len = std::min(_buffer_size, _buffer_capacity - _buffer_next_pop); 194 | int64_t copy2_len = _buffer_size - copy1_len; 195 | 196 | memcpy(new_buffer, &_buffer[_buffer_next_pop], copy1_len); 197 | memcpy(&new_buffer[copy1_len], _buffer, copy2_len); 198 | delete [] _buffer; 199 | _buffer = new_buffer; 200 | _buffer_next_pop = 0; 201 | _buffer_next_insert = _buffer_size; 202 | _buffer_capacity = new_capacity; 203 | DO_DEBUG(_assert_repinv()); 204 | } 205 | 206 | void thread_queue_t::_assert_repinv() { 207 | // check that the sum of sizes in _time_size_queue 208 | // match _buffer_size 209 | // queues do not support iteration so copy and pop 210 | time_size_queue_t copy_q(_time_size_queue); 211 | int64_t size = 0; 212 | while (!copy_q.empty()) { 213 | size += copy_q.front().second; 214 | copy_q.pop(); 215 | } 216 | assert(size == _buffer_size); 217 | } 218 | 219 | ///////////////////////////////////////////// 220 | // holds queue per active thread 221 | 
///////////////////////////////////////////// 222 | typedef std::pair threadid_queue_t; 223 | typedef std::map thread_queue_map_t; 224 | thread_queue_map_t thread_queue_map; 225 | 226 | ///////////////////////////////////////////// 227 | // utility class for priority queue that holds (time, threadid) tuples 228 | // and returns the lowest time 229 | ///////////////////////////////////////////// 230 | 231 | typedef std::pair time_threadid_t; 232 | class compare_first_greater_t { 233 | public: 234 | bool operator() (const time_threadid_t &x, time_threadid_t &y) const { 235 | return x.first > y.first; 236 | } 237 | }; 238 | 239 | ///////////////////////////////////////////// 240 | // returns the minimum trace read between active threads 241 | ///////////////////////////////////////////// 242 | 243 | std::priority_queue< 244 | time_threadid_t, 245 | std::vector, 246 | compare_first_greater_t 247 | > time_threadid_min; 248 | 249 | ///////////////////////////////////////////// 250 | // returns the minimum of each threads' greatest observed time 251 | // any trace less than this cannot yet be merged because a lower 252 | // timestamp can still appear 253 | ///////////////////////////////////////////// 254 | 255 | class safe_timestamp_t { 256 | public: 257 | safe_timestamp_t(); 258 | int64_t timestamp(); 259 | void set_time(int64_t threadid, int64_t timestamp); 260 | void finish_thread(int64_t threadid); 261 | 262 | private: 263 | int64_t _safe_timestamp_cache; 264 | typedef std::pair threadid_timestamp_t; 265 | typedef std::vector tt_list_t; 266 | tt_list_t _tt_list; 267 | 268 | // compare by threadid in a list of (threadid, timestamp) 269 | class tt_compare_t { 270 | public: 271 | int64_t arg; 272 | int64_t set_arg(int64_t new_arg) {arg = new_arg;} 273 | bool operator() (threadid_timestamp_t &tt){ 274 | return tt.first == arg; 275 | } 276 | }; 277 | 278 | void _update_cache(); 279 | } safe_timestamp; 280 | 281 | safe_timestamp_t::safe_timestamp_t() 282 | : _safe_timestamp_cache(0) 283 | , _tt_list() 284 | {} 285 | 286 | int64_t safe_timestamp_t::timestamp() { 287 | return _safe_timestamp_cache; 288 | } 289 | 290 | void safe_timestamp_t::set_time(int64_t threadid, int64_t timestamp) { 291 | static tt_compare_t tt_comp; 292 | tt_comp.set_arg(threadid); 293 | tt_list_t::iterator it = std::find_if(_tt_list.begin(), _tt_list.end(), tt_comp); 294 | bool need_update = false; 295 | if (it == _tt_list.end()) { 296 | _tt_list.push_back(threadid_timestamp_t(threadid, timestamp)); 297 | need_update = true; 298 | } else { 299 | need_update = it->second <= _safe_timestamp_cache; 300 | (*it) = threadid_timestamp_t(threadid, timestamp); 301 | } 302 | 303 | if (need_update) { 304 | _update_cache(); 305 | } 306 | } 307 | 308 | void safe_timestamp_t::finish_thread(int64_t threadid) { 309 | static tt_compare_t tt_comp; 310 | tt_comp.set_arg(threadid); 311 | tt_list_t::iterator it = std::find_if(_tt_list.begin(), _tt_list.end(), tt_comp); 312 | assert(it != _tt_list.end()); 313 | _tt_list.erase(it); 314 | _update_cache(); 315 | } 316 | 317 | void safe_timestamp_t::_update_cache() { 318 | tt_list_t::iterator it = _tt_list.begin(); 319 | 320 | if (it == _tt_list.end()) { 321 | return; 322 | } else { 323 | int64_t m = it->second; 324 | for (; it != _tt_list.end(); ++it) { 325 | m = std::min(m, it->second); 326 | } 327 | _safe_timestamp_cache = m; 328 | } 329 | } 330 | 331 | ///////////////////////////////////////////// 332 | // helper functions and main for merging 333 | ///////////////////////////////////////////// 
334 | 335 | // parse the line, determining if it is an important type of trace 336 | // pos1 is the position of the first tab (for cutting out the timestamp 337 | void parse_line(const char* buf, int64_t *pos1, int64_t *timestamp, int64_t *threadid, bool *is_thread_register, bool *is_thread_finish, bool *is_sync) { 338 | const char *tab1 = strchr(buf, '\t'); 339 | assert(tab1); 340 | *pos1 = tab1 - buf; 341 | const char *tab2 = strchr(&buf[*pos1+1], '\t'); 342 | assert(tab2); 343 | int64_t pos2 = tab2 - buf; 344 | 345 | *timestamp = atoll(buf); 346 | *threadid = atoll(tab1+1); 347 | 348 | // tr, tf, and thread_sync terminate the line 349 | *is_thread_register = strcmp("tr", tab2+1) == 0; 350 | *is_thread_finish = strcmp("tf", tab2+1) == 0; 351 | *is_sync = strcmp("thread_sync", tab2+1) == 0; 352 | } 353 | 354 | int64_t active_threads = 0; 355 | 356 | // merge as many as allowable 357 | // can merge so long as minimum timestamp entry is less/equal min of all 358 | // threads' max observed timestamp 359 | void merge() { 360 | static char buf[128]; // temp space used for merging 361 | bool keep_merging = true; 362 | while (keep_merging) { 363 | time_threadid_t min_entry = time_threadid_min.top(); 364 | 365 | bool any_threads_active = active_threads > 0; 366 | int64_t all_threads_observed = safe_timestamp.timestamp(); 367 | keep_merging = !any_threads_active || min_entry.first <= all_threads_observed; 368 | 369 | // put the minimum entry into the output and pop it 370 | if (keep_merging) { 371 | time_threadid_min.pop(); 372 | 373 | // pop and output from min_entry.second 374 | thread_queue_map_t::iterator it = thread_queue_map.find(min_entry.second); 375 | assert(it != thread_queue_map.end()); 376 | thread_queue_t *q = it->second; 377 | 378 | int64_t popped_time, popped_length; 379 | bool not_empty = q->dequeue(&popped_time, buf, &popped_length); 380 | assert(not_empty); 381 | assert(popped_time == min_entry.first); 382 | std::cout.write(buf, popped_length); 383 | std::cout << std::endl; 384 | 385 | int64_t next_time = q->peek(); 386 | if (next_time == -1 && q->is_finished()) { 387 | delete q; 388 | thread_queue_map.erase(it); 389 | } 390 | // peek the next item from that threadid 391 | if (next_time >= 0) { 392 | time_threadid_min.push(time_threadid_t(next_time, min_entry.second)); 393 | } 394 | } 395 | keep_merging = keep_merging && !time_threadid_min.empty(); 396 | } 397 | } 398 | 399 | int main(int argc, char **argv) { 400 | bool keep_timestamps = argc > 1 && std::string(argv[1]) == "-t"; 401 | std::string line; 402 | int64_t line_count = 0; 403 | char buf[128]; 404 | while (std::cin.getline(buf, 128)) { 405 | int64_t len = strlen(buf); 406 | ++line_count; 407 | 408 | int64_t pos1, timestamp, threadid; 409 | bool is_thread_register, is_thread_finish, is_sync; 410 | parse_line(buf, &pos1, ×tamp, &threadid, &is_thread_register, &is_thread_finish, &is_sync); 411 | 412 | if (is_thread_register) { 413 | ++active_threads; 414 | } else if (is_thread_finish) { 415 | --active_threads; 416 | safe_timestamp.finish_thread(threadid); 417 | } 418 | 419 | // update thread's max observed if registered thread 420 | if (!is_thread_finish && threadid >= 0) { 421 | safe_timestamp.set_time(threadid, timestamp); 422 | if (is_sync) continue; 423 | } 424 | 425 | // get/construct the threadid's queue to add this to 426 | thread_queue_t* thread_queue; 427 | thread_queue_map_t::iterator it = thread_queue_map.find(threadid); 428 | if (it == thread_queue_map.end()) { 429 | thread_queue = new thread_queue_t(); 430 | 
thread_queue_map.insert(it, threadid_queue_t(threadid, thread_queue)); 431 | } else { 432 | thread_queue = it->second; 433 | } 434 | 435 | // strip out the timestamp from the string as it is enqueued 436 | bool was_empty; 437 | if (keep_timestamps) { 438 | was_empty = thread_queue->enqueue(timestamp, buf, len); 439 | } else { 440 | was_empty = thread_queue->enqueue(timestamp, buf+pos1+1, len - (pos1+1)); 441 | } 442 | 443 | if (is_thread_finish) thread_queue->finish(); 444 | 445 | // if thread had been empty update heap of each thread's min available entry 446 | if (was_empty) { 447 | time_threadid_min.push(time_threadid_t(timestamp, threadid)); 448 | } 449 | 450 | // try to merge some entries 451 | merge(); 452 | } 453 | assert(time_threadid_min.empty()); // should be no more to merge 454 | } 455 | 456 | -------------------------------------------------------------------------------- /trace/trace.cpp: -------------------------------------------------------------------------------- 1 | //Copyright (c) 2013 Steven Pelley 2 | // 3 | //Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | //this software and associated documentation files (the "Software"), to deal in 5 | //the Software without restriction, including without limitation the rights to 6 | //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | //the Software, and to permit persons to whom the Software is furnished to do so, 8 | //subject to the following conditions: 9 | // 10 | //The above copyright notice and this permission notice shall be included in all 11 | //copies or substantial portions of the Software. 12 | // 13 | //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | // atomic memory trace pintool 21 | // see README for directions and implementation details. 22 | 23 | #include 24 | #include 25 | #include "pin.H" 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | /* ===================================================================== */ 39 | /* Global Variables */ 40 | /* ===================================================================== */ 41 | 42 | // helper class to define critical sections 43 | // allocate on the stack within a scope (curly braces) for the CS 44 | // constructor will acquire the lock, destructor (called when CS object 45 | // goes out of scope) releases the lock 46 | class pin_critical_section { 47 | public: 48 | pin_critical_section(PIN_MUTEX *lock) 49 | : _lock(lock), _paused(false), _pred(true) { 50 | PIN_MutexLock(_lock); 51 | } 52 | 53 | // predicated CS. 
Only do it predicate is true 54 | pin_critical_section(PIN_MUTEX *lock, bool predicate) 55 | : _lock(lock), _paused(false), _pred(predicate) { 56 | if (_pred) { 57 | PIN_MutexLock(_lock); 58 | } 59 | } 60 | 61 | ~pin_critical_section() { 62 | if (_pred && !_paused) { 63 | PIN_MutexUnlock(_lock); 64 | } 65 | } 66 | 67 | void pause() { 68 | if (_pred && !_paused) { 69 | PIN_MutexUnlock(_lock); 70 | } 71 | } 72 | 73 | // make sure not to violate the lock hierarchy if you use this! 74 | void restart() { 75 | if (_pred && _paused) { 76 | PIN_MutexLock(_lock); 77 | } 78 | } 79 | 80 | private: 81 | pin_critical_section(); 82 | PIN_MUTEX *_lock; 83 | bool _paused; 84 | bool _pred; 85 | }; 86 | 87 | bool in_roi; 88 | bool require_roi; 89 | bool log_cas_fails; 90 | bool register_threads = false; 91 | bool turn_off_locks = false; 92 | 93 | ///////////// 94 | // global lock covers the file buffer, thread count, etc 95 | // may not be held beyond analysis function/callback return 96 | // i.e., critical section must be contained in function 97 | ///////////// 98 | struct lock_wrapper_t { 99 | PIN_MUTEX lock; 100 | char padding [56]; 101 | }; 102 | 103 | // bimap of 104 | typedef boost::bimap< 105 | boost::bimaps::set_of, 106 | boost::bimaps::multiset_of 107 | > last_flushed_t; 108 | 109 | // file access 110 | lock_wrapper_t file_lock; 111 | std::ofstream trace_file; 112 | last_flushed_t last_flushed; // pin threadid to last_flushed 113 | int64_t last_flushed_cache; // occassionally computed min flushed 114 | ///////////// 115 | // end of global lock protection 116 | ///////////// 117 | 118 | // thread tracking 119 | lock_wrapper_t thread_start_fini_lock; 120 | int64_t num_threads = 0; 121 | set pin_threadid_set; 122 | 123 | TLS_KEY tls_key; 124 | 125 | int64_t num_locks = 0; 126 | int64_t block_size = 0; 127 | int64_t block_size_log = 0; 128 | int64_t accesses_flush = 0; 129 | int64_t timestamp_difference = 0; 130 | 131 | // address locks allows atomic tracing 132 | // may be held beyond analysis function return 133 | // may not be held beyond callback functino return 134 | // must be released on context change 135 | struct address_lock_t { 136 | PIN_MUTEX lock; 137 | int64_t lamport_timestamp; 138 | char padding [48]; 139 | }; 140 | 141 | address_lock_t *address_lock_bank; 142 | 143 | /* ===================================================================== */ 144 | /* TLS Variables */ 145 | /* ===================================================================== */ 146 | 147 | class thread_data_t; 148 | 149 | thread_data_t* get_tls(THREADID threadid) { 150 | thread_data_t *tdata = 151 | static_cast(PIN_GetThreadData(tls_key, threadid)); 152 | return tdata; 153 | } 154 | 155 | 156 | class thread_data_t { 157 | public: 158 | thread_data_t() 159 | : user_threadid(-1) 160 | , event_count(0) 161 | , memory_instruction_count(0) 162 | , index_lock_list() 163 | , buffered_entries(0) 164 | , lamport_timestamp(0) 165 | { 166 | PIN_MutexInit(&thread_lock); 167 | index_lock_list.reserve(32); 168 | } 169 | 170 | // must hold thread_start_fini_lock 171 | // will either hold no thread locks or all thread locks 172 | // 173 | // try to flush threads in flush_others 174 | // if their timestamp is still below the threshold 175 | // (threads may race and flush their own buffer first) 176 | void attempt_flush_others(int64_t this_time, bool hold_all_thread_locks) { 177 | vector::iterator it; 178 | for (it = flush_others.begin(); it != flush_others.end(); ++it) { 179 | int64_t pin_threadid = *it; 180 | set::iterator set_it 
= pin_threadid_set.find(pin_threadid); 181 | if (set_it != pin_threadid_set.end()) { 182 | thread_data_t *tdata = get_tls(pin_threadid); 183 | { 184 | pin_critical_section CS(&tdata->thread_lock, !hold_all_thread_locks); 185 | int64_t diff = this_time - tdata->lamport_timestamp; 186 | if (diff >= timestamp_difference * .75) { // sync it! 187 | if (tdata->user_threadid >= 0) { 188 | tdata->trace_stream << this_time << "\t" << tdata->user_threadid << "\tthread_sync" << "\n"; 189 | ++tdata->buffered_entries; 190 | } 191 | tdata->lamport_timestamp = this_time; 192 | 193 | if (tdata->buffered_entries > 0) { // unregistered thread might be empty 194 | pin_critical_section CS2(&file_lock.lock); 195 | trace_file << tdata->trace_stream.rdbuf(); 196 | assert(trace_file); 197 | tdata->buffered_entries = 0; 198 | tdata->trace_stream.str(std::string()); 199 | last_flushed.left.erase(pin_threadid); 200 | last_flushed.left.insert(std::pair(pin_threadid, this_time)); 201 | } 202 | } 203 | } 204 | } 205 | } 206 | } 207 | 208 | // after having written something to the string buffer: 209 | // update timestamp 210 | // increment entry count 211 | // flush the buffer if necessary 212 | // flush buffers of any thread lagging too far behind 213 | // 214 | // requires either: 215 | // thread_start_fini_lock not held, this thread's thread_lock held 216 | // or 217 | // thread_start_fini_lock and all thread_locks held 218 | // 219 | // if force always flush this thread's buffer 220 | void touch_buffer(THREADID pin_threadid, int64_t time, bool force, pin_critical_section *thread_cs, bool have_all_thread_locks) { 221 | lamport_timestamp = time; 222 | flush_others.clear(); 223 | 224 | if (++buffered_entries >= accesses_flush || force) { 225 | pin_critical_section CS(&file_lock.lock); 226 | trace_file << trace_stream.rdbuf(); 227 | assert(trace_file); 228 | buffered_entries = 0; 229 | // clear the trace_stream to use it as a queue 230 | trace_stream.str(std::string()); 231 | 232 | // bimap must erase and insert, no modifying 233 | last_flushed.left.erase(pin_threadid); 234 | last_flushed.left.insert(std::pair(pin_threadid, time)); 235 | 236 | // double check last_flushed_cache and possibly update it 237 | // check for any other threads that should be flushed 238 | if (time - last_flushed_cache > timestamp_difference) { 239 | last_flushed_t::right_const_iterator it = last_flushed.right.begin(); 240 | last_flushed_cache = it->first; 241 | // double check 242 | if (time - last_flushed_cache > timestamp_difference) { 243 | // flush any thread who differs by more than .75*timestamp_difference 244 | for ( ; it != last_flushed.right.end(); ++it) { 245 | int64_t diff = time - it->first; 246 | if (diff >= timestamp_difference * .75) { 247 | flush_others.push_back(it->second); 248 | } else { 249 | // cannot be end (because this thread has a high timestamp) 250 | last_flushed_cache = it->first; 251 | break; 252 | } 253 | } 254 | } 255 | } 256 | } 257 | 258 | // get all the thread locks in the proper order and check-and-flush 259 | // the other threads 260 | if (!flush_others.empty()) { 261 | if (!have_all_thread_locks) { 262 | thread_cs->pause(); 263 | { 264 | pin_critical_section CS(&thread_start_fini_lock.lock); 265 | attempt_flush_others(time, have_all_thread_locks); 266 | } 267 | thread_cs->restart(); 268 | } else { 269 | attempt_flush_others(time, have_all_thread_locks); 270 | } 271 | } 272 | } 273 | 274 | int64_t user_threadid; // -1 implies not a registered thread 275 | int64_t event_count; 276 | int64_t 
memory_instruction_count; 277 | 278 | // record which locks we hold (by index) 279 | vector index_lock_list; 280 | 281 | // TLS for memory access tracing 282 | bool trace_this_access; 283 | bool is_read; 284 | bool is_read2; 285 | bool is_write; 286 | int64_t read_size; 287 | int64_t write_size; 288 | uint64_t read1_address; 289 | uint64_t read2_address; 290 | uint64_t write_address; 291 | uint64_t max_locked_timestamp; 292 | 293 | vector flush_others; 294 | 295 | //////////////////// 296 | // the following are covered by this lock 297 | //////////////////// 298 | int8_t padding [64]; 299 | PIN_MUTEX thread_lock; 300 | 301 | stringstream trace_stream; 302 | int64_t buffered_entries; 303 | int64_t lamport_timestamp; 304 | 305 | int8_t padding2 [64]; 306 | //////////////////// 307 | // end thread lock protection 308 | //////////////////// 309 | }; 310 | 311 | /* ===================================================================== */ 312 | /* Commandline Switches */ 313 | /* ===================================================================== */ 314 | 315 | KNOB KnobOutputFile(KNOB_MODE_WRITEONCE, "pintool", 316 | "o", "memory_trace.out", "specify trace file name"); 317 | 318 | KNOB KnobRegisterThreads(KNOB_MODE_WRITEONCE, "pintool", 319 | "r", "0", "threads required to be registered?"); 320 | 321 | KNOB KnobFunctionsFile(KNOB_MODE_WRITEONCE, "pintool", 322 | "f", "trace_functions.in", "file with list of functions to trace"); 323 | 324 | KNOB KnobRequireROI(KNOB_MODE_WRITEONCE, "pintool", 325 | "i", "0", "require region of interest annotation?"); 326 | 327 | KNOB KnobNumAddressLocks (KNOB_MODE_WRITEONCE, "pintool", 328 | "l", "64", "number of locks for simulated cache coherence"); 329 | 330 | KNOB KnobBlockSize (KNOB_MODE_WRITEONCE, "pintool", 331 | "b", "64", "cache line/block size to simulate"); 332 | 333 | KNOB KnobAccessesBeforeFlush (KNOB_MODE_WRITEONCE, "pintool", 334 | "a", "64", "accesses per thread before flushing"); 335 | 336 | KNOB KnobTurnOff (KNOB_MODE_WRITEONCE, "pintool", 337 | "t", "0", "turn off address locking for test (should produce incorrect results)"); 338 | 339 | KNOB KnobTimestampDifference (KNOB_MODE_WRITEONCE, "pintool", 340 | "d", "1000", "How far 2 threads can differ in timestamp before a thread attempts to flush the other's buffer and update its timestamp"); 341 | 342 | KNOB KnobCASFailureWrites(KNOB_MODE_WRITEONCE, "pintool", 343 | "c", "0", "log Compare-And-Swap as a write even when compare fails"); 344 | 345 | /* ===================================================================== */ 346 | /* Helper routines */ 347 | /* ===================================================================== */ 348 | 349 | // requires thread_start_fini_lock already held 350 | // acquires thread_start_fini_lock then all thread locks 351 | // returns max timestamp of all threads 352 | int64_t acquire_all_thread_locks() { 353 | set::iterator it; 354 | int64_t max_time = 0; 355 | for (it = pin_threadid_set.begin(); it != pin_threadid_set.end(); ++it) { 356 | thread_data_t *tdata = get_tls(*it); 357 | PIN_MutexLock(&tdata->thread_lock); 358 | if (tdata->lamport_timestamp > max_time) max_time = tdata->lamport_timestamp; 359 | } 360 | return max_time; 361 | } 362 | 363 | // requires thread_start_fini_lock already held 364 | // release all locks, first setting a new timestamp 365 | void release_all_thread_locks(int64_t new_timestamp) { 366 | set::iterator it; 367 | for (it = pin_threadid_set.begin(); it != pin_threadid_set.end(); ++it) { 368 | thread_data_t *tdata = get_tls(*it); 369 | 
tdata->lamport_timestamp = new_timestamp; 370 | PIN_MutexUnlock(&tdata->thread_lock); 371 | } 372 | } 373 | 374 | // given memory address and size of access, return the first lock index 375 | // if to_lock is not null set it to the number of locks that must be acquired 376 | int64_t lock_index(uint64_t address, uint64_t size, int64_t *to_lock) { 377 | uint64_t removed_blocks = address >> block_size_log; 378 | uint64_t end_removed_blocks = (address+size-1) >> block_size_log; 379 | int64_t number_to_lock = end_removed_blocks - removed_blocks + 1; 380 | // can lock at most num_locks 381 | if (number_to_lock > num_locks) number_to_lock = num_locks; 382 | if (to_lock) *to_lock = number_to_lock; 383 | return removed_blocks % num_locks; 384 | } 385 | 386 | // determine all unique lock_address indices that we must lock 387 | // lock them in index order 388 | // place these indices in index_lock_list 389 | // NOTE: I assume that interrupts cannot occur when this is called, 390 | // as asynchronous interrupts are delayed until the end of the trace, 391 | // and synchronous interrupts from the instrumented program occur 392 | // with/after that instruction. 393 | // See: http://tech.groups.yahoo.com/group/pinheads/message/7742 394 | // 395 | // This allows ctxt_change handler to assume that index_lock_list 396 | // is always consistent 397 | // 398 | // returns the max of the lamport timestamps on acquired locks 399 | int64_t acquire_address_locks(THREADID pin_threadid) { 400 | thread_data_t* tdata = get_tls(pin_threadid); 401 | int64_t to_lock; 402 | if (tdata->is_read) { 403 | int64_t index = lock_index(tdata->read1_address, tdata->read_size, &to_lock); 404 | for (int64_t i = 0; i < to_lock; ++i) { 405 | tdata->index_lock_list.push_back(index); 406 | index = (index+1)%num_locks; 407 | } 408 | } 409 | if (tdata->is_read2) { 410 | int64_t index = lock_index(tdata->read2_address, tdata->read_size, &to_lock); 411 | for (int64_t i = 0; i < to_lock; ++i) { 412 | tdata->index_lock_list.push_back(index); 413 | index = (index+1)%num_locks; 414 | } 415 | } 416 | if (tdata->is_write) { 417 | int64_t index = lock_index(tdata->write_address, tdata->write_size, &to_lock); 418 | for (int64_t i = 0; i < to_lock; ++i) { 419 | tdata->index_lock_list.push_back(index); 420 | index = (index+1)%num_locks; 421 | } 422 | } 423 | 424 | // at this point index_lock_list unsorted and may contain duplicates 425 | std::sort(tdata->index_lock_list.begin(), tdata->index_lock_list.end()); 426 | vector::iterator it; 427 | it = std::unique(tdata->index_lock_list.begin(), tdata->index_lock_list.end()); 428 | tdata->index_lock_list.resize(std::distance(tdata->index_lock_list.begin(), it)); 429 | 430 | int64_t max_time = 0; 431 | for (it = tdata->index_lock_list.begin(); it != tdata->index_lock_list.end(); ++it) { 432 | PIN_MutexLock(&address_lock_bank[*it].lock); 433 | max_time = max_time > address_lock_bank[*it].lamport_timestamp ? 
max_time : address_lock_bank[*it].lamport_timestamp; 434 | } 435 | return max_time; 436 | } 437 | 438 | // Release all locks in index_lock_list 439 | // set their timestamp from the thread's 440 | // clear the list 441 | void release_address_locks(THREADID pin_threadid) { 442 | thread_data_t* tdata = get_tls(pin_threadid); 443 | vector::iterator it; 444 | for (it = tdata->index_lock_list.begin(); it != tdata->index_lock_list.end(); ++it) { 445 | int64_t idx = (*it); 446 | address_lock_bank[idx].lamport_timestamp = tdata->lamport_timestamp; 447 | PIN_MutexUnlock(&address_lock_bank[idx].lock); 448 | } 449 | tdata->index_lock_list.clear(); 450 | } 451 | 452 | /* ===================================================================== */ 453 | /* Analysis routines */ 454 | /* these functions (in particular the memory ones) may hold address */ 455 | /* locks beyond the duration of the call */ 456 | /* ===================================================================== */ 457 | 458 | ////////////// 459 | // memory access functions 460 | ////////////// 461 | 462 | void memory_access_header_a(THREADID pin_threadid) { 463 | release_address_locks(pin_threadid); 464 | thread_data_t* tdata = get_tls(pin_threadid); 465 | 466 | int64_t threadid = tdata->user_threadid; 467 | { 468 | pin_critical_section CS(&tdata->thread_lock); 469 | bool do_trace = in_roi || !require_roi; 470 | tdata->trace_this_access = do_trace && threadid >= 0; 471 | } 472 | tdata->is_read = false; 473 | tdata->is_read2 = false; 474 | tdata->is_write = false; 475 | } 476 | 477 | void memory_access_read1_a( 478 | THREADID pin_threadid 479 | , ADDRINT address 480 | , UINT32 size 481 | ) { 482 | thread_data_t* tdata = get_tls(pin_threadid); 483 | if (tdata->trace_this_access) { 484 | tdata->is_read = true; 485 | tdata->read1_address = address; 486 | tdata->read_size = size; 487 | } 488 | } 489 | 490 | void memory_access_read2_a( 491 | THREADID pin_threadid 492 | , ADDRINT address 493 | ) { 494 | thread_data_t* tdata = get_tls(pin_threadid); 495 | if (tdata->trace_this_access) { 496 | tdata->is_read2 = true; 497 | tdata->read2_address = address; 498 | } 499 | } 500 | 501 | void memory_access_write_a( 502 | THREADID pin_threadid 503 | , ADDRINT address 504 | , UINT32 size 505 | ) { 506 | thread_data_t* tdata = get_tls(pin_threadid); 507 | if (tdata->trace_this_access) { 508 | tdata->is_write = true; 509 | tdata->write_address = address; 510 | tdata->write_size = size; 511 | } 512 | } 513 | 514 | void memory_access_acquire_locks_a(THREADID pin_threadid) { 515 | thread_data_t* tdata = get_tls(pin_threadid); 516 | if (tdata->trace_this_access) { 517 | // locks released and timestamps updated at next instruction/function 518 | if (turn_off_locks) { 519 | tdata->max_locked_timestamp = 0; 520 | } else { 521 | tdata->max_locked_timestamp = acquire_address_locks(pin_threadid); 522 | } 523 | } 524 | } 525 | 526 | void memory_access_footer_a(THREADID pin_threadid) { 527 | thread_data_t* tdata = get_tls(pin_threadid); 528 | // acquire necessary locks (global or addresses) and trace 529 | if (tdata->trace_this_access) { 530 | int64_t threadid = tdata->user_threadid; 531 | { 532 | pin_critical_section CS(&tdata->thread_lock); 533 | if (*static_cast(&tdata->trace_this_access)) { 534 | int64_t new_time = tdata->max_locked_timestamp; 535 | if (tdata->lamport_timestamp > new_time) new_time = tdata->lamport_timestamp; 536 | ++new_time; 537 | 538 | tdata->trace_stream << new_time << '\t' << threadid << "\tm"; 539 | if (tdata->is_read) { 540 | 
tdata->trace_stream << "\tr" << 541 | "\t" << tdata->read1_address << 542 | "\t" << tdata->read_size; 543 | } 544 | if (tdata->is_read2) { 545 | tdata->trace_stream << "\tr2" << 546 | "\t" << tdata->read2_address; 547 | } 548 | if (tdata->is_write) { 549 | tdata->trace_stream << "\tw" << 550 | "\t" << tdata->write_address << 551 | "\t" << tdata->write_size; 552 | } 553 | tdata->trace_stream << "\n"; 554 | tdata->touch_buffer(pin_threadid, new_time, false, &CS, false); 555 | } 556 | } 557 | } 558 | } 559 | 560 | // for any atomic RMW that might fail 561 | // on failure set tdata->is_write to false 562 | unsigned int zf_bit = 1 << 6; 563 | void memory_access_CAS_footer_a(THREADID pin_threadid, ADDRINT flags_reg) { 564 | thread_data_t* tdata = get_tls(pin_threadid); 565 | if (tdata->trace_this_access) { 566 | // CAS succeeded if Zero Flag is set, failed otherwise 567 | // ZF is bit 6 of eflags 568 | int result = flags_reg & zf_bit; 569 | assert(tdata->is_write); 570 | if (!result) { 571 | tdata->is_write = false; 572 | } 573 | } 574 | } 575 | 576 | // instructions that do not access memory should 577 | // still attempt to release locks 578 | void memory_access_release_a(THREADID pin_threadid) { 579 | release_address_locks(pin_threadid); 580 | } 581 | 582 | ////////////// 583 | // end memory access functions 584 | ////////////// 585 | 586 | 587 | void function_call_a( 588 | CHAR *name 589 | , THREADID pin_threadid 590 | , ADDRINT stack_pointer 591 | , ADDRINT arg1 592 | , ADDRINT arg2 593 | , ADDRINT arg3 594 | ) { 595 | release_address_locks(pin_threadid); 596 | // always trace regardless of thread or ROI 597 | // (may need to trace experiment startup before ROI or registered threads) 598 | thread_data_t *tdata = get_tls(pin_threadid); 599 | int64_t threadid = tdata->user_threadid; 600 | bool registered = threadid >= 0; 601 | int64_t time = -1; 602 | 603 | { 604 | pin_critical_section CS(&thread_start_fini_lock.lock, !registered); 605 | if (!registered) { 606 | time = acquire_all_thread_locks(); 607 | } 608 | 609 | { 610 | pin_critical_section CS(&tdata->thread_lock, registered); 611 | time = threadid < 0 ? time : tdata->lamport_timestamp; 612 | ++time; 613 | tdata->lamport_timestamp = time; 614 | 615 | tdata->trace_stream << 616 | time << '\t' << threadid << "\tfc" << 617 | "\t" << name << 618 | "\t" << stack_pointer << 619 | "\t" << arg1 << 620 | "\t" << arg2 << 621 | "\t" << arg3 << 622 | "\n"; 623 | tdata->touch_buffer(pin_threadid, time, !registered, &CS, !registered); 624 | } 625 | 626 | if (!registered) { 627 | release_all_thread_locks(time); 628 | } 629 | } 630 | } 631 | 632 | void function_return_a( 633 | CHAR *name 634 | , THREADID pin_threadid 635 | , ADDRINT stack_pointer 636 | , ADDRINT return_value 637 | ) { 638 | release_address_locks(pin_threadid); 639 | thread_data_t *tdata = get_tls(pin_threadid); 640 | int64_t threadid = tdata->user_threadid; 641 | bool registered = threadid >= 0; 642 | // always trace regardless of thread or ROI 643 | // (may need to trace experiment startup before ROI or registered threads) 644 | int64_t time = -1; 645 | 646 | { 647 | pin_critical_section CS(&thread_start_fini_lock.lock, !registered); 648 | if (!registered) { 649 | time = acquire_all_thread_locks(); 650 | } 651 | 652 | { 653 | pin_critical_section CS(&tdata->thread_lock, registered); 654 | time = threadid < 0 ? 
time : tdata->lamport_timestamp; 655 | ++time; 656 | 657 | tdata->trace_stream << 658 | time << '\t' << threadid << "\tfr" << 659 | "\t" << name << 660 | "\t" << stack_pointer << 661 | "\t" << return_value << 662 | "\n"; 663 | tdata->touch_buffer(pin_threadid, time, !registered, &CS, !registered); 664 | } 665 | 666 | if (!registered) { 667 | release_all_thread_locks(time); 668 | } 669 | } 670 | } 671 | 672 | // Always synchronize threads, even if this occurs on registered thread 673 | void change_roi_a(THREADID pin_threadid, bool new_roi, CHAR *change_to) { 674 | release_address_locks(pin_threadid); 675 | thread_data_t *tdata = get_tls(pin_threadid); 676 | 677 | { 678 | pin_critical_section CS(&thread_start_fini_lock.lock); 679 | int64_t time = acquire_all_thread_locks() + 1; 680 | in_roi = new_roi; 681 | tdata->trace_stream << time << '\t' << tdata->user_threadid << 682 | '\t' << change_to << "_roi\n"; 683 | tdata->touch_buffer(pin_threadid, time, true, NULL, true); 684 | release_all_thread_locks(time); 685 | } 686 | } 687 | 688 | /* ===================================================================== */ 689 | /* Callback routines */ 690 | /* these may not hold locks beyond duration of call */ 691 | /* ===================================================================== */ 692 | 693 | ////////////// 694 | // thread start and end tracing 695 | // 696 | // for consistent merging must hold all thread locks while forcing 697 | // thread registration to global file 698 | ////////////// 699 | 700 | // helper 701 | // must hold thread_start_fini_lock and all thread locks 702 | void trace_start_thread(THREADID pin_threadid, int64_t time) { 703 | thread_data_t *tdata = get_tls(pin_threadid); 704 | tdata->trace_stream << time << '\t' << tdata->user_threadid << "\ttr\n"; 705 | tdata->touch_buffer(pin_threadid, time, true, NULL, true); // force flush for merging 706 | } 707 | 708 | void register_thread_a(THREADID pin_threadid, ADDRINT user_threadid) { 709 | release_address_locks(pin_threadid); 710 | thread_data_t *tdata = get_tls(pin_threadid); 711 | if (register_threads) { 712 | pin_critical_section CS(&thread_start_fini_lock.lock); 713 | int64_t new_time = acquire_all_thread_locks() + 1; 714 | 715 | tdata->user_threadid = user_threadid; 716 | trace_start_thread(pin_threadid, new_time); 717 | 718 | release_all_thread_locks(new_time); 719 | } 720 | } 721 | 722 | void thread_start_a(THREADID threadid, CONTEXT *ctxt, INT32 flags, VOID *v) { 723 | thread_data_t *tdata = new thread_data_t(); 724 | { 725 | pin_critical_section CS(&thread_start_fini_lock.lock); 726 | ++num_threads; 727 | PIN_SetThreadData(tls_key, tdata, threadid); 728 | 729 | if (!register_threads) { 730 | tdata->user_threadid = PIN_ThreadUid(); 731 | // synchronize threads -- already holding thread_s/f lock 732 | int64_t start_timestamp = acquire_all_thread_locks() + 1; 733 | trace_start_thread(threadid, start_timestamp); 734 | release_all_thread_locks(start_timestamp); 735 | } else { 736 | pin_critical_section CS2(&tdata->thread_lock); 737 | tdata->lamport_timestamp = 0; 738 | } 739 | 740 | pin_threadid_set.insert(threadid); 741 | { 742 | pin_critical_section CS3(&file_lock.lock); 743 | last_flushed.left.insert(std::pair(threadid, 0)); 744 | } 745 | } 746 | } 747 | 748 | void thread_fini_a(THREADID threadid, const CONTEXT *ctxt, INT32 code, VOID *v) { 749 | release_address_locks(threadid); 750 | thread_data_t *tdata = get_tls(threadid); 751 | bool registered_thread = tdata->user_threadid >= 0; 752 | { 753 | pin_critical_section 
CS(&thread_start_fini_lock.lock); 754 | if (registered_thread) { 755 | // synchronize threads -- already holding thread_s/f lock 756 | int64_t timestamp = acquire_all_thread_locks() + 1; 757 | tdata->trace_stream << timestamp << '\t' << tdata->user_threadid << "\ttf\n"; 758 | tdata->touch_buffer(threadid, timestamp, true, NULL, true); // force flush for merging 759 | release_all_thread_locks(timestamp); 760 | } 761 | 762 | pin_threadid_set.erase(threadid); 763 | // always flush -- might be an unregistered thread with function traces 764 | { 765 | pin_critical_section CS2(&tdata->thread_lock); 766 | if (tdata->buffered_entries > 0) { 767 | pin_critical_section CS3(&file_lock.lock); 768 | trace_file << tdata->trace_stream.rdbuf(); 769 | assert(trace_file); 770 | tdata->trace_stream.str(std::string()); 771 | last_flushed.left.erase(threadid); 772 | } 773 | } 774 | delete get_tls(threadid); 775 | } 776 | } 777 | 778 | ////////////// 779 | // end thread start and end tracing 780 | ////////////// 781 | 782 | void fini_a(INT32 code, VOID *v) { 783 | // We do not have access to a THREADID, leading me to believe this callback 784 | // occurs only after all threads (including main thread) have joined 785 | // no need to release locks 786 | { 787 | pin_critical_section CS(&file_lock.lock); 788 | trace_file.close(); 789 | } 790 | } 791 | 792 | // only record if on a registered thread 793 | void ctxt_change_release( 794 | THREADID pin_threadid 795 | , CONTEXT_CHANGE_REASON reason 796 | , const CONTEXT *from 797 | , CONTEXT *to 798 | , INT32 info 799 | , VOID *v 800 | ) { 801 | release_address_locks(pin_threadid); 802 | thread_data_t *tdata = get_tls(pin_threadid); 803 | int64_t threadid = tdata->user_threadid; 804 | { 805 | pin_critical_section CS(&tdata->thread_lock); 806 | int64_t time = tdata->lamport_timestamp + 1; 807 | tdata->trace_stream 808 | << time << '\t' << threadid << "\tctxt_change\n"; 809 | tdata->touch_buffer(pin_threadid, time, false, &CS, false); 810 | } 811 | } 812 | 813 | /* ===================================================================== */ 814 | /* Instrumentation routines */ 815 | /* ===================================================================== */ 816 | 817 | VOID Image(IMG img, VOID *v) 818 | { 819 | vector* trace_functions = reinterpret_cast< vector* >(v); 820 | for (SEC sec = IMG_SecHead(img); SEC_Valid(sec); sec = SEC_Next(sec)) { 821 | for (RTN rtn = SEC_RtnHead(sec); RTN_Valid(rtn); rtn = RTN_Next(rtn)) { 822 | string und_func_name = PIN_UndecorateSymbolName(RTN_Name(rtn), UNDECORATION_NAME_ONLY); 823 | 824 | if (und_func_name == "atomic_trace::register_thread") { 825 | RTN_Open(rtn); 826 | RTN_InsertCall(rtn, IPOINT_BEFORE, (AFUNPTR)register_thread_a, 827 | IARG_THREAD_ID, 828 | IARG_FUNCARG_ENTRYPOINT_VALUE, 0, // arg1 -- user threadid 829 | IARG_END); 830 | RTN_Close(rtn); 831 | } 832 | if (und_func_name == "atomic_trace::start_roi") { 833 | RTN_Open(rtn); 834 | RTN_InsertCall(rtn, IPOINT_BEFORE, (AFUNPTR)change_roi_a 835 | , IARG_THREAD_ID 836 | , IARG_BOOL, true 837 | , IARG_ADDRINT, "start" 838 | , IARG_END); 839 | RTN_Close(rtn); 840 | } 841 | if (und_func_name == "atomic_trace::end_roi") { 842 | RTN_Open(rtn); 843 | RTN_InsertCall(rtn, IPOINT_BEFORE, (AFUNPTR)change_roi_a 844 | , IARG_THREAD_ID 845 | , IARG_BOOL, false 846 | , IARG_ADDRINT, "end" 847 | , IARG_END); 848 | RTN_Close(rtn); 849 | } 850 | 851 | // try to find this function in our list 852 | vector::iterator it = find(trace_functions->begin(), trace_functions->end(), und_func_name); 853 | if 
(it != trace_functions->end()) { 854 | const char *func_name = it->c_str(); 855 | RTN_Open(rtn); 856 | // call traces name, new stack pointer (after call), values of first three arguments 857 | RTN_InsertCall(rtn, IPOINT_BEFORE, (AFUNPTR)function_call_a, 858 | IARG_ADDRINT, func_name, 859 | IARG_THREAD_ID, 860 | IARG_REG_VALUE, REG_STACK_PTR, 861 | IARG_FUNCARG_ENTRYPOINT_VALUE, 0, 862 | IARG_FUNCARG_ENTRYPOINT_VALUE, 1, 863 | IARG_FUNCARG_ENTRYPOINT_VALUE, 2, 864 | IARG_END); 865 | 866 | // return traces name, old stack pointer (before return), value of return 867 | RTN_InsertCall(rtn, IPOINT_AFTER, (AFUNPTR)function_return_a, 868 | IARG_ADDRINT, func_name, 869 | IARG_THREAD_ID, 870 | IARG_REG_VALUE, REG_STACK_PTR, 871 | IARG_FUNCRET_EXITPOINT_VALUE, 872 | IARG_END); 873 | RTN_Close(rtn); 874 | } 875 | } 876 | } 877 | } 878 | 879 | VOID Instruction(INS ins, void * v) { 880 | if (INS_IsMemoryRead(ins) || INS_IsMemoryWrite(ins)) { 881 | INS_InsertPredicatedCall( 882 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_header_a 883 | , IARG_THREAD_ID 884 | , IARG_END 885 | ); 886 | 887 | if (INS_IsMemoryRead(ins)) { 888 | INS_InsertPredicatedCall( 889 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_read1_a 890 | , IARG_THREAD_ID 891 | , IARG_MEMORYREAD_EA 892 | , IARG_MEMORYREAD_SIZE 893 | , IARG_END 894 | ); 895 | } 896 | 897 | if (INS_HasMemoryRead2(ins)) { 898 | INS_InsertPredicatedCall( 899 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_read2_a 900 | , IARG_THREAD_ID 901 | , IARG_MEMORYREAD2_EA 902 | , IARG_END 903 | ); 904 | } 905 | 906 | if (INS_IsMemoryWrite(ins)) { 907 | INS_InsertPredicatedCall( 908 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_write_a 909 | , IARG_THREAD_ID 910 | , IARG_MEMORYWRITE_EA 911 | , IARG_MEMORYWRITE_SIZE 912 | , IARG_END 913 | ); 914 | } 915 | 916 | INS_InsertPredicatedCall( 917 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_acquire_locks_a 918 | , IARG_THREAD_ID 919 | , IARG_END 920 | ); 921 | 922 | OPCODE op = INS_Opcode(ins); 923 | bool cmpxchg = op == XED_ICLASS_CMPXCHG || op == XED_ICLASS_CMPXCHG16B || op == XED_ICLASS_CMPXCHG8B; 924 | // if atomic cas test for CAS success and trace AFTER instruction 925 | if (!log_cas_fails && INS_IsAtomicUpdate(ins) && cmpxchg) { 926 | assert(INS_HasFallThrough(ins)); 927 | assert(INS_IsMemoryWrite(ins)); 928 | INS_InsertPredicatedCall( 929 | ins, IPOINT_AFTER, (AFUNPTR) memory_access_CAS_footer_a 930 | , IARG_THREAD_ID 931 | // must use REG_RFLAGS (although it is not documented) 932 | // REG_EFLAGS and REG_FLAGS produce failures 933 | // see http://tech.groups.yahoo.com/group/pinheads/message/6581 934 | , IARG_REG_VALUE, REG_RFLAGS 935 | , IARG_END 936 | ); 937 | 938 | INS_InsertPredicatedCall( 939 | ins, IPOINT_AFTER, (AFUNPTR) memory_access_footer_a 940 | , IARG_THREAD_ID 941 | , IARG_END 942 | ); 943 | } else { // if not atomic CAS trace BEFORE instruction 944 | INS_InsertPredicatedCall( 945 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_footer_a 946 | , IARG_THREAD_ID 947 | , IARG_END 948 | ); 949 | } 950 | } else { 951 | INS_InsertPredicatedCall( 952 | ins, IPOINT_BEFORE, (AFUNPTR) memory_access_release_a 953 | , IARG_THREAD_ID 954 | , IARG_END 955 | ); 956 | } 957 | } 958 | 959 | /* ===================================================================== */ 960 | /* Print Help Message */ 961 | /* ===================================================================== */ 962 | 963 | INT32 Usage() 964 | { 965 | cerr << "This tool produces a consistent memory access trace and persistence annotation." 
<< endl; 966 | cerr << endl << KNOB_BASE::StringKnobSummary() << endl; 967 | return -1; 968 | } 969 | 970 | /* ===================================================================== */ 971 | /* Helper functions for Main */ 972 | /* ===================================================================== */ 973 | 974 | vector* read_trace_functions(string file_name) { 975 | vector *vec = new vector; 976 | 977 | std::ifstream function_file(file_name.c_str()); 978 | string func; 979 | while (function_file.good()) { 980 | getline(function_file, func); 981 | boost::algorithm::trim(func); 982 | vec->push_back(func); 983 | } 984 | 985 | return vec; 986 | } 987 | 988 | bool is_power_2(int64_t num) { 989 | return ((num > 0) && !(num & (num - 1))); 990 | } 991 | 992 | int64_t binary_log(int64_t num) { 993 | if (num <= 0) return 0; 994 | uint64_t unum = num; 995 | int64_t l = 0; 996 | while (unum >>= 1) ++l; 997 | return l; 998 | } 999 | 1000 | /* ===================================================================== */ 1001 | /* Main */ 1002 | /* ===================================================================== */ 1003 | 1004 | int main(int argc, char *argv[]) 1005 | { 1006 | // Initialize pin & symbol manager 1007 | PIN_InitSymbols(); 1008 | if( PIN_Init(argc,argv) ) 1009 | { 1010 | return Usage(); 1011 | } 1012 | 1013 | register_threads = KnobRegisterThreads.Value(); 1014 | require_roi = KnobRequireROI.Value(); 1015 | log_cas_fails = KnobCASFailureWrites.Value(); 1016 | vector* trace_functions = read_trace_functions(KnobFunctionsFile.Value()); 1017 | 1018 | accesses_flush = KnobAccessesBeforeFlush.Value(); 1019 | 1020 | // these must both be a power of 2 1021 | num_locks = KnobNumAddressLocks.Value(); 1022 | assert(is_power_2(num_locks)); 1023 | block_size = KnobBlockSize.Value(); 1024 | assert(is_power_2(block_size)); 1025 | block_size_log = binary_log(block_size); 1026 | turn_off_locks = KnobTurnOff.Value(); 1027 | timestamp_difference = KnobTimestampDifference.Value(); 1028 | 1029 | address_lock_bank = new address_lock_t [num_locks]; 1030 | for (int64_t i = 0; i < num_locks; ++i) { 1031 | address_lock_bank[i].lamport_timestamp = 0; 1032 | PIN_MutexInit(&address_lock_bank[i].lock); 1033 | } 1034 | tls_key = PIN_CreateThreadDataKey(0); 1035 | PIN_MutexInit(&file_lock.lock); 1036 | PIN_MutexInit(&thread_start_fini_lock.lock); 1037 | 1038 | last_flushed_cache = 0; 1039 | 1040 | // Write to a file since cout and cerr maybe closed by the application 1041 | trace_file.open(KnobOutputFile.Value().c_str()); 1042 | trace_file << dec; 1043 | trace_file.setf(ios::showbase); 1044 | 1045 | // Register Image to be called to instrument functions. 1046 | IMG_AddInstrumentFunction(Image, trace_functions); 1047 | INS_AddInstrumentFunction(Instruction, 0); 1048 | 1049 | PIN_AddThreadStartFunction(thread_start_a, 0); 1050 | PIN_AddThreadFiniFunction(thread_fini_a, 0); 1051 | PIN_AddFiniFunction(fini_a, 0); 1052 | PIN_AddContextChangeFunction(ctxt_change_release, 0); 1053 | 1054 | // Never returns 1055 | PIN_StartProgram(); 1056 | 1057 | return 0; 1058 | } 1059 | 1060 | --------------------------------------------------------------------------------