├── LICENSE
├── Makefile
├── README.md
├── RELEASE_NOTES
├── index.html
├── itypes_default.spec
├── itypes_default_available
├── mica.conf.example
├── mica.cpp
├── mica.h
├── mica_all.cpp
├── mica_all.h
├── mica_ilp.cpp
├── mica_ilp.h
├── mica_init.cpp
├── mica_init.h
├── mica_itypes.cpp
├── mica_itypes.h
├── mica_memfootprint.cpp
├── mica_memfootprint.h
├── mica_memstackdist.cpp
├── mica_memstackdist.h
├── mica_ppm.cpp
├── mica_ppm.h
├── mica_reg.cpp
├── mica_reg.h
├── mica_stride.cpp
├── mica_stride.h
├── mica_utils.cpp
├── mica_utils.h
└── tableGen.sh
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2007-2011, Kenneth Hoste and Lieven Eeckhout (Ghent University, Belgium)
2 | kehoste@elis.ugent.be, leeckhou@elis.ugent.be
3 |
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7 |
8 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 | * Neither the name of the organization nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 |
12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
13 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
14 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
15 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
16 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
17 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
18 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
19 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
20 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
21 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | ifdef PIN_ROOT
2 | CONFIG_ROOT := $(PIN_ROOT)/source/tools/Config
3 | else
4 | CONFIG_ROOT := ../Config
5 | endif
6 | # CONFIG_ROOT (chosen above) locates the shared build configuration; TOOLS_ROOT, OBJDIR, the *_SUFFIX and TOOL_* variables are presumably defined by these includes -- TODO confirm.
7 | include $(CONFIG_ROOT)/makefile.config
8 | include $(TOOLS_ROOT)/Config/makefile.default.rules
9 | CXXFLAGS = -std=gnu++11 -DVERBOSE -Wall -Werror -Wno-unknown-pragmas $(DBG) $(OPT)
10 | # NOTE: CXXFLAGS (above) is assigned after the includes, so it replaces any value they set. The lines below collect every .cpp file in this directory and map each to an object file under $(OBJDIR).
11 | SRC_DIR := .
12 | SRC_FILES := $(wildcard $(SRC_DIR)/*.cpp)
13 | OBJ_FILES := $(patsubst $(SRC_DIR)/%.cpp,$(OBJDIR)%$(OBJ_SUFFIX),$(SRC_FILES))
14 | # 'all' builds the mica tool binary.
15 | all: $(OBJDIR)mica$(PINTOOL_SUFFIX)
16 |
17 | # Build the intermediate object file.
18 | $(OBJDIR)%$(OBJ_SUFFIX): %.cpp
19 | $(CXX) $(CXXFLAGS) $(TOOL_CXXFLAGS_NOOPT) $(COMP_OBJ)$@ $<
20 | # Link all object files into the mica tool.
21 | $(OBJDIR)mica$(PINTOOL_SUFFIX): $(OBJ_FILES)
22 | $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $^ $(TOOL_LPATHS) $(TOOL_LIBS)
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | MICA: Microarchitecture-Independent Characterization of Applications
2 | ====================================================================
3 | version 1.0
4 |
5 | [Kenneth Hoste](http://kejo.be/ELIS/) & [Lieven Eeckhout](http://users.elis.ugent.be/~leeckhou/) (Ghent University, Belgium)
6 |
7 | Current maintainer:
8 | [Amir H. Ashouri](http://www.eecg.toronto.edu/~aashouri/) (University of Toronto, Canada)
9 |
10 | with contributions by:
11 | - Hamid Fadishei (multi-process support)
12 | - Petr Tuma (code cleanup)
13 | - Maxime Chéramy (cleanup, bug fixes, additional features)
14 |
15 | Websites:
16 | (http://boegel.kejo.be/ELIS/MICA)
17 | (http://www.elis.ugent.be/~kehoste/mica)
18 |
19 | A set of tutorial slides on MICA, which were presented at IISWC-2007, are
20 | available from the MICA website.
21 |
22 | # Disclaimer
23 | ------------
24 |
25 | Currently, this software is only tested on Linux/x86 and [Pin.2.10](https://drive.google.com/file/d/0B-AkmAlNRsymNVl1RndzbFVpZEU/view?usp=drivesdk&resourcekey=0-YSVKSR2SXSpFSaYZvAA4Cg). Users have reported a correct installation on [Pin-3.4](https://software.intel.com/en-us/articles/pin-a-binary-instrumentation-tool-downloads); the details are [here](https://github.com/boegel/MICA/commit/1293082a05e97854e3ccc48490d5b72e765b48bf). Anyone who wants to use it on a different
26 | platform supported by Pin is free to do so, but should expect problems. We are working on adapting MICA to newer Pin versions.
27 |
28 | Any problem reports or questions are welcome at kenneth.hoste@ugent.be .
29 |
30 | # Compilation
31 | --------------
32 |
33 | The easiest way to compile MICA is to unzip/untar mica_vXYZ.tar.gz into the source/tools
34 | directory of the Pin kit you are using. If you wish to place mica in a different
35 | directory, you'll have to adjust the included makefile accordingly.
36 | Running 'make' should produce the 'mica.so' shared library.
37 |
38 | By default, MICA is built using the GCC C++ compiler (g++).
39 | Since Pin kit 39599 (March 2nd 2011), building Pin tools with the Intel compilers is
40 | also supported. To build MICA using the Intel C++ compiler, run "make CXX=icpc".
41 | Make sure /opt/intel/lib is added to the LD_LIBRARY_PATH environment variable to
42 | use MICA built using the Intel compilers.
43 |
44 | # Specifying type of analysis
45 | -----------------------------
46 |
47 | MICA supports various types of microarchitecture-independent characteristics.
48 | It also allows measuring the characteristics either for the entire execution, or
49 | per interval of N dynamic instructions.
50 |
51 | Specifying the parameters is done using the mica.conf configuration file.
52 | A sample mica.conf file is provided with the distribution, and details
53 | on how to specify the parameters are found below.
54 | ```
55 | analysis_type: all | ilp | ilp_one | itypes | ppm | reg | stride | memfootprint | memstackdist | custom
56 | interval_size: full | <size>
57 | [ilp_size: <size>]
58 | [block_size: <2^size>]
59 | [page_size: <2^size>]
60 | [itypes_spec_file: <file>]
61 | ```
62 | ## example:
63 | ```
64 | analysis_type: all
65 | interval_size: 100000000
66 | block_size: 6
67 | page_size: 12
68 | itypes_spec_file: itypes_default.spec
69 | ```
70 |
71 | specifies to measure all supported characteristics per interval of 100,000,000 instructions,
72 | with block size of 64 (2^6), page size of 4K (2^12), and using the instruction mix categories
73 | described in the file itypes_default.spec
74 |
75 | ## Usage
76 | -------
77 |
78 | Using MICA is very easy; just run:
79 | ```
80 | pin -t mica.so -- <program> [<parameters>]
81 | ```
82 | The type of analysis is specified in the mica.conf file, and some
83 | logging is written to mica.log.
84 |
85 | ## Output files
86 | ---------------
87 |
88 | (I realize the output file names are a bit strange, but that's just the way I
89 | chose them... It's easy to adjust them yourself! ).
90 | ```
91 | ilp:
92 | full: ilp_full_int_pin.out
93 | interval: ilp_phases_int_pin.out
94 | ilp_one:
95 | full: ilp_full_int_pin.out
96 | interval: ilp_phases_int_pin.out
97 | itypes:
98 | full: itypes_full_int_pin.out
99 | interval: itypes_phases_int_pin.out
100 | ppm:
101 | full: ppm_full_int_pin.out
102 | interval: ppm_phases_int_pin.out
103 | reg:
104 | full: reg_full_int_pin.out
105 | interval: reg_phases_int_pin.out
106 | stride:
107 | full: stride_full_int_pin.out
108 | interval: stride_phases_int_pin.out
109 | memfootprint:
110 | full: memfootprint_full_int_pin.out
111 | interval: memfootprint_phases_int_pin.out
112 | memstackdist:
113 | full: memstackdist_full_int_pin.out
114 | interval: memstackdist_phases_int_pin.out
115 | ```
116 |
117 | ## Full execution metrics
118 | -----------------------------------
119 |
120 | ### +++ ilp +++
121 |
122 | Instruction-Level Parallelism (ILP) available for four different instruction
123 | window sizes (32, 64, 128, 256).
124 | This is measured by assuming perfect caches, perfect branch prediction, etc.
125 | The only limitations are the instruction window size and the data dependences.
126 | ```
127 | analysis_type: ilp
128 | ```
129 | Besides measuring these four window sizes at once, MICA also supports
130 | specifying a single window size, which is specified as follows (for
131 | characterizing the full run using an instruction window of 32 entries):
132 | ```
133 | analysis_type: ilp_one
134 | interval_size: full
135 | ilp_size: 32
136 | ```
137 | You can tweak the block size used using the block_size configuration parameter.
138 |
139 | ### +++ itypes +++
140 | ```
141 | analysis_type: itypes
142 | ```
143 | ### +++ Instruction mix +++
144 |
145 | The instruction mix is evaluated by categorizing the executed instructions.
146 | Because the x86 architecture isn't a load-store architecture, we count memory
147 | reads/writes separately. The following categories are used by default (in order
148 | of output):
149 | ```
150 | - memory read (instructions which read from memory)
151 | - memory write (instructions which write to memory)
152 | - control flow
153 | - arithmetic
154 | - floating-point
155 | - stack
156 | - shift
157 | - string
158 | - sse
159 | - other
160 | - nop
161 | ```
162 | It is possible to redefine the instruction mix categories, by creating a specification
163 | file and mentioning it in the mica.conf file (itypes_spec_file).
164 |
165 | ### +++ ppm +++
166 | ```
167 | analysis_type: ppm
168 | ```
169 | Branch predictability.
170 |
171 | The branch predictability of the conditional branches in the program is
172 | evaluated using a Prediction-by-Partial-Match (PPM) predictor, in 4 different
173 | configurations (global/local branch history, shared/separate prediction
174 | table(s)), using 3 different history lengths (4,8,12 bits). Additionally,
175 | average taken and transition count are also being measured.
176 |
177 | ### +++ reg +++
178 | ```
179 | analysis_type: reg
180 | ```
181 | ### Register traffic.
182 |
183 | The register traffic is analyzed in different aspects:
184 | ```
185 | - average number of register operands
186 | - average degree of use
187 | - dependency distances (prob. <= D)
188 |
189 | Dependency distances are chosen in powers of 2, i.e. 1, 2, 4, 8, 16, 32, 64
190 | ```
191 | ### +++ stride +++
192 | ```
193 | analysis_type: stride
194 | ```
195 | Data stream strides.
196 |
197 | The distances between subsequent memory accesses are characterised by:
198 | ```
199 | - local load (memory read) strides
200 | - global load (memory read) strides
201 | - local store (memory write) strides
202 | - global store (memory write) strides
203 | ```
204 | Local means per static instruction accesses, global means over all
205 | instructions. The strides are characterized by powers of 8 (prob. <= 0, 8, 64,
206 | 512, 4096, 32768, 262144)
207 |
208 | ### +++ memfootprint +++
209 | ```
210 | analysis_type: memfootprint
211 | ```
212 | Instruction and data memory footprint.
213 |
214 | The size of the instruction and data memory footprint is characterized by
215 | counting the number of blocks (64-byte) and pages (4KB) touched. This
216 | is done separately for data and instruction addresses.
217 |
218 | ### +++ memstackdist +++
219 | ```
220 | analysis_type: memstackdist
221 | ```
222 | Memory reuse distances.
223 |
224 | This is a highly valuable set of numbers to characterize the cache behavior
225 | of the application of interest. For each memory read, the corresponding
226 | 64-byte cache block is determined. For each cache block accessed, the number
227 | of unique cache blocks accessed since the last time it was referenced is
228 | determined, using a LRU stack.
229 | The reuse distances for all memory reads are reported in buckets. The first
230 | bucket is used for so called 'cold references'. The subsequent buckets capture reuse
231 | distances of [2^n, 2^(n+1)[, where n ranges from 0 to 18. The first of these
232 | actually captures [0,2[ (not [1,2[), while the last bucket, [2^18, 2^19[, captures all
233 | reuse distances larger than or equal to 2^18, so it's in fact [2^18, oo[.
234 | In total, this delivers 20 buckets, and the total number of memory accesses
235 | (the first number in the output), thus 21 numbers.
236 |
237 | For example: the fifth bucket corresponds to accesses with reuse distance
238 | between 2^3 and 2^4 (or 8 64-byte cache blocks to 16 64-byte cache blocks).
239 |
240 | Note: because memory addresses vary over different executions of the same
241 | program, these numbers may vary slightly across multiple runs. Please be aware
242 | of this when using these metrics for research purposes.
243 |
244 | To track the progress of the MICA analysis being run, see the mica_progress.txt file,
245 | which shows how many dynamic instructions have been analyzed. Disabling this can be
246 | done by removing the -DVERBOSE flag in the Makefile and rebuilding MICA.
247 |
248 | ### * Interval metrics
249 | -------------------
250 |
251 | Besides characterizing total program execution, the tool is also capable of
252 | characterizing interval behavior. The analysis is identical to the tools
253 | above, but flushes the state for each new interval.
254 |
255 | ### +++ ilp +++
256 |
257 | RESET: instruction and cycle counters (per interval), free memory used for
258 | memory address stuff (to avoid huge memory requirements for large workloads)
259 |
260 | DON'T TOUCH: instruction window contents; global instruction and cycle counters
261 |
262 | +++ itypes +++
263 |
264 | RESET: instruction type counters
265 |
266 | +++ ppm +++
267 |
268 | RESET: misprediction counts, taken/transition counts
269 |
270 | DON'T TOUCH: branch history tables
271 |
272 | +++ reg +++
273 |
274 | RESET: operand counts, register use distribution and register age distribution
275 |
276 | DON'T TOUCH: register use counts (i.e. keep track of register use counts across
277 | interval boundaries); register definition addresses
278 |
279 | +++ stride +++
280 |
281 | RESET: instruction counts (mem.read, mem.write, interval), distribution counts
282 |
283 | DON'T TOUCH: last (global/local) read/write memory addresses
284 |
285 | +++ memfootprint +++
286 |
287 | RESET: reference counters, free memory used for memory address stuff (to avoid
288 | huge memory requirements for large workloads)
289 |
290 | DON'T TOUCH: -
291 |
292 | +++ memstackdist +++
293 |
294 | RESET: bucket counts (including cold reference and memory access counts)
295 |
296 | DON'T TOUCH: LRU stack (keep track of reuse distances over interval boundaries)
297 |
298 | * Measured in integer values, convert to floating-point
299 | -------------------------------------------------------
300 |
301 | Because of historical reasons (problems with printing out floating-point
302 | numbers in certain situations with previous Pin kits), we only print out
303 | integer values and convert to floating-point metrics offline. This also allows
304 | aggregating data measured per interval to larger intervals or full execution
305 | for most characteristics.
306 | ```
307 | S: interval size
308 | N: number of intervals
309 | I: number of instructions
310 | ```
311 | +++ ilp +++
312 |
313 | FORMAT:
314 |
315 | instruction_count cycle_count_win_size_1 cycle_count_win_size_2 ... cycle_count_win_size_n
316 |
317 | CONVERSION:
318 |
319 | instruction_count/cycle_count
320 | ```
321 | i.e.
322 | 1 to (N-1)th line: S/cycle_count_win_size_i
323 | Nth line: (I-N*S)/cycle_count_win_size_i
324 | ```
325 | +++ itypes +++
326 |
327 | FORMAT:
328 |
329 | instruction_cnt mem_read_cnt mem_write_cnt control_cnt arith_cnt fp_cnt stack_cnt shift_cnt string_cnt sse_cnt system_cnt nop_cnt other_cnt
330 |
331 | CONVERSION:
332 | ```
333 | mem_write_cnt/instruction_cnt
334 | ...
335 | other_cnt/instruction_cnt
336 | ```
337 | NOTE
338 |
339 | Note that simply adding the (n-1) last numbers won't necessarily yield the first number.
340 | First of all, the memory read and write counts shouldn't be added to the total, because
341 | the x86 architecture is not a load/store architecture (e.g. an instruction can both read
342 | memory and be a floating-point instruction).
343 | Secondly, some instructions may fit in multiple categories, and therefore simply adding the
344 | counts for the various categories will cause instructions to be counted double.
345 |
346 | Also note that the (sum of) instruction_cnt value(s) will not match the instruction count
347 | printed at the last line of the output file ("number of instructions: "). This is because
348 | in the former, each iteration of a REP-prefixed instruction is counted, while in the latter
349 | a REP-prefixed instruction is only counted once.
350 |
351 | The other_cnt contains the number of instructions that did not fit in any of the other categories
352 | (excluding mem_read and mem_write). More details on which kind of instructions this includes can
353 | be found in the itypes_other_group_categories.txt output file.
354 |
355 | +++ ppm +++
356 |
357 | FORMAT:
358 |
359 | instr_cnt GAg_mispred_cnt_4bits PAg_mispred_cnt_4bits GAs_mispred_cnt_4bits PAs_mispred_cnt_4bits ... PAs_mispred_cnt_12bits
360 |
361 | CONVERSION:
362 | ```
363 | GAg_mispred_cnt_Kbits/instr_cnt
364 | ...
365 | PAs_mispred_cnt_Kbits/instr_cnt
366 | ```
367 | +++ reg +++
368 |
369 | FORMAT:
370 |
371 | instr_cnt total_oper_cnt instr_reg_cnt total_reg_use_cnt total_reg_age reg_age_cnt_1 reg_age_cnt_2 reg_age_cnt_4 ... reg_age_cnt_64
372 |
373 | CONVERSION:
374 | ```
375 | total_oper_cnt/instr_cnt
376 | total_reg_use_cnt/instr_reg_cnt
377 | reg_age_cnt_1/total_reg_age
378 | reg_age_cnt_2/total_reg_age
379 | ...
380 | reg_age_cnt_64/total_reg_age
381 | ```
382 | +++ stride +++
383 |
384 | FORMAT:
385 |
386 | mem_read_cnt mem_read_local_stride_0 mem_read_local_stride_8 ... mem_read_local_stride_262144 mem_read_global_stride_0 ... mem_read_global_stride_262144 mem_write_cnt mem_write_local_stride_0 ... mem_write_global_stride_262144
387 |
388 | CONVERSION:
389 |
390 | mem_read_local_stride_0/mem_read_cnt
391 | ...
392 | mem_read_global_stride_262144/mem_read_cnt
393 | mem_write_local_stride_0/mem_write_cnt
394 | ...
395 | mem_write_global_stride_262144/mem_write_cnt
396 |
397 | +++ memfootprint +++
398 |
399 | Integer output (no conversion needed).
400 |
401 | FORMAT:
402 |
403 | num_64-byte_blocks_data num_4KB_pages_data num_64-byte_blocks_instr num_4KB_pages_instr
404 |
405 | +++ memstackdist +++
406 |
407 | FORMAT:
408 |
409 | mem_access_cnt cold_ref_cnt acc_cnt_0-2 acc_cnt_2-2^2 acc_cnt_2^2-2^3 ... acc_cnt_2^17-2^18 acc_cnt_over_2^18
410 |
411 | CONVERSION:
412 | ```
413 | cold_ref_cnt/mem_access_cnt
414 | acc_cnt_0/mem_access_cnt
415 | ...
416 | acc_cnt_2^18-2^19/mem_access_cnt
417 | acc_cnt_rest/mem_access_cnt
418 | ```
419 | * Multi-process binaries
420 | -----------------------------------
421 |
422 | If you want to use MICA on multiprocess binaries which call fork and execv, you should specify this entry in the MICA configuration file:
423 | ```
424 | append_pid: yes
425 | ```
426 | This will tell MICA to append the current process ID to the report file names so multiple processes do not overwrite each other's output.
427 | Additionally, you should pass "-follow_execv 1" parameter to pin in order to trace multiprocess applications.
428 |
429 | ------------------------------------------------------------------
430 | # Complete list of Headers - Table Generation
431 | For ease of use, we provide tableGen.sh to automatically look for all MICA-instrumented output files belonging to a unique PID. It generates a CSV file having the first row as the headers. Please refer to the headers in the script for the complete set of names.
432 |
433 | ------------------------------------------------------------------
434 | # Examples of using MICA in the recent literature
435 |
436 | You can see an example of using MICA in building prediction models targeted at compiler optimization problems at [COBAYN's github page](https://github.com/amirjamez/COBAYN). There is also a provided dataset located at:
437 | ```
438 | >>COBAYN/data/ft_MICA_cbench.csv
439 | ```
440 |
--------------------------------------------------------------------------------
/RELEASE_NOTES:
--------------------------------------------------------------------------------
1 | March 26th 2012
2 | ---------------
3 |
4 | MICA v0.40
5 |
6 | - contributions by Hamid Fadishei:
7 | * append_pid config entry added by Hamid Fadishei, mainly for multiprocess binaries
8 | * some warning messages resolved
9 |
10 | Aug. 29th 2011
11 | ---------------
12 |
13 | MICA v0.32
14 |
15 | - significant code cleanup by Petr Tuma; notes:
16 | * cleaned up some redundant NULL pointer casts
17 | * cleaned up names of LRU stack entry references
18 | * systematic testing of malloc return value in most of the tool code
19 | * added warning on presence of multiple threads
20 | * replaced sprintf with string streams
21 | * added branch prediction hints using __builtin_expect where appropriate
22 | - guard tracking progress in mica_progress.txt with a preprocessor flag (-DVERBOSE)
23 | - test MICA built using the Intel C++ compiler, and document how to build it using
24 | icpc in the README (TODO: benchmark the performance difference)
25 |
26 | February 28th 2011
27 | ------------------
28 |
29 | MICA v0.31
30 |
31 | - improved config file parsing, i.e. remove dependency on order of entries
32 |
33 | - updated README file
34 | - describe some details regarding instruction mix
35 |
36 | February 27th 2011
37 | ------------------
38 |
39 | MICA v0.3
40 |
41 | - increased flexibility of itypes analysis significantly
42 | - instruction groups used in itypes analysis can be specified by the user now,
43 | using a itypes.spec file; specify the filename in mica.conf using an entry like:
44 | itypes_spec_file: <filename>
45 | - by default, the old instruction groups are used
46 | (except for SYSCALL, which was added to the group formerly known as 'other')
47 | - made block size in ilp, memfootprint and memreusedist flexible
48 | - size can be set by specifying 'block_size: <power of 2>' in the mica.conf file
49 | - default block size is 2^6 (64) bytes, which is a change compared to MICA v0.23 for ilp
50 | - made page size in memfootprint flexible
51 | - size can be set by specifying 'page_size: <power of 2>' in the mica.conf file
52 | - default page size is 4096 (2^12) bytes
53 | - possibly expensive assert statements and other sanity checks were removed
54 | - bug fixes:
55 | - memory read size wasn't being used 100% correctly
56 | in ilp, memfootprint, memreusedist and stride analysis,
57 | the size was being added to the start address of the read,
58 | while (size-1) should be added; otherwise, e.g. for memfootprint,
59 | we count an extra block being touched if the access is near a block boundary
60 | - a small problem with an assert statement was fixed in memfootprint (>= 0 instead of >)
61 | - fprintf statements were fixed for 64-bit systems
62 |
63 |
64 | September 22nd 2009
65 | -------------------
66 |
67 | MICA v0.23
68 |
69 | Several people have reported small problems when MICA is being used with
70 | recent Pin kits. This small patch release should resolve these issues.
71 |
72 | - bug fixes:
73 | * adjusted makefile and README according to Pin kit directory tree changes
74 | * adjust mica_itypes.cpp to recognize both NOP and WIDENOP categories
75 |
76 |
77 | June 13th 2008
78 | --------------
79 |
80 | MICA v0.22
81 |
82 | - bug fixes in itypes:
83 | * fixed issue with instructions in MISC category being counted double (both in 'control flow' and 'other' buckets)
84 | (thanks to Ahmed S. Al-Zawawi for bringing my attention to this issue)
85 | * added NOP instructions category
86 | - adjusted README file to make meaning of different buckets in memreusedist more clear
87 | (thanks to Kshitij Sudan for reporting this)
88 |
89 | May 20th 2008
90 | --------------
91 |
92 | MICA v0.21
93 |
94 | - removed -static from makefile, to avoid issues when linking MICA
95 | (thanks to J. K. Rai and Ahmed S. Al-Zawawi for reporting this)
96 |
97 | Dec. 3rd 2007
98 | --------------
99 |
100 | MICA v0.2:
101 |
102 | - various bug fixes, including:
103 | * reg: include non-full-width registers
104 | * ilp: not all registers were included in analysis (stopped after first non-valid register)
105 | * stride: fixed faulty use of readIndex/writeIndex in readMem/writeMem
106 | * reset interval_ins_count for all characteristics
107 | - added features:
108 | * taking size of memory read/write into account
109 | * implementation of memreusedist characteristics, useful for characterizing cache behavior
110 | - adjusted:
111 | * memory footprint measured for 64-byte blocks instead of 32-byte blocks (because most modern processors have 64-byte cache blocks)
112 | * configuring MICA is done using a mica.conf configuration file instead of command line parameters
113 | - speed:
114 | * used InsertIfCall/InsertThenCall to make more analysis routines inlineable
115 | * buffering for ilp implementation, which yields roughly 10% speedup
116 |
117 | Sept. 29th 2007
118 | ---------------
119 |
120 | Initial release: MICA v0.1
121 |
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
7 |
8 |
9 | Kenneth Hoste @ ELIS (UGent) -- MICA
10 |
11 |
12 |
14 |
18 |
19 |
20 |
21 |
22 | MICA: Microarchitecture-Independent Characterization of Applications
23 |
40 |
41 |
42 |
43 |
44 |
45 |
46 | What is MICA ?
47 |
48 | MICA is short for M icroarchitecture-I ndependent
49 | C haracterization of A pplications.
50 |
51 | MICA is a Pin
52 | tool which allows the user to collect a number of program characteristics to
53 | quantify runtime program behavior.
54 |
55 | These program characteristics are totally independent of the
56 | microarchitecture (cache configuration, branch predictor, ...) on which the
57 | measurements are done, in contrast to other workload characterization
58 | techniques using simulation or hardware performance counters.
59 |
60 |
61 |
62 |
63 | News
64 |
65 |
66 | (Feb. 27th 2011)
67 | I am no longer actively working in the field of computer architecture research.
68 | Nevertheless, I will try and support MICA in the coming years.
69 | If you notice that the last release of MICA isn't working for you, e.g. with the latest Pin kit, please contact me (kenneth.hoste@ugent.be )
70 |
71 | March 26th 2012
72 | Release of MICA v0.40
73 |
74 | contributions by Hamid Fadishei:
75 |
76 | add support for multi-process binaries, see append_pid entry for config file
77 | resolve some warning messages
78 |
79 |
80 |
81 |
82 |
83 | Aug. 29th 2011
84 | Release of MICA v0.32
85 |
86 | significant code cleanup by Petr Tuma notes:
87 |
88 | cleaned up some redundant NULL pointer casts
89 | cleaned up names of LRU stack entry references
90 | systematic testing of malloc return value in most of the tool code
91 | added warning on presence of multiple threads
92 | replaced sprintf with string streams
93 | added branch prediction hints using __builtin_expect where appropriate
94 |
95 | guard tracking progress in mica_progress.txt with a preprocessor flag (-DVERBOSE)
96 | test MICA built using the Intel C++ compiler, and document how to build it using
97 | icpc in the README (TODO: benchmark the performance difference)
98 |
99 |
100 | Feb. 28th 2011
101 | Release of MICA v0.31
102 |
103 | improved config file parsing, i.e. remove dependency on order of entries
104 | updated README file
105 |
106 | describe some details regarding instruction mix
107 |
108 |
109 |
110 |
111 |
112 | Feb. 27th 2011
113 | Release of MICA v0.3
114 |
115 | increased flexibility of itypes analysis significantly
116 |
117 | instruction groups used in itypes analysis can be specified by the user now,
118 | using a itypes.spec file; specify the filename in mica.conf using an entry like:
119 | 'itypes_spec_file: <filename>'
120 | by default, the old instruction groups are used
121 | (except for SYSCALL, which was added to the group formerly known as 'other')
122 |
123 |
124 | made block size in ilp, memfootprint and memreusedist flexible
125 |
126 | size can be set by specifying 'block_size: <power of 2>' in the mica.conf file
127 | default block size is 2^6 (64) bytes, which is a change compared to MICA v0.23 for ilp
128 |
129 |
130 | made page size in memfootprint flexible
131 |
132 | size can be set by specifying 'page_size: <power of 2>' in the mica.conf file
133 | default page size is 4096 (2^12) bytes
134 |
135 |
136 | possibly expensive assert statements and other sanity checks were removed
137 | bug fixes:
138 |
139 | memory read size wasn't being used 100% correctly
140 | in ilp, memfootprint, memreusedist and stride analysis,
141 | the size was being added to the start address of the read,
142 | while (size-1) should be added; otherwise, e.g. for memfootprint,
143 | we count an extra block being touched if the access is near a block boundary
144 | a small problem with an assert statement was fixed in memfootprint (>= 0 instead of >)
145 | fprintf statements were fixed for 64-bit systems
146 |
147 |
148 |
149 |
150 | Sep. 22nd 2009
151 |
152 |
Release of MICA v0.23:
153 |
154 | bug fixes w.r.t. changes in Pin kit:
155 |
156 | adjusted makefile and README to reflect changed Pin kit directory tree
157 | adjusted mica_itypes.cpp to recognize both NOP and WIDENOP categories
158 |
159 |
160 |
161 |
162 | Jun. 13th 2008
163 |
164 |
Release of MICA v0.22, including:
165 |
166 | bug fixes in itypes:
167 |
168 | fixed issue with instructions in MISC category being counted double (both in 'control flow' and 'other' buckets)
169 | (thanks to Ahmed S. Al-Zawawi for bringing my attention to this issue)
170 | added NOP instructions category
171 |
172 | adjusted README file to make meaning of different buckets in memreusedist more clear
173 | (thanks to Kshitij Sudan for reporting this)
174 |
175 |
176 |
177 | May 20th 2008
178 |
179 |
Release of MICA v0.21, including:
180 | removed -static from makefile, to avoid issues when linking MICA
181 | (thanks to J. K. Rai and Ahmed S. Al-Zawawi for reporting this)
182 |
183 |
184 | Mar. 26th 2008
185 | Fixed some faulty hyperlinks in the publication section, and added a new paper on phase-level workload characterization, to be presented at ISPASS-2008 (Austin (TX), April 2008). This is the first paper that actually uses MICA...
186 |
187 | Dec. 3rd 2007
188 | Release of MICA v0.2, which includes some important bug fixes and added features:
189 |
190 | bugs fixed : reg (not including non-full-width registers), ilp (stop including after first non-valid register), stride (faulty use of readIndex/writeIndex in readMem/writeMem), not resetting interval_ins_count for all characteristics
191 | added features : taking size of memory reads/writes into account in ilp , stride and memfootprint , added memreusedist characteristics (for characterizing cache behavior)
192 | adjustments : using 64-byte cache blocks for memfootprint (instead of 32-byte), configuring MICA is done using a mica.conf configuration file instead of using command line parameters
193 | speed : used InsertIfCall/InsertThenCall to make more analysis routines inlineable, used buffering for ilp , which leads to a ~10% speedup
194 |
195 | The new release is available here .
196 |
197 |
198 | Sept. 29th 2007
199 | Official introduction of MICA at the Pin tutorial at IISWC-2007. Slides for the presentation are available here , full code examples used in the tutorial are available here (gzipped tarball here ).
200 |
201 | Sept. 26th 2007
202 | A sneak preview of MICA was shown as part of a presentation I was giving at Intel Hudson (near Boston (MA), US). The Pin development team gave a lot of great feedback on how MICA could be improved and extended, and they were interested in using part of the code for their ongoing research.
203 |
204 |
205 |
206 |
207 | Download
208 |
209 | You can download MICA below. The Pin tool is released under a BSD license, which basically means "do what you want with it, just don't pretend it's yours".
210 | If you are using MICA for a paper, please refer to the IEEE Micro article below for the microarchitecture-independent characterization methodology.
211 | Download MICA v0.40
212 |
213 |
214 |
215 |
216 | How do I ...?
217 |
218 |
219 |
220 | learn more about it?
221 |
222 | A README-file containing information about using the tool and the outputs it produces is available in the release.
223 | A good place to start is the IISWC-2007 Pin tutorial presentation, available here .
224 | For further questions about the use and implementation of MICA, please mail Kenneth Hoste (kehoste@elis.ugent.be ).
225 |
226 |
227 | use it?
228 | MICA is very easy to use, just like any other Pin tool. To analyze the /bin/ls program in Linux, measuring all available characteristics for the full run of the program, execute:
229 | pin -t mica.so -- ls
230 | using a MICA config file that contains:
231 | analysis_type: all
232 | interval_size: full
233 |
234 |
235 | get support for it?
236 |
237 | For now, please contact Kenneth Hoste (kenneth.hoste@ugent.be ) if you experience any problems using MICA. A MICA mailing list will probably be set up some time soon.
238 |
239 |
240 | contribute to it?
241 | If you have improved MICA (fixed bugs, added features, ...), and want to contribute your efforts, please contact Kenneth Hoste (kenneth.hoste@ugent.be ).
242 |
243 |
244 |
245 |
246 |
247 |
248 | Related publications
249 |
250 | Methodology:
251 |
252 |
259 |
260 | Applications:
261 |
262 |
263 |
264 | Performance Prediction based on Inherent Program Similarity
265 | [abstract ; paper: PDF , PS ; presentation ]
266 | Aashish Phansalkar ,
267 | Lieven Eeckhout ,
268 | Andy Georges ,
269 | Lizy K. John and
270 | Koen De Bosschere
271 | PACT-2006 , Sept. 2006; Seattle, WA (US)
272 |
273 |
274 | Comparing Benchmarks Using Key Microarchitecture-Independent Characteristics
275 | [abstract ; paper: PDF ; presentation ]
276 | by
277 | Kenneth Hoste and
278 | Lieven Eeckhout
279 | IISWC2006 , Oct. 2006; San Jose, CA (US)
280 |
281 |
282 | Analyzing Commercial Processor Performance Numbers for Predicting Performance of Applications of Interest
283 | [abstract ; paper: PDF ; poster ]
284 | by
285 | Kenneth Hoste, Lieven Eeckhout and
286 | Hendrik Blockeel
287 | SIGMETRICS'07 , June 2007; San Diego, CA (US)
288 |
289 | Characterizing the Unique and Diverse Behaviors in Existing and Emerging General-Purpose and Domain-Specific Benchmark Suites [PDF ]
290 | by Kenneth Hoste and Lieven Eeckhout
291 | ISPASS-2008 , April 2008; Austin, TX (US)
292 |
293 | Scheduling on Heterogeneous Multicore Processors Using Architectural Signatures [PDF ]
294 | by Daniel Shelepov and Alexandra Fedorova (Simon Fraser University, Vancouver, Canada)
295 | WIOSCA-2008 (ISCA workshop) , June 2008; Beijing, China
296 |
297 | HASS: A Scheduler for Heterogeneous Multicore Systems [PDF ]
298 | by Daniel Shelepov, Juan Carlos Saez", Stacey Jeffery°, Alexandra Fedorova,
299 | Nestor Perez, Zhi Feng Huang, Sergey Blagodurov and Viren Kumar
300 | (Simon Fraser University, Vancouver, Canada)
301 | (° University of Waterloo, Ontario, Canada)
302 | (" University of Madrid, Spain)
303 | Operating Systems Review , vol. 43, issue 2, pp. 66-75, April 2009
304 | (Special Issue on the Interaction among the OS, Compilers and Multicore processors)
305 |
306 |
307 | Analysis, Estimation and Optimization of Computer System Performance Using Machine Learning
308 | [PDF ]
309 | Kenneth Hoste, PhD dissertation
310 | Ghent University (Belgium), September 2010
311 |
312 |
313 |
314 |
315 |
316 | Links
317 |
318 |
323 |
324 |
325 |
326 |
327 |
328 |
--------------------------------------------------------------------------------
/itypes_default.spec:
--------------------------------------------------------------------------------
1 | 0, 0, SPECIAL, mem_read
2 | 1, 0, SPECIAL, mem_write
3 | 2, 0, CATEGORY, COND_BR
4 | 2, 1, CATEGORY, UNCOND_BR
5 | 2, 2, OPCODE, LEAVE
6 | 2, 3, OPCODE, RET_NEAR
7 | 2, 4, OPCODE, CALL_NEAR
8 | 3, 0, CATEGORY, LOGICAL
9 | 3, 1, CATEGORY, DATAXFER
10 | 3, 2, CATEGORY, BINARY
11 | 3, 3, CATEGORY, FLAGOP
12 | 3, 4, CATEGORY, BITBYTE
13 | 4, 0, CATEGORY, X87_ALU
14 | 4, 1, CATEGORY, FCMOV
15 | 4, 2, CATEGORY, LOGICAL_FP
16 | 5, 0, CATEGORY, WIDENOP
17 | 5, 1, CATEGORY, NOP
18 | 6, 0, SPECIAL, reg_transfer
19 |
--------------------------------------------------------------------------------
/itypes_default_available:
--------------------------------------------------------------------------------
1 | 0, 0, SPECIAL, mem_read
2 | 1, 0, SPECIAL, mem_write
3 | 2, 0, CATEGORY, COND_BR
4 | 2, 1, CATEGORY, UNCOND_BR
5 | 2, 2, OPCODE, LEAVE
6 | 2, 3, OPCODE, RET_NEAR
7 | 2, 4, OPCODE, CALL_NEAR
8 | 3, 0, CATEGORY, LOGICAL
9 | 3, 1, CATEGORY, DATAXFER
10 | 3, 2, CATEGORY, BINARY
11 | 3, 3, CATEGORY, FLAGOP
12 | 3, 4, CATEGORY, BITBYTE
13 | 4, 0, CATEGORY, X87_ALU
14 | 4, 1, CATEGORY, FCMOV
15 | 5, 0, CATEGORY, POP
16 | 5, 1, CATEGORY, PUSH
17 | 6, 0, CATEGORY, SHIFT
18 | 7, 0, CATEGORY, STRINGOP
19 | 8, 0, CATEGORY, MMX
20 | 8, 1, CATEGORY, SSE
21 | 9, 0, CATEGORY, INTERRUPT
22 | 9, 1, CATEGORY, ROTATE
23 | 9, 2, CATEGORY, SEMAPHORE
24 | 9, 3, CATEGORY, CMOV
25 | 9, 4, CATEGORY, SYSTEM
26 | 9, 5, CATEGORY, MISC
27 | 9, 6, CATEGORY, PREFETCH
28 | 9, 7, CATEGORY, SYSCALL
29 | 10, 0, CATEGORY, WIDENOP
30 | 10, 1, CATEGORY, NOP
31 | 11, 0, SPECIAL, reg_transfer
32 |
--------------------------------------------------------------------------------
/mica.conf.example:
--------------------------------------------------------------------------------
1 | analysis_type: ilp_one
2 | interval_size: full
3 | ilp_size: 32
4 | page_size: 12
5 | block_size: 6
6 | itypes_spec_file: itypes_default.spec
7 | append_pid: yes
--------------------------------------------------------------------------------
/mica.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | /*************************************************************************
11 | * *
12 | * MICA: Microarchitecture-Independent Characterization of Workloads *
13 | * *
14 | *************************************************************************
15 | *
16 | * implementation by Kenneth Hoste (Ghent University, Belgium), December 2006 - March 2011
17 | * based on code written by Lieven Eeckhout (for ATOM on Alpha)
18 | *
19 | * PLEASE DO NOT REDISTRIBUTE THIS CODE WITHOUT INFORMING THE AUTHORS.
20 | *
21 | * contact: kenneth.hoste@ugent.be , lieven.eeckhout@elis.ugent.be
22 | *
23 | */
24 |
25 | /* MICA includes */
26 | #include "mica.h"
27 | #include "mica_init.h"
28 | #include "mica_utils.h"
29 |
30 | #include "mica_all.h"
31 | #include "mica_ilp.h"
32 | #include "mica_itypes.h"
33 | #include "mica_ppm.h"
34 | #include "mica_reg.h"
35 | #include "mica_stride.h"
36 | #include "mica_memfootprint.h"
37 | #include "mica_memstackdist.h"
38 |
39 | #include
40 | #include
41 | #include
42 | #include
43 | using namespace std;
44 |
45 | /* *** Variables *** */
46 |
47 | /* global */
48 | INT64 interval_size; // interval size chosen; -1 makes the Instruction_* callbacks insert the full-run counters
49 | INT64 interval_ins_count; // this and the three counters below are reset to 0 in main()
50 | INT64 interval_ins_count_for_hpc_alignment;
51 | INT64 total_ins_count;
52 | INT64 total_ins_count_for_hpc_alignment;
53 |
54 | ins_buffer_entry* ins_buffer[MAX_MEM_TABLE_ENTRIES]; // hash table buckets, indexed by address % MAX_MEM_TABLE_ENTRIES in findInsBufferEntry()
55 |
56 | /* ILP */
57 | UINT32 _ilp_win_size;
58 | char* _itypes_spec_file; // handed to read_config() in main(); NOTE(review): looks like an ITYPES setting rather than an ILP one - confirm
59 |
60 | /* ILP, MEMFOOTPRINT, MEMSTACKDIST */
61 | UINT32 _block_size;
62 |
63 | /* MEMFOOTPRINT */
64 | UINT32 _page_size;
65 |
66 | /* for multiprocess binaries */
67 | int append_pid; // non-zero makes mkfilename() put the process id into the generated file name
68 |
69 | /* helper */
70 | int thread_count = 0; // incremented atomically by ThreadStart()
71 |
72 | /**********************************************
73 | * MAIN *
74 | **********************************************/
75 |
76 | //FILE* _log;
77 | ofstream _log; // log stream, passed to setup_mica_log() and read_config() in main()
78 |
79 |
80 | /* Build "<name>_pin.out" ("<name>_<pid>_pin.out" when append_pid is set) in a malloc'ed buffer; the caller owns the returned string. */
81 | const char *mkfilename(const char *name)
82 | {
83 |     char pid_part[32] = ""; // "_<pid>" when append_pid is set, empty otherwise
84 |     if (append_pid) snprintf(pid_part, sizeof(pid_part), "_%d", (int)getpid());
85 |     size_t len = strlen(name) + strlen(pid_part) + strlen("_pin.out") + 1; // sized from the real name: the former fixed 100-byte buffers overflowed on long names
86 |     char *x = (char*)malloc(len);
87 |     if (x == NULL){
88 |         cerr << "ERROR: out of memory while building the output file name for " << name << endl;
89 |         exit(1);
90 |     }
91 |     snprintf(x, len, "%s%s_pin.out", name, pid_part);
92 |     return (const char*)x;
93 | }
94 |
95 | // Find the buffer entry for the instruction at address a in the ins_buffer hash table;
96 | // when no entry exists yet, a fully initialized one is allocated and appended to its bucket.
97 | ins_buffer_entry* findInsBufferEntry(ADDRINT a){
98 |
99 |     INT64 key = a % MAX_MEM_TABLE_ENTRIES;
100 |     ins_buffer_entry* tail = NULL; // last node visited; stays NULL when the bucket is empty
101 |
102 |     // compare every node of the chain, the last one included, without ever dereferencing NULL
103 |     for(ins_buffer_entry* cur = ins_buffer[key]; cur != NULL; cur = cur->next){
104 |         if(cur->insAddr == a)
105 |             return cur;
106 |         tail = cur;
107 |     }
108 |
109 |     /* ins address not found, installing */
110 |     ins_buffer_entry* e = (ins_buffer_entry*)checked_malloc(sizeof(ins_buffer_entry));
111 |     e->insAddr = a;
112 |     e->regOpCnt = 0;
113 |     e->regReadCnt = 0;
114 |     e->regsRead = NULL;
115 |     e->regWriteCnt = 0;
116 |     e->regsWritten = NULL;
117 |     e->next = NULL;
118 |     e->setRead = false;
119 |     e->setWritten = false;
120 |     e->setRegOpCnt = false;
121 |
122 |     // link the new entry in: as bucket head when the bucket was empty, otherwise behind the tail
123 |     if(tail == NULL)
124 |         ins_buffer[key] = e;
125 |     else
126 |         tail->next = e;
127 |
128 |     return e;
129 | }
145 |
146 | /*
147 |  * Shared prologue of every Instruction_* callback: inserts the instruction counters before ins.
148 |  *  - interval_size == -1 selects the all_instr_full_* routines, any other value the
149 |  *    all_instr_intervals_* routines.
150 |  *  - An instruction with a real REP prefix gets an If/Then pair: returnArg is the predicate and
151 |  *    receives IARG_FIRST_REP_ITERATION, the *_with_rep routine receives the value of the REP
152 |  *    count register. Every other instruction gets the *_no_rep routine.
153 |  *  - The *_always routine is inserted last, for every instruction.
154 |  * The insertion order is the same as in the ten identical copies this helper replaces.
155 |  */
156 | static VOID insert_instr_counters(INS ins){
157 |     const bool full_run = (interval_size == -1);
158 |
159 |     if(INS_HasRealRep(ins)){
160 |         AFUNPTR with_rep = full_run ? (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep
161 |                                     : (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep;
162 |         INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END);
163 |         INS_InsertThenCall(ins, IPOINT_BEFORE, with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END);
164 |     }
165 |     else{
166 |         AFUNPTR no_rep = full_run ? (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep
167 |                                   : (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep;
168 |         INS_InsertCall(ins, IPOINT_BEFORE, no_rep, IARG_END);
169 |     }
170 |
171 |     AFUNPTR always = full_run ? (AFUNPTR)all_instr_full_count_always : (AFUNPTR)all_instr_intervals_count_always;
172 |     INS_InsertCall(ins, IPOINT_BEFORE, always, IARG_END);
173 | }
174 |
175 | /* ALL */
176 | VOID Instruction_all(INS ins, VOID* v){
177 |     insert_instr_counters(ins);
178 |
179 |     ADDRINT insAddr = INS_Address(ins);
180 |     ins_buffer_entry* e = findInsBufferEntry(insAddr);
181 |
182 |     // a single combined call; the per-characteristic instrument_* calls are not made in this mode
183 |     instrument_all(ins, v, e);
184 | }
185 |
186 | VOID Fini_all(INT32 code, VOID* v){
187 |     fini_ilp_all(code, v);
188 |     fini_itypes(code, v);
189 |     fini_ppm(code, v);
190 |     fini_reg(code, v);
191 |     fini_stride(code, v);
192 |     fini_memfootprint(code, v);
193 |     fini_memstackdist(code, v);
194 | }
195 |
196 | /* ILP */
197 | VOID Instruction_ilp_all_only(INS ins, VOID* v){
198 |     insert_instr_counters(ins);
199 |
200 |     ADDRINT insAddr = INS_Address(ins);
201 |     ins_buffer_entry* e = findInsBufferEntry(insAddr);
202 |     instrument_ilp_all(ins, e);
203 | }
204 |
205 | VOID Fini_ilp_all_only(INT32 code, VOID* v){
206 |     fini_ilp_all(code, v);
207 | }
208 |
209 | /* ILP_ONE */
210 | VOID Instruction_ilp_one_only(INS ins, VOID* v){
211 |     insert_instr_counters(ins);
212 |
213 |     ADDRINT insAddr = INS_Address(ins);
214 |     ins_buffer_entry* e = findInsBufferEntry(insAddr);
215 |     instrument_ilp_one(ins, e);
216 | }
217 |
218 | VOID Fini_ilp_one_only(INT32 code, VOID* v){
219 |     fini_ilp_one(code, v);
220 | }
221 |
222 | /* ITYPES */
223 | VOID Instruction_itypes_only(INS ins, VOID* v){
224 |     insert_instr_counters(ins);
225 |     instrument_itypes(ins, v);
226 | }
227 |
228 | VOID Fini_itypes_only(INT32 code, VOID* v){
229 |     fini_itypes(code, v);
230 | }
231 |
232 | /* PPM */
233 | VOID Instruction_ppm_only(INS ins, VOID* v){
234 |     insert_instr_counters(ins);
235 |     instrument_ppm(ins, v);
236 | }
237 |
238 | VOID Fini_ppm_only(INT32 code, VOID* v){
239 |     fini_ppm(code, v);
240 | }
241 |
242 | /* REG */
243 | VOID Instruction_reg_only(INS ins, VOID* v){
244 |     insert_instr_counters(ins);
245 |
246 |     ADDRINT insAddr = INS_Address(ins);
247 |     ins_buffer_entry* e = findInsBufferEntry(insAddr);
248 |     instrument_reg(ins, e);
249 | }
250 |
251 | VOID Fini_reg_only(INT32 code, VOID* v){
252 |     fini_reg(code, v);
253 | }
254 |
255 | /* STRIDE */
256 | VOID Instruction_stride_only(INS ins, VOID* v){
257 |     insert_instr_counters(ins);
258 |     instrument_stride(ins, v);
259 | }
260 |
261 | VOID Fini_stride_only(INT32 code, VOID* v){
262 |     fini_stride(code, v);
263 | }
264 |
265 | /* MEMFOOTPRINT */
266 | VOID Instruction_memfootprint_only(INS ins, VOID* v){
267 |     insert_instr_counters(ins);
268 |     instrument_memfootprint(ins, v);
269 | }
270 |
271 | VOID Fini_memfootprint_only(INT32 code, VOID* v){
272 |     fini_memfootprint(code, v);
273 | }
274 |
275 | /* MEMSTACKDIST */
276 | VOID Instruction_memstackdist_only(INS ins, VOID* v){
277 |     insert_instr_counters(ins);
278 |     instrument_memstackdist(ins, v);
279 | }
280 |
281 | VOID Fini_memstackdist_only(INT32 code, VOID* v){
282 |     fini_memstackdist(code, v);
283 | }
284 |
285 | /* MY TYPE */
286 | VOID Instruction_custom(INS ins, VOID* v){
287 |     insert_instr_counters(ins);
288 |
289 |     cerr << "Please choose a subset of characteristics you want to use, and remove this message (along with the exit call)" << endl;
290 |     exit(1);
291 |     // Choose subset of characteristics, and make the same adjustments in Fini_custom and init_custom below
292 |
293 |     //ADDRINT insAddr = INS_Address(ins);
294 |     //ins_buffer_entry* e = findInsBufferEntry(insAddr);
295 |
296 |     //instrument_ilp_all(ins, e);
297 |     //instrument_ilp_one(ins, e);
298 |     //instrument_itypes(ins, v);
299 |     //instrument_ppm(ins, v);
300 |     //instrument_reg(ins, e);
301 |     //instrument_stride(ins, v);
302 |     //instrument_memfootprint(ins, v);
303 |     //instrument_memstackdist(ins, v);
304 | }
482 | /* Fini callback registered by main() for MODE_CUSTOM. As shipped it does nothing: uncomment the fini_* calls that match the instrument_* calls enabled in Instruction_custom. */
483 | VOID Fini_custom(INT32 code, VOID* v){
484 | //fini_ilp_all(code, v);
485 | //fini_ilp_one(code, v);
486 | //fini_itypes(code, v);
487 | //fini_ppm(code, v);
488 | //fini_reg(code, v);
489 | //fini_stride(code, v);
490 | //fini_memfootprint(code, v);
491 | //fini_memstackdist(code, v);
492 | }
493 | /* Init hook called by main() for MODE_CUSTOM, before PIN_Init. As shipped it does nothing: uncomment the init_* calls that match the instrument_* calls enabled in Instruction_custom. */
494 | void init_custom(){
495 | //init_ilp_all();
496 | //init_ilp_one();
497 | //init_itypes();
498 | //init_ppm();
499 | //init_reg();
500 | //init_stride();
501 | //init_memfootprint();
502 | //init_memstackdist();
503 | }
504 |
505 | /* Thread-start callback (registered in main): counts started threads and warns, in the log and on cerr, for every thread after the first one. */
506 | VOID ThreadStart(THREADID id, CONTEXT *context, INT32 flags, VOID *data)
507 | {
508 |     // __sync_fetch_and_add yields the counter value from before this thread was added
509 |     const int started_before = __sync_fetch_and_add(&thread_count, 1);
510 |     if (started_before == 0) return;
511 |     LOG_MSG("WARNING: Thread creation detected, results can be corrupted!\n");
512 |     WARNING_MSG("Thread creation detected, results can be corrupted!");
513 | }
514 |
515 |
516 | /************
517 | * MAIN: sets up the log, reads the configuration, registers the callbacks of the selected mode and starts the program under Pin *
518 | ************/
519 | int main(int argc, char* argv[]){
520 |
521 | int i;
522 | MODE mode;
523 |
524 | setup_mica_log(&_log);
525 | // read_config receives pointers to the configuration globals of this file and to the local mode
526 | read_config(&_log, &interval_size, &mode, &_ilp_win_size, &_block_size, &_page_size, &_itypes_spec_file, &append_pid);
527 |
528 | cerr << "interval_size: " << interval_size << ", mode: " << mode << endl;
529 | // start all instruction counters at zero
530 | interval_ins_count = 0;
531 | interval_ins_count_for_hpc_alignment = 0;
532 | total_ins_count = 0;
533 | total_ins_count_for_hpc_alignment = 0;
534 | // empty every bucket of the instruction hash table used by findInsBufferEntry
535 | for(i=0; i < MAX_MEM_TABLE_ENTRIES; i++){
536 | ins_buffer[i] = (ins_buffer_entry*)NULL;
537 | }
538 | // every mode runs the same sequence: its init routine, PIN_Init, then registration of its Instruction/Fini callback pair
539 | switch(mode){
540 | case MODE_ALL:
541 | init_all();
542 | PIN_Init(argc, argv);
543 | INS_AddInstrumentFunction(Instruction_all, 0);
544 | PIN_AddFiniFunction(Fini_all, 0);
545 | break;
546 | case MODE_ILP:
547 | init_ilp_all();
548 | PIN_Init(argc, argv);
549 | INS_AddInstrumentFunction(Instruction_ilp_all_only, 0);
550 | PIN_AddFiniFunction(Fini_ilp_all_only, 0);
551 | break;
552 | case MODE_ILP_ONE:
553 | init_ilp_one();
554 | PIN_Init(argc, argv);
555 | INS_AddInstrumentFunction(Instruction_ilp_one_only, 0);
556 | PIN_AddFiniFunction(Fini_ilp_one_only, 0);
557 | break;
558 | case MODE_ITYPES:
559 | init_itypes();
560 | PIN_Init(argc, argv);
561 | INS_AddInstrumentFunction(Instruction_itypes_only, 0);
562 | PIN_AddFiniFunction(Fini_itypes_only, 0);
563 | break;
564 | case MODE_PPM:
565 | init_ppm();
566 | PIN_Init(argc, argv);
567 | INS_AddInstrumentFunction(Instruction_ppm_only, 0);
568 | PIN_AddFiniFunction(Fini_ppm_only, 0);
569 | break;
570 | case MODE_REG:
571 | init_reg();
572 | PIN_Init(argc, argv);
573 | INS_AddInstrumentFunction(Instruction_reg_only, 0);
574 | PIN_AddFiniFunction(Fini_reg_only, 0);
575 | break;
576 | case MODE_STRIDE:
577 | init_stride();
578 | PIN_Init(argc, argv);
579 | INS_AddInstrumentFunction(Instruction_stride_only, 0);
580 | PIN_AddFiniFunction(Fini_stride_only, 0);
581 | break;
582 | case MODE_MEMFOOTPRINT:
583 | init_memfootprint();
584 | PIN_Init(argc, argv);
585 | INS_AddInstrumentFunction(Instruction_memfootprint_only, 0);
586 | PIN_AddFiniFunction(Fini_memfootprint_only, 0);
587 | break;
588 | case MODE_MEMSTACKDIST:
589 | init_memstackdist();
590 | PIN_Init(argc, argv);
591 | INS_AddInstrumentFunction(Instruction_memstackdist_only, 0);
592 | PIN_AddFiniFunction(Fini_memstackdist_only, 0);
593 | break;
594 | case MODE_CUSTOM:
595 | init_custom();
596 | PIN_Init(argc, argv);
597 | INS_AddInstrumentFunction(Instruction_custom, 0);
598 | PIN_AddFiniFunction(Fini_custom, 0);
599 | break;
600 | default: // a mode value without a case above: report on cerr and in the log, then give up
601 | cerr << "FATAL ERROR: Unknown mode while trying to allocate memory for Pin tool!" << endl;
602 | _log << "FATAL ERROR: Unknown mode while trying to allocate memory for Pin tool!" << endl;
603 | exit(1);
604 | }
605 |
606 | // The tool does not handle multithreaded programs.
607 | // Since results might be bogus, we print a warning
608 | // when presence of multiple threads is detected by PIN.
609 | PIN_AddThreadStartFunction(ThreadStart, NULL);
610 |
611 | // starts program, never returns
612 | PIN_StartProgram();
613 | }
614 |
--------------------------------------------------------------------------------
/mica.h:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | /* standard library includes */
11 | #include
12 | #include
13 | #include
14 |
15 | #include
16 | #include
17 | using namespace std;
18 |
19 | /* Pin includes */
20 | #include "pin.H"
21 |
22 |
23 | #ifndef MICA
24 | #define MICA
25 |
26 | /* *** global configurations *** */
27 | extern int append_pid;
28 |
29 | /* *** conditional debugging *** */
30 | /* Each macro below expands to a complete statement that already ends in ';'. LOG_MSG writes to a stream named _log, which must be visible where the macro is used. */
31 | #define LOG_MSG(x) _log << x << endl;
32 | #define DEBUG_MSG(x) cerr << "DEBUG: " << x << endl;
33 | /* WARNING_MSG and ERROR_MSG, like DEBUG_MSG, print to cerr with their level as prefix */
34 | #define WARNING_MSG(x) cerr << "WARNING: " << x << endl;
35 | #define ERROR_MSG(x) cerr << "ERROR: " << x << endl;
36 |
37 |
38 | /* *** utility macros *** */
39 |
40 | #define BITS_TO_MASK(x) ((1ull << (x)) - 1ull)
41 | #define BITS_TO_COUNT(x) (1ull << (x))
42 |
43 |
44 | /* *** defines *** */
45 |
46 | #define CHAR_CNT 69
47 |
48 | /* ILP/MEMFOOTPRINT */
49 |
50 | #define ILP_WIN_SIZE_BASE 32
51 |
52 | // number of stack entries in single hash table item
53 | #define LOG_MAX_MEM_ENTRIES 16
54 | #define MAX_MEM_ENTRIES BITS_TO_COUNT(LOG_MAX_MEM_ENTRIES)
55 | #define MASK_MAX_MEM_ENTRIES BITS_TO_MASK(LOG_MAX_MEM_ENTRIES)
56 |
57 | #define LOG_MAX_MEM_BLOCK LOG_MAX_MEM_ENTRIES
58 | #define MAX_MEM_BLOCK MAX_MEM_ENTRIES
59 |
60 | #define MAX_MEM_BLOCK_ENTRIES 65536
61 | #define MAX_MEM_TABLE_ENTRIES 12289 // hash table size, should be a prime number (769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 393241, 786433)
62 |
63 | /* PPM */
64 | #define MAX_HIST_LENGTH 12
65 | #define NUM_HIST_LENGTHS 3
66 | const UINT32 history_lengths[NUM_HIST_LENGTHS] = {4,8,12};
67 |
68 | /* REG */
69 | #define MAX_NUM_REGS 4096
70 | #define MAX_NUM_OPER 7
71 | #define MAX_DIST 128
72 | #define MAX_COMM_DIST MAX_DIST
73 | #define MAX_REG_USE MAX_DIST
74 |
75 | /* STRIDE */
76 | #define MAX_DISTR 524288 // 2^19
77 |
78 | /* MEMREUSEDIST */
79 |
80 | #define BUCKET_CNT 19 // number of reuse distance buckets to use
81 |
82 | const char *mkfilename(const char *name);
83 |
84 | #endif
85 |
--------------------------------------------------------------------------------
/mica_all.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | /* MICA includes */
11 | #include "mica_all.h"
12 | #include "mica_ilp.h" // needed for empty_all_buffer_all
13 | #include "mica_itypes.h" // needed for itypes_count , itypes_instr_interval_output and itypes_instr_interval_reset
14 | #include "mica_ppm.h" // needed for instrument_ppm_cond_br, ppm_instr_interval_output and ppm_instr_interval_reset
15 | #include "mica_reg.h" // needed for reg_instr_full, reg_instr_intervals, reg_instr_interval_output and reg_instr_interval_reset
16 | #include "mica_stride.h" // needed for stride_index_mem*, readMem_stride, writeMem_stride, stride_instr_interval_output and stride_instr_interval_reset
17 | #include "mica_memfootprint.h" // needed for memOp, memfootprint_instr_interval_output and memfootprint_instr_interval_reset
18 | #include "mica_memstackdist.h" // needed for memstackdist_memRead, memstackdist_instr_interval_output and memstackdist_instr_interval_reset
19 |
20 | #define PROGRESS_THRESHOLD 10000000 // 10M
21 |
22 | extern INT64 total_ins_count;
23 | extern INT64 total_ins_count_for_hpc_alignment;
24 | extern INT64 interval_ins_count;
25 | extern INT64 interval_ins_count_for_hpc_alignment; // one count for REP prefixed instructions
26 |
27 | extern INT64 interval_size;
28 |
29 | extern identifier** group_identifiers;
30 | extern INT64* group_ids_cnt;
31 | extern INT64* group_counts;
32 | extern INT64 number_of_groups;
33 |
34 | extern INT64 other_ids_cnt;
35 | extern INT64 other_ids_max_cnt;
36 | extern identifier* other_group_identifiers;
37 |
38 | void init_all(){
39 |
40 | init_ilp_all();
41 | init_itypes();
42 | init_ppm();
43 | init_reg();
44 | init_stride();
45 | init_memfootprint();
46 | init_memstackdist();
47 | }
48 |
49 | ADDRINT returnArg(BOOL arg){
50 |
51 | return arg;
52 | }
53 |
54 | VOID all_instr_full_count_always(){
55 |
56 | total_ins_count++;
57 |
58 | #ifdef VERBOSE
59 | if (__builtin_expect (total_ins_count % PROGRESS_THRESHOLD == 0, false)) {
60 | ofstream progress_file;
61 | progress_file.open ("mica_progress.txt", ios::out | ios::trunc);
62 | progress_file << total_ins_count << " instructions analyzed" << endl;
63 | progress_file.close ();
64 | }
65 | #endif
66 | }
67 |
68 | VOID all_instr_full_count_for_hpc_alignment_no_rep(){
69 | total_ins_count_for_hpc_alignment++;
70 | }
71 |
72 | VOID all_instr_full_count_for_hpc_alignment_with_rep(UINT32 repCnt){
73 | if(repCnt > 0){
74 | total_ins_count_for_hpc_alignment++;
75 | }
76 | }
77 |
78 | VOID all_instr_intervals_count_always(){
79 | total_ins_count++;
80 | interval_ins_count++;
81 |
82 | #ifdef VERBOSE
83 | if (__builtin_expect(total_ins_count % PROGRESS_THRESHOLD == 0, false)) {
84 | ofstream progress_file;
85 | progress_file.open("mica_progress.txt", ios::out | ios::trunc);
86 | progress_file << total_ins_count << " instructions analyzed" << endl;
87 | progress_file.close();
88 | }
89 | #endif
90 | }
91 |
92 | VOID all_instr_intervals_count_for_hpc_alignment_no_rep(){
93 | total_ins_count_for_hpc_alignment++;
94 | interval_ins_count_for_hpc_alignment++;
95 | }
96 |
97 | VOID all_instr_intervals_count_for_hpc_alignment_with_rep(UINT32 repCnt){
98 | if(repCnt > 0){
99 | total_ins_count_for_hpc_alignment++;
100 | interval_ins_count_for_hpc_alignment++;
101 | }
102 | }
103 |
104 | ADDRINT all_buffer_instruction_2reads_write(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, UINT32 stride_index_memread1, UINT32 stride_index_memread2, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){
105 |
106 | //itypes_count_mem_read();
107 | //itypes_count_mem_write();
108 | readMem_stride(stride_index_memread1, read1_addr, read_size);
109 | readMem_stride(stride_index_memread2, read2_addr, read_size);
110 | writeMem_stride(stride_index_memwrite, write_addr, write_size);
111 | memOp(read1_addr, read_size); // memfootprint
112 | memOp(read2_addr, read_size);
113 | memOp(write_addr, write_size);
114 | memstackdist_memRead(read1_addr, read_size); // memstackdist
115 | memstackdist_memRead(read2_addr, read_size);
116 | //return ilp_buffer_instruction_2reads_write(_e, read1_addr, read2_addr, read_size, write_addr, write_size);
117 | ilp_buffer_instruction_only(_e);
118 | ilp_buffer_instruction_read(read1_addr, read_size);
119 | ilp_buffer_instruction_read2(read2_addr);
120 | ilp_buffer_instruction_write(write_addr, write_size);
121 | return ilp_buffer_instruction_next();
122 | }
123 |
124 | ADDRINT all_buffer_instruction_read_write(void* _e, ADDRINT read1_addr, ADDRINT read_size, UINT32 stride_index_memread1, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){
125 |
126 | //itypes_count_mem_read();
127 | //itypes_count_mem_write();
128 | readMem_stride(stride_index_memread1, read1_addr, read_size);
129 | writeMem_stride(stride_index_memwrite, write_addr, write_size);
130 | memOp(read1_addr, read_size); // memfootprint
131 | memOp(write_addr, write_size);
132 | memstackdist_memRead(read1_addr, read_size); // memstackdist
133 | //return ilp_buffer_instruction_read_write(_e, read1_addr, read_size, write_addr, write_size);
134 | ilp_buffer_instruction_only(_e);
135 | ilp_buffer_instruction_read(read1_addr, read_size);
136 | ilp_buffer_instruction_write(write_addr, write_size);
137 | return ilp_buffer_instruction_next();
138 | }
139 |
140 | ADDRINT all_buffer_instruction_2reads(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, UINT32 stride_index_memread1, UINT32 stride_index_memread2){
141 |
142 | //itypes_count_mem_read();
143 | readMem_stride(stride_index_memread1, read1_addr, read_size);
144 | readMem_stride(stride_index_memread2, read2_addr, read_size);
145 | memOp(read1_addr, read_size); // memfootprint
146 | memOp(read2_addr, read_size);
147 | memstackdist_memRead(read1_addr, read_size); // memstackdist
148 | memstackdist_memRead(read2_addr, read_size);
149 | //return ilp_buffer_instruction_2reads(_e, read1_addr, read2_addr, read_size);
150 | ilp_buffer_instruction_only(_e);
151 | ilp_buffer_instruction_read(read1_addr, read_size);
152 | ilp_buffer_instruction_read2(read2_addr);
153 | return ilp_buffer_instruction_next();
154 | }
155 |
156 | ADDRINT all_buffer_instruction_read(void* _e, ADDRINT read1_addr, ADDRINT read_size, UINT32 stride_index_memread1){
157 |
158 | //itypes_count_mem_read();
159 | readMem_stride(stride_index_memread1, read1_addr, read_size);
160 | memOp(read1_addr, read_size); // memfootprint
161 | memstackdist_memRead(read1_addr, read_size); // memstackdist
162 | //return ilp_buffer_instruction_read(_e, read1_addr, read_size);
163 | ilp_buffer_instruction_only(_e);
164 | ilp_buffer_instruction_read(read1_addr, read_size);
165 | return ilp_buffer_instruction_next();
166 | }
167 |
168 | ADDRINT all_buffer_instruction_write(void* _e, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){
169 |
170 | //itypes_count_mem_write();
171 | writeMem_stride(stride_index_memwrite, write_addr, write_size);
172 | memOp(write_addr, write_size); // memfootprint
173 | //return ilp_buffer_instruction_write(_e, write_addr, write_size);
174 | ilp_buffer_instruction_only(_e);
175 | ilp_buffer_instruction_write(write_addr, write_size);
176 | return ilp_buffer_instruction_next();
177 | }
178 |
179 | ADDRINT all_buffer_instruction(void* _e){
180 |
181 | //return ilp_buffer_instruction(_e);
182 | ilp_buffer_instruction_only(_e);
183 | return ilp_buffer_instruction_next();
184 | }
185 |
186 | VOID all_instr_full(VOID* _e, ADDRINT instrAddr, ADDRINT size){
187 | reg_instr_full(_e);
188 | instrMem(instrAddr, size);
189 | }
190 |
191 | ADDRINT all_instr_intervals(VOID* _e, ADDRINT instrAddr, ADDRINT size){
192 | reg_instr_intervals(_e);
193 | instrMem(instrAddr, size);
194 | return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size);
195 | };
196 |
197 | VOID all_instr_interval(){
198 |
199 | /* output per interval for ILP is done by ilp-buffering functions */
200 |
201 | itypes_instr_interval_output();
202 | itypes_instr_interval_reset();
203 |
204 | ppm_instr_interval_output();
205 | ppm_instr_interval_reset();
206 |
207 | reg_instr_interval_output();
208 | reg_instr_interval_reset();
209 |
210 | stride_instr_interval_output();
211 | stride_instr_interval_reset();
212 |
213 | memfootprint_instr_interval_output();
214 | memfootprint_instr_interval_reset();
215 |
216 | memstackdist_instr_interval_output();
217 | memstackdist_instr_interval_reset();
218 |
219 | interval_ins_count = 0;
220 | interval_ins_count_for_hpc_alignment = 0;
221 | }
222 |
223 | VOID all_instr_interval_for_ilp(){
224 |
225 | // save these, because empty_ilp_buffer_all resets them
226 | INT64 interval_ins_count_backup = interval_ins_count;
227 | INT64 interval_ins_count_for_hpc_alignment_backup = interval_ins_count_for_hpc_alignment;
228 |
229 | empty_ilp_buffer_all();
230 |
231 | // restore
232 | interval_ins_count = interval_ins_count_backup;
233 | interval_ins_count_for_hpc_alignment = interval_ins_count_for_hpc_alignment_backup;
234 | }
235 |
236 | VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){
237 |
238 | UINT32 i, j, maxNumRegsProd, maxNumRegsCons, regReadCnt, regWriteCnt, opCnt, regOpCnt;
239 | REG reg;
240 | BOOL categorized = false;
241 | char cat[50];
242 | char opcode[50];
243 |
244 | UINT32 stride_index_memread1;
245 | UINT32 stride_index_memread2;
246 | UINT32 stride_index_memwrite;
247 |
248 | /* fetch cateogry and opcode for this instruction */
249 | strcpy(cat,CATEGORY_StringShort(INS_Category(ins)).c_str());
250 | strcpy(opcode,INS_Mnemonic(ins).c_str());
251 |
252 | // buffer register reads per static instruction
253 | if(!e->setRead){
254 |
255 |
256 | // register reads and memory reads determine the issue time
257 | maxNumRegsCons = INS_MaxNumRRegs(ins);
258 |
259 | regReadCnt = 0;
260 | for(i=0; i < maxNumRegsCons; i++){
261 | reg = INS_RegR(ins, i);
262 | //assert((UINT32)reg < MAX_NUM_REGS);
263 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
264 | // i.e. exlude branch, segment and pin registers, among others
265 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
266 | regReadCnt++;
267 | }
268 | }
269 |
270 | e->regReadCnt = regReadCnt;
271 | e->regsRead = (REG*)checked_malloc(regReadCnt*sizeof(REG));
272 |
273 | regReadCnt = 0;
274 | for(i=0; i < maxNumRegsCons; i++){
275 |
276 | reg = INS_RegR(ins, i);
277 |
278 | //assert((UINT32)reg < MAX_NUM_REGS);
279 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
280 | // i.e. exlude branch, segment and pin registers, among others
281 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
282 | e->regsRead[regReadCnt++] = reg;
283 | }
284 | }
285 |
286 | e->setRead = true;
287 |
288 | }
289 |
290 | // buffer register writes per static instruction
291 | if(!e->setWritten){
292 | maxNumRegsProd = INS_MaxNumWRegs(ins);
293 |
294 | regWriteCnt = 0;
295 | for(i=0; i < maxNumRegsProd; i++){
296 |
297 | reg = INS_RegW(ins, i);
298 |
299 | //assert((UINT32)reg < MAX_NUM_REGS);
300 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
301 | // i.e. exlude branch, segment and pin registers, among others */
302 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
303 | regWriteCnt++;
304 | }
305 | }
306 |
307 | e->regWriteCnt = regWriteCnt;
308 | e->regsWritten = (REG*)checked_malloc(regWriteCnt*sizeof(REG));
309 |
310 | regWriteCnt = 0;
311 | for(i=0; i < maxNumRegsProd; i++){
312 |
313 | reg = INS_RegW(ins, i);
314 |
315 | //assert((UINT32)reg < MAX_NUM_REGS);
316 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
317 | // i.e. exlude branch, segment and pin registers, among others
318 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
319 | e->regsWritten[regWriteCnt++] = reg;
320 | }
321 | }
322 |
323 | e->setWritten = true;
324 | }
325 |
326 | if(!e->setRegOpCnt){
327 | regOpCnt = 0;
328 | opCnt = INS_OperandCount(ins);
329 | for(i = 0; i < opCnt; i++){
330 | if(INS_OperandIsReg(ins,i))
331 | regOpCnt++;
332 | }
333 | /*if(regOpCnt >= MAX_NUM_OPER){
334 | fprintf(stderr,"BOOM! -> MAX_NUM_OPER is exceeded! (%u)\n", regOpCnt);
335 | exit(1);
336 | }*/
337 | e->regOpCnt = regOpCnt;
338 | e->setRegOpCnt = true;
339 | }
340 |
341 | // buffer memory operations (and instruction register buffer) with one single InsertCall
342 | if(INS_IsMemoryRead(ins)){
343 |
344 | stride_index_memread1 = stride_index_memRead1(INS_Address(ins));
345 |
346 | if(INS_IsMemoryWrite(ins)){
347 |
348 | stride_index_memwrite = stride_index_memWrite(INS_Address(ins));
349 |
350 | if(INS_HasMemoryRead2(ins)){
351 |
352 | stride_index_memread2 = stride_index_memRead2(INS_Address(ins));
353 |
354 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction_2reads_write, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_UINT32, stride_index_memread1, IARG_UINT32, stride_index_memread2, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_UINT32, stride_index_memwrite, IARG_END);
355 | }
356 | else{
357 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction_read_write, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_UINT32, stride_index_memread1, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_UINT32, stride_index_memwrite, IARG_END);
358 |
359 | }
360 | }
361 | else{
362 | if(INS_HasMemoryRead2(ins)){
363 |
364 | stride_index_memread2 = stride_index_memRead2(INS_Address(ins));
365 |
366 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction_2reads, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_UINT32 , stride_index_memread1, IARG_UINT32, stride_index_memread2, IARG_END);
367 | }
368 | else{
369 |
370 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction_read, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_UINT32, stride_index_memread1, IARG_END);
371 | }
372 | }
373 | }
374 | else{
375 | if(INS_IsMemoryWrite(ins)){
376 |
377 | stride_index_memwrite = stride_index_memWrite(INS_Address(ins));
378 |
379 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction_write, IARG_PTR, (void*)e, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_UINT32, stride_index_memwrite, IARG_END);
380 | }
381 | else{
382 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction, IARG_PTR, (void*)e, IARG_END);
383 | }
384 | }
385 |
386 | /* InsertIfCall returns true if ILP buffer is full */
387 | //INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)empty_ilp_buffer_all, IARG_END);
388 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_interval_for_ilp, IARG_END); // wrapper for empty_ilp_buffer_all
389 |
390 | /* +++ ITYPES +++ */
391 |
392 | // go over all groups, increase group count if instruction matches that group
393 | // group counts are increased at most once per instruction executed,
394 | // even if the instruction matches multiple identifiers in that group
395 | for(i=0; i < number_of_groups; i++){
396 | for(j=0; j < group_ids_cnt[i]; j++){
397 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_CATEGORY){
398 | if(strcmp(group_identifiers[i][j].str, cat) == 0){
399 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END);
400 | categorized = true;
401 | break;
402 | }
403 | }
404 | else{
405 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_OPCODE){
406 | if(strcmp(group_identifiers[i][j].str, opcode) == 0){
407 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END);
408 | categorized = true;
409 | break;
410 | }
411 | }
412 | else{
413 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_SPECIAL){
414 | if(strcmp(group_identifiers[i][j].str, "mem_read") == 0 && INS_IsMemoryRead(ins) ){
415 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END);
416 | categorized = true;
417 | break;
418 | }
419 | else{
420 | if(strcmp(group_identifiers[i][j].str, "mem_write") == 0 && INS_IsMemoryWrite(ins) ){
421 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END);
422 | categorized = true;
423 | break;
424 | }
425 | else{
426 | }
427 | }
428 | }
429 | else{
430 | cerr << "ERROR! Unknown identifier type specified (" << group_identifiers[i][j].type << ")" << endl;
431 | }
432 | }
433 | }
434 | }
435 | }
436 |
437 | // count instruction that don't fit in any of the specified categories in the last group
438 | if( !categorized ){
439 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, (unsigned int)number_of_groups, IARG_END);
440 |
441 | // check whether this category is already known in the 'other' group
442 | for(i=0; i < other_ids_cnt; i++){
443 | if(strcmp(other_group_identifiers[i].str, cat) == 0)
444 | break;
445 | }
446 |
447 | // if a new instruction category is found, add it to the set
448 | if(i == other_ids_cnt){
449 | other_group_identifiers[other_ids_cnt].type = identifier_type::ID_TYPE_CATEGORY;
450 | other_group_identifiers[other_ids_cnt].str = checked_strdup(cat);
451 | other_ids_cnt++;
452 | }
453 |
454 | // prepare for (possible) next category
455 | if(other_ids_cnt == other_ids_max_cnt){
456 | other_ids_max_cnt *= 2;
457 | other_group_identifiers = (identifier*)checked_realloc(other_group_identifiers, other_ids_max_cnt*sizeof(identifier));
458 | }
459 | }
460 |
461 | /* +++ PPM *** */
462 | if(strcmp(cat,"COND_BR") == 0){
463 | instrument_ppm_cond_br(ins);
464 | }
465 | /* inserting calls for counting instructions is done in mica.cpp */
466 | if(interval_size != -1){
467 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals, IARG_PTR, (void*)e, IARG_ADDRINT, INS_Address(ins), IARG_ADDRINT, (ADDRINT)INS_Size(ins), IARG_END);
468 | /* only called if interval is 'full' */
469 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_interval, IARG_END);
470 | }
471 | else{
472 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full, IARG_PTR, (void*)e, IARG_ADDRINT, INS_Address(ins), IARG_ADDRINT, (ADDRINT)INS_Size(ins), IARG_END);
473 | }
474 |
475 | }
476 |
--------------------------------------------------------------------------------
/mica_all.h:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "mica.h"
11 | #include "mica_utils.h"
12 |
/* Initialise all analyses (ILP, itypes, PPM, reg, stride, memfootprint, memstackdist). */
VOID init_all();
/* Return the boolean argument converted to ADDRINT. */
ADDRINT returnArg(BOOL arg);
/* Full-execution mode: count one executed instruction. */
VOID all_instr_full_count_always();
/* Full-execution mode: count one instruction towards the HPC-aligned total. */
VOID all_instr_full_count_for_hpc_alignment_no_rep();
/* Full-execution mode: as above, but only counts when repCnt > 0. */
VOID all_instr_full_count_for_hpc_alignment_with_rep(UINT32 repCnt);
/* Interval mode: count one executed instruction (total and per-interval). */
VOID all_instr_intervals_count_always();
/* Interval mode: count one instruction towards the HPC-aligned total and interval counters. */
VOID all_instr_intervals_count_for_hpc_alignment_no_rep();
/* Interval mode: as above, but only counts when repCnt > 0. */
VOID all_instr_intervals_count_for_hpc_alignment_with_rep(UINT32 repCnt);
/* Insert the analysis calls of all analyses for instruction ins; e is its buffer entry. */
VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e);
22 |
--------------------------------------------------------------------------------
/mica_ilp.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "pin.H"
11 |
12 | /* MICA includes */
13 | #include "mica_utils.h"
14 | #include "mica_ilp.h"
15 |
16 | #include
17 | #include
18 | using namespace std;
19 |
20 | #define ILP_WIN_SIZE_CNT 4
21 |
22 | const UINT32 win_sizes[ILP_WIN_SIZE_CNT] = {32, 64, 128, 256};
23 |
24 | extern UINT32 _ilp_win_size;
25 | UINT32 win_size;
26 |
27 | extern UINT32 _block_size;
28 | UINT32 ilp_block_size;
29 |
30 | /* buffer settings */
31 |
32 | //#define ILP_BUFFER_SIZE 256
33 | #define ILP_BUFFER_SIZE 200
34 |
35 | /* buffer variables */
36 |
/*
 * One slot of the ILP instruction buffer (see ilp_buffer below).
 * NOTE(review): presumably filled by the ilp_buffer_instruction_* functions,
 * which are not visible in this part of the file -- confirm.
 */
typedef struct ilp_buffer_entry_type{

	ins_buffer_entry* e;       // buffer entry of the instruction

	ADDRINT mem_read1_addr;    // address of first memory read
	ADDRINT mem_read2_addr;    // address of second memory read
	ADDRINT mem_read_size;     // size of memory read(s)

	ADDRINT mem_write_addr;    // address of memory write
	ADDRINT mem_write_size;    // size of memory write

} ilp_buffer_entry;
49 |
50 | ilp_buffer_entry* ilp_buffer[ILP_BUFFER_SIZE];
51 | UINT32 ilp_buffer_index;
52 |
53 | void init_ilp_buffering();
54 | VOID fini_ilp_buffering_all();
55 | VOID fini_ilp_buffering_one();
56 |
57 | /* Global variables */
58 |
59 | extern INT64 interval_size;
60 | extern INT64 interval_ins_count;
61 | extern INT64 interval_ins_count_for_hpc_alignment;
62 | extern INT64 total_ins_count;
63 | extern INT64 total_ins_count_for_hpc_alignment;
64 | ofstream output_file_ilp_one;
65 | ofstream output_file_ilp_all;
66 |
67 | INT32 size_pow_all_times_all;
68 | INT64 index_all_times_all;
69 | UINT64* all_times_all[ILP_WIN_SIZE_CNT];
70 |
71 | INT32 size_pow_times;
72 | INT64 index_all_times;
73 | UINT64* all_times;
74 |
75 | INT64 cpuClock_interval_all[ILP_WIN_SIZE_CNT];
76 | UINT64 timeAvailable_all[ILP_WIN_SIZE_CNT][MAX_NUM_REGS];
77 | nlist* memAddressesTable_all[MAX_MEM_TABLE_ENTRIES];
78 | UINT32 windowHead_all[ILP_WIN_SIZE_CNT];
79 | UINT32 windowTail_all[ILP_WIN_SIZE_CNT];
80 | UINT64 cpuClock_all[ILP_WIN_SIZE_CNT];
81 | UINT64* executionProfile_all[ILP_WIN_SIZE_CNT];
82 | UINT64 issueTime_all[ILP_WIN_SIZE_CNT];
83 |
84 | INT64 cpuClock_interval;
85 | UINT64 timeAvailable[MAX_NUM_REGS];
86 | nlist* memAddressesTable[MAX_MEM_TABLE_ENTRIES];
87 | UINT32 windowHead;
88 | UINT32 windowTail;
89 | UINT64 cpuClock;
90 | UINT64* executionProfile;
91 | UINT64 issueTime;
92 |
93 | /*************************
94 | ILP (COMMON)
95 | **************************/
96 |
97 | /* initializing */
/*
 * Initialisation shared by the one-window and all-windows ILP analyses.
 * Intentionally empty: the total instruction counts are initialised in mica.cpp.
 */
void init_ilp_common(){
	/* initializing total instruction counts is done in mica.cpp */
}
101 |
102 | /************************************
103 | ILP (one given window size)
104 | *************************************/
105 |
106 | /* initializing */
107 | void init_ilp_one(){
108 |
109 | UINT32 i;
110 |
111 | init_ilp_common();
112 | init_ilp_buffering();
113 |
114 | win_size = _ilp_win_size;
115 | ilp_block_size = _block_size;
116 |
117 | size_pow_times = 10;
118 | all_times = (UINT64*)checked_malloc((1 << size_pow_times) * sizeof(UINT64));
119 | index_all_times = 1; // don't use first element of all_times
120 |
121 | windowHead = 0;
122 | windowTail = 0;
123 | cpuClock = 0;
124 | cpuClock_interval = 0;
125 | for(i = 0; i < MAX_NUM_REGS; i++){
126 | timeAvailable[i] = 0;
127 | }
128 |
129 | executionProfile = (UINT64*)checked_malloc(win_size*sizeof(UINT64));
130 |
131 | for(i = 0; i < win_size; i++){
132 | executionProfile[i] = 0;
133 | }
134 | issueTime = 0;
135 |
136 | if(interval_size != -1){
137 | if(interval_size % ILP_BUFFER_SIZE != 0){
138 | cerr << "ERROR! Interval size is not a multiple of ILP buffer size. (" << interval_size << " vs " << ILP_BUFFER_SIZE << ")" << endl;
139 | exit(-1);
140 | }
141 | char filename[100];
142 | sprintf(filename, "ilp-win%d_phases_int", win_size);
143 | output_file_ilp_one.open(mkfilename(filename), ios::out|ios::trunc);
144 | output_file_ilp_one.close();
145 | }
146 | }
147 |
148 | /* support */
149 | void increase_size_all_times_one(){
150 | UINT64* ptr;
151 |
152 | size_pow_times++;
153 |
154 | ptr = (UINT64*)realloc(all_times, (1 << size_pow_times)*sizeof(UINT64));
155 | if(ptr == (UINT64*)NULL){
156 | cerr << "Could not allocate memory (realloc)!" << endl;
157 | exit(1);
158 | }
159 | all_times = ptr;
160 | }
161 |
162 | /* per-instruction stuff */
163 | VOID ilp_instr_one(){
164 |
165 | const UINT32 win_size_const = win_size;
166 | UINT32 reordered;
167 |
168 | /* set issue time for tail of instruction window */
169 | executionProfile[windowTail] = issueTime;
170 | windowTail = (windowTail + 1) % win_size_const;
171 |
172 | /* if instruction window (issue buffer) full */
173 | if(windowHead == windowTail){
174 | cpuClock++;
175 | cpuClock_interval++;
176 | reordered = 0;
177 | /* remove all instructions which are done from beginning of window,
178 | * until an instruction comes along which is not ready yet:
179 | * -> check executionProfile to see which instructions are done
180 | * -> commit maximum win_size instructions (i.e. stop when issue buffer is empty)
181 | */
182 | while((executionProfile[windowHead] < cpuClock) && (reordered < win_size_const)) {
183 | windowHead = (windowHead + 1) % win_size_const;
184 | reordered++;
185 | }
186 | //assert(reordered != 0);
187 | }
188 |
189 | /* reset issue times */
190 | issueTime = 0;
191 | }
192 |
193 | VOID ilp_instr_full_one(){
194 |
195 | /* counting instructions is done in all_instr_full() */
196 |
197 | ilp_instr_one();
198 | }
199 |
200 | VOID ilp_instr_intervals_one(){
201 |
202 | int i;
203 |
204 | /* counting instructions is done in all_instr_intervals() */
205 |
206 | ilp_instr_one();
207 |
208 | if(interval_ins_count_for_hpc_alignment == interval_size){
209 |
210 | char filename[100];
211 | sprintf(filename, "ilp-win%d_phases_int", win_size);
212 |
213 | output_file_ilp_one.open(mkfilename(filename), ios::out|ios::app);
214 |
215 | output_file_ilp_one << interval_size << " " << cpuClock_interval << endl;
216 |
217 | /* reset */
218 | interval_ins_count = 0;
219 | interval_ins_count_for_hpc_alignment = 0;
220 |
221 | cpuClock_interval = 0;
222 |
223 | /* clean up memory used, to avoid memory problems for long (CPU2006) benchmarks */
224 | size_pow_times = 10;
225 |
226 | free(all_times);
227 | all_times = (UINT64*)checked_malloc((1 << size_pow_times) * sizeof(UINT64));
228 | index_all_times = 1;
229 |
230 | nlist* np;
231 | nlist* np_rm;
232 | for(i=0; i < MAX_MEM_TABLE_ENTRIES; i++){
233 | np = memAddressesTable[i];
234 | while(np != (nlist*)NULL){
235 | np_rm = np;
236 | np = np->next;
237 | free(np_rm->mem);
238 | free(np_rm);
239 | }
240 | memAddressesTable[i] = (nlist*) NULL;
241 | }
242 |
243 | output_file_ilp_one.close();
244 | }
245 | }
246 |
247 | VOID checkIssueTime_one(){
248 |
249 | if(cpuClock > issueTime)
250 | issueTime = cpuClock;
251 | }
252 |
253 | /* register stuff */
254 | VOID readRegOp_ilp_one(UINT32 regId){
255 |
256 | if(timeAvailable[regId] > issueTime)
257 | issueTime = timeAvailable[regId];
258 | }
259 |
260 | VOID readRegOp_ilp_one_fast(VOID* _e){
261 |
262 | ins_buffer_entry* e = (ins_buffer_entry*)_e;
263 |
264 | INT32 i;
265 |
266 | UINT32 regId;
267 |
268 | for(i=0; i < e->regReadCnt; i++){
269 | regId = (UINT32)e->regsRead[i];
270 | if(timeAvailable[regId] > issueTime)
271 | issueTime = timeAvailable[regId];
272 | }
273 | }
274 |
275 | VOID writeRegOp_ilp_one(UINT32 regId){
276 |
277 | timeAvailable[regId] = issueTime + 1;
278 | }
279 |
280 | VOID writeRegOp_ilp_one_fast(VOID* _e){
281 |
282 | ins_buffer_entry* e = (ins_buffer_entry*)_e;
283 |
284 | INT32 i;
285 |
286 | for(i=0; i < e->regWriteCnt; i++)
287 | timeAvailable[(UINT32)e->regsWritten[i]] = issueTime + 1;
288 | }
289 |
290 | /* memory access stuff */
291 | VOID readMem_ilp_one(ADDRINT effAddr, ADDRINT size){
292 |
293 |
294 | ADDRINT a;
295 | ADDRINT upperMemAddr, indexInChunk;
296 | memNode* chunk = (memNode*)NULL;
297 | ADDRINT shiftedAddr = effAddr >> ilp_block_size;
298 | ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size;
299 |
300 | if(size > 0){
301 | for(a = shiftedAddr; a <= shiftedEndAddr; a++){
302 | upperMemAddr = a >> LOG_MAX_MEM_ENTRIES;
303 | indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES);
304 |
305 | chunk = lookup(memAddressesTable, upperMemAddr);
306 | if(chunk == (memNode*)NULL)
307 | chunk = install(memAddressesTable, upperMemAddr);
308 |
309 | //assert(indexInChunk < MAX_MEM_ENTRIES);
310 | //assert(chunk->timeAvailable[indexInChunk] < (1 << size_pow_times));
311 | if(all_times[chunk->timeAvailable[indexInChunk]] > issueTime)
312 | issueTime = all_times[chunk->timeAvailable[indexInChunk]];
313 | }
314 | }
315 | }
316 |
317 | VOID writeMem_ilp_one(ADDRINT effAddr, ADDRINT size){
318 |
319 | ADDRINT a;
320 | ADDRINT upperMemAddr, indexInChunk;
321 | memNode* chunk = (memNode*)NULL;
322 | ADDRINT shiftedAddr = effAddr >> ilp_block_size;
323 | ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size;
324 |
325 | if(size > 0){
326 | for(a = shiftedAddr; a <= shiftedEndAddr; a++){
327 | upperMemAddr = a >> LOG_MAX_MEM_ENTRIES;
328 | indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES);
329 |
330 | chunk = lookup(memAddressesTable,upperMemAddr);
331 | if(chunk == (memNode*)NULL)
332 | chunk = install(memAddressesTable,upperMemAddr);
333 |
334 | //assert(indexInChunk < MAX_MEM_ENTRIES);
335 | if(chunk->timeAvailable[indexInChunk] == 0){
336 | index_all_times++;
337 | if(index_all_times >= (1 << size_pow_times))
338 | increase_size_all_times_one();
339 | chunk->timeAvailable[indexInChunk] = index_all_times;
340 | }
341 | //assert(chunk->timeAvailable[indexInChunk] < (1 << size_pow_times));
342 | all_times[chunk->timeAvailable[indexInChunk]] = issueTime + 1;
343 | }
344 | }
345 | }
346 |
347 | /* instrumenting (instruction level) */
348 | /*VOID instrument_ilp_one(INS ins, VOID* v){
349 |
350 | UINT32 i;
351 | UINT32 maxNumRegsProd, maxNumRegsCons;
352 | REG reg;
353 |
354 | // register reads and memory reads determine the issue time
355 | maxNumRegsCons = INS_MaxNumRRegs(ins);
356 |
357 | for(i=0; i < maxNumRegsCons; i++){
358 |
359 | reg = INS_RegR(ins, i);
360 |
361 | assert((UINT32)reg < MAX_NUM_REGS);
362 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
363 | // i.e. exlude branch, segment and pin registers, among others
364 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
365 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readRegOp_ilp_one, IARG_UINT32, reg, IARG_END);
366 | }
367 | }
368 |
369 | if(INS_IsMemoryRead(ins)){
370 |
371 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_ilp_one, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END);
372 |
373 | if(INS_HasMemoryRead2(ins)){
374 |
375 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_ilp_one, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END);
376 | }
377 | }
378 |
379 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)checkIssueTime_one, IARG_END);
380 |
381 | // register writes and memory writes determine the time when these locations are available
382 |
383 | maxNumRegsProd = INS_MaxNumWRegs(ins);
384 | for(i=0; i < maxNumRegsProd; i++){
385 |
386 | reg = INS_RegW(ins, i);
387 |
388 | assert((UINT32)reg < MAX_NUM_REGS);
389 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
390 | // i.e. exlude branch, segment and pin registers, among others
391 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
392 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)writeRegOp_ilp_one, IARG_UINT32, reg, IARG_END);
393 | }
394 | }
395 |
396 | if(INS_IsMemoryWrite(ins)){
397 |
398 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)writeMem_ilp_one, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END);
399 | }
400 |
401 | // count instructions
402 | if(interval_size == -1)
403 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_instr_full_one, IARG_END);
404 | else
405 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_instr_intervals_one, IARG_END);
406 |
407 | }*/
408 |
409 | /* finishing... */
410 | VOID fini_ilp_one(INT32 code, VOID* v){
411 |
412 | char filename[100];
413 |
414 | fini_ilp_buffering_one();
415 |
416 | if(interval_size == -1){
417 | sprintf(filename, "ilp-win%d_full_int", win_size);
418 |
419 | output_file_ilp_one.open(mkfilename(filename), ios::out|ios::trunc);
420 | //output_file_ilp_one << total_ins_count;
421 | }
422 | else{
423 | sprintf(filename, "ilp-win%d_phases_int", win_size);
424 | output_file_ilp_one.open(mkfilename(filename), ios::out|ios::app);
425 | output_file_ilp_one << interval_ins_count;
426 | }
427 | output_file_ilp_one << " " << cpuClock_interval << endl;
428 |
429 | //output_file_ilp_one << "number of instructions: " << total_ins_count_for_hpc_alignment << endl;
430 | output_file_ilp_one.close();
431 | }
432 |
433 | /***************************************
434 | ILP (all 4 hardcoded window sizes)
435 | ****************************************/
436 |
437 | /* initializing */
438 | void init_ilp_all(){
439 |
440 | int i,j;
441 |
442 | init_ilp_common();
443 | init_ilp_buffering();
444 |
445 | size_pow_all_times_all = 10;
446 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){
447 | all_times_all[i] = (UINT64*)checked_malloc((1 << size_pow_all_times_all) * sizeof(UINT64));
448 | }
449 | index_all_times_all = 1; // don't use first element of all_times_all
450 |
451 | ilp_block_size = _block_size;
452 |
453 | for(j=0; j < ILP_WIN_SIZE_CNT; j++){
454 | windowHead_all[j] = 0;
455 | windowTail_all[j] = 0;
456 | cpuClock_all[j] = 0;
457 | cpuClock_interval_all[j] = 0;
458 | for(i = 0; i < MAX_NUM_REGS; i++){
459 | timeAvailable_all[j][i] = 0;
460 | }
461 |
462 | executionProfile_all[j] = (UINT64*)checked_malloc(win_sizes[j]*sizeof(UINT64));
463 |
464 | for(i = 0; i < (int)win_sizes[j]; i++){
465 | executionProfile_all[j][i] = 0;
466 | }
467 | issueTime_all[j] = 0;
468 | }
469 |
470 | if(interval_size != -1){
471 | if(interval_size % ILP_BUFFER_SIZE != 0){
472 | cerr << "ERROR! Interval size is not a multiple of ILP buffer size. (" << interval_size << " vs " << ILP_BUFFER_SIZE << ")" << endl;
473 | exit(-1);
474 | }
475 | output_file_ilp_all.open(mkfilename("ilp_phases_int"), ios::out|ios::trunc);
476 | output_file_ilp_all.close();
477 | }
478 | }
479 |
480 | /* support */
481 | void increase_size_all_times_all(){
482 | int i;
483 | UINT64* ptr;
484 | size_pow_all_times_all++;
485 |
486 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){
487 | ptr = (UINT64*)realloc(all_times_all[i],(1 << size_pow_all_times_all)*sizeof(UINT64));
488 | if(ptr == (UINT64*)NULL){
489 | cerr << "Could not allocate memory (realloc)!" << endl;
490 | exit(1);
491 | }
492 | all_times_all[i] = ptr;
493 | }
494 | }
495 |
496 | /* per-instruction stuff */
497 | VOID ilp_instr_all(){
498 |
499 | int i;
500 | UINT32 reordered;
501 |
502 |
503 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){
504 |
505 | /* set issue time for tail of instruction window */
506 | executionProfile_all[i][windowTail_all[i]] = issueTime_all[i];
507 | windowTail_all[i] = (windowTail_all[i] + 1) % win_sizes[i];
508 |
509 | /* if instruction window (issue buffer) full */
510 | if(windowHead_all[i] == windowTail_all[i]){
511 | cpuClock_all[i]++;
512 | cpuClock_interval_all[i]++;
513 | reordered = 0;
514 | /* remove all instructions which are done from beginning of window,
515 | * until an instruction comes along which is not ready yet:
516 | * -> check executionProfile_all to see which instructions are done
517 | * -> commit maximum win_size instructions (i.e. stop when issue buffer is empty)
518 | */
519 | while((executionProfile_all[i][windowHead_all[i]] < cpuClock_all[i]) && (reordered < win_sizes[i])) {
520 | windowHead_all[i] = (windowHead_all[i] + 1) % win_sizes[i];
521 | reordered++;
522 | }
523 | //assert(reordered != 0);
524 | }
525 |
526 | /* reset issue times */
527 | issueTime_all[i] = 0;
528 |
529 | }
530 |
531 | }
532 |
533 | VOID ilp_instr_full_all(){
534 |
535 | /* counting instructions is done in all_instr_full() */
536 |
537 | ilp_instr_all();
538 | }
539 |
540 | VOID ilp_instr_intervals_all(){
541 |
542 | int i;
543 |
544 | /* counting instructions is done in all_instr_intervals() */
545 |
546 | if(interval_ins_count_for_hpc_alignment == interval_size){
547 |
548 | output_file_ilp_all.open(mkfilename("ilp_phases_int"), ios::out|ios::app);
549 |
550 | output_file_ilp_all << interval_ins_count;
551 | for(i = 0; i < ILP_WIN_SIZE_CNT; i++)
552 | output_file_ilp_all << " " << cpuClock_interval_all[i];
553 | output_file_ilp_all << endl;
554 |
555 | /* reset */
556 | interval_ins_count = 0;
557 | interval_ins_count_for_hpc_alignment = 0;
558 |
559 | for(i = 0; i < ILP_WIN_SIZE_CNT; i++)
560 | cpuClock_interval_all[i] = 0;
561 |
562 | /* clean up memory used, to avoid memory problems for long (CPU2006) benchmarks */
563 | size_pow_all_times_all = 10;
564 | for(i = 0; i < ILP_WIN_SIZE_CNT; i++){
565 | free(all_times_all[i]);
566 | all_times_all[i] = (UINT64*)checked_malloc((1 << size_pow_all_times_all) * sizeof(UINT64));
567 | }
568 | index_all_times_all = 1;
569 |
570 | nlist* np;
571 | nlist* np_rm;
572 | for(i=0; i < MAX_MEM_TABLE_ENTRIES; i++){
573 | np = memAddressesTable_all[i];
574 | while(np != (nlist*)NULL){
575 | np_rm = np;
576 | np = np->next;
577 | free(np_rm->mem);
578 | free(np_rm);
579 | }
580 | memAddressesTable_all[i] = (nlist*) NULL;
581 | }
582 |
583 | output_file_ilp_all.close();
584 | }
585 |
586 | ilp_instr_all();
587 | }
588 |
589 | VOID checkIssueTime_all(){
590 | int i;
591 |
592 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){
593 | if(cpuClock_all[i] > issueTime_all[i])
594 | issueTime_all[i] = cpuClock_all[i];
595 | }
596 | }
597 |
598 | /* register stuff */
599 | VOID readRegOp_ilp_all(UINT32 regId){
600 | int i;
601 |
602 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){
603 |
604 | if(timeAvailable_all[i][regId] > issueTime_all[i])
605 | issueTime_all[i] = timeAvailable_all[i][regId];
606 | }
607 | }
608 |
609 | VOID writeRegOp_ilp_all(UINT32 regId){
610 | int i;
611 |
612 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){
613 | timeAvailable_all[i][regId] = issueTime_all[i] + 1;
614 | }
615 | }
616 |
617 | /* memory access stuff */
618 | VOID readMem_ilp_all(ADDRINT effAddr, ADDRINT size){
619 |
620 | int i;
621 |
622 | ADDRINT a;
623 | ADDRINT upperMemAddr, indexInChunk;
624 | memNode* chunk = (memNode*)NULL;
625 | ADDRINT shiftedAddr = effAddr >> ilp_block_size;
626 | ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size;
627 |
628 | if(size > 0){
629 | for(a = shiftedAddr; a <= shiftedEndAddr; a++){
630 | upperMemAddr = a >> LOG_MAX_MEM_ENTRIES;
631 | indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES);
632 |
633 | chunk = lookup(memAddressesTable_all,upperMemAddr);
634 | if(chunk == (memNode*)NULL)
635 | chunk = install(memAddressesTable_all,upperMemAddr);
636 |
637 | //assert(indexInChunk < MAX_MEM_ENTRIES);
638 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){
639 |
640 | if(all_times_all[i][chunk->timeAvailable[indexInChunk]] > issueTime_all[i])
641 | issueTime_all[i] = all_times_all[i][chunk->timeAvailable[indexInChunk]];
642 | }
643 | }
644 | }
645 | }
646 |
647 | VOID writeMem_ilp_all(ADDRINT effAddr, ADDRINT size){
648 | int i;
649 |
650 | ADDRINT a;
651 | ADDRINT upperMemAddr, indexInChunk;
652 | memNode* chunk = (memNode*)NULL;
653 | ADDRINT shiftedAddr = effAddr >> ilp_block_size;
654 | ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size;
655 |
656 | if(size > 0){
657 | for(a = shiftedAddr; a <= shiftedEndAddr; a++){
658 | upperMemAddr = a >> LOG_MAX_MEM_ENTRIES;
659 | indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES);
660 |
661 | chunk = lookup(memAddressesTable_all,upperMemAddr);
662 | if(chunk == (memNode*)NULL)
663 | chunk = install(memAddressesTable_all,upperMemAddr);
664 |
665 | //assert(indexInChunk < MAX_MEM_ENTRIES);
666 | if(chunk->timeAvailable[indexInChunk] == 0){
667 | index_all_times_all++;
668 | if(index_all_times_all >= (1 << size_pow_all_times_all))
669 | increase_size_all_times_all();
670 | chunk->timeAvailable[indexInChunk] = index_all_times_all;
671 | }
672 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){
673 | all_times_all[i][chunk->timeAvailable[indexInChunk]] = issueTime_all[i] + 1;
674 | }
675 | }
676 | }
677 | }
678 |
679 | /* instrumenting (instruction level) */
680 | /*VOID instrument_ilp_all(INS ins, VOID* v){
681 |
682 | UINT32 i;
683 | UINT32 maxNumRegsProd, maxNumRegsCons;
684 | REG reg;
685 |
686 |
687 | // register reads and memory reads determine the issue time
688 | maxNumRegsCons = INS_MaxNumRRegs(ins);
689 |
690 | for(i=0; i < maxNumRegsCons; i++){
691 |
692 | reg = INS_RegR(ins, i);
693 |
694 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
695 | // i.e. exlude branch, segment and pin registers, among others
696 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
697 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readRegOp_ilp_all, IARG_UINT32, reg, IARG_END);
698 | }
699 | }
700 |
701 | if(INS_IsMemoryRead(ins)){
702 |
703 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_ilp_all, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END);
704 |
705 | if(INS_HasMemoryRead2(ins)){
706 |
707 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_ilp_all, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END);
708 | }
709 | }
710 |
711 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)checkIssueTime_all, IARG_END);
712 |
713 | // register writes and memory writes determine the time when these locations are available
714 |
715 | maxNumRegsProd = INS_MaxNumWRegs(ins);
716 | for(i=0; i < maxNumRegsProd; i++){
717 |
718 | reg = INS_RegW(ins, i);
719 |
720 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
721 | // i.e. exlude branch, segment and pin registers, among others
722 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
723 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)writeRegOp_ilp_all, IARG_UINT32, reg, IARG_END);
724 | }
725 | }
726 |
727 | if(INS_IsMemoryWrite(ins)){
728 |
729 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)writeMem_ilp_all, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END);
730 | }
731 |
732 | // count instructions
733 | if(interval_size == -1)
734 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_instr_full_all,IARG_END);
735 | else
736 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_instr_intervals_all, IARG_END);
737 | }*/
738 |
739 | /* finishing... */
740 | VOID fini_ilp_all(INT32 code, VOID* v){
741 |
742 | int i;
743 |
744 | fini_ilp_buffering_all();
745 |
746 | if(interval_size == -1){
747 | output_file_ilp_all.open(mkfilename("ilp_full_int"), ios::out|ios::trunc);
748 | output_file_ilp_all << total_ins_count;
749 | }
750 | else{
751 | output_file_ilp_all.open(mkfilename("ilp_phases_int"), ios::out|ios::app);
752 | output_file_ilp_all << interval_ins_count;
753 | }
754 | for(i = 0; i < ILP_WIN_SIZE_CNT; i++)
755 | output_file_ilp_all << " " << cpuClock_interval_all[i];
756 | output_file_ilp_all << " ";
757 |
758 | output_file_ilp_all << endl;
759 | //output_file_ilp_all << "number of instructions: " << total_ins_count_for_hpc_alignment << endl;
760 | output_file_ilp_all.close();
761 | }
762 |
763 | /**************************
764 | ILP (BUFFERING)
765 | ***************************/
766 |
/*
 * notes
 *
 * Using PIN_FAST_ANALYSIS_CALL for the buffering functions was tested
 * during the preparation of MICA v0.3, but it turned out to slow things
 * down slightly instead of speeding them up, so it was dropped in the end.
 */
774 |
775 | /* initializing */
776 | void init_ilp_buffering(){
777 |
778 | int i;
779 |
780 | ilp_buffer_index = 0;
781 | for(i=0; i < ILP_BUFFER_SIZE; i++){
782 | ilp_buffer[i] = (ilp_buffer_entry*)checked_malloc(sizeof(ilp_buffer_entry));
783 | ilp_buffer[i]->e = (ins_buffer_entry*)NULL;
784 | ilp_buffer[i]->mem_read1_addr = 0;
785 | ilp_buffer[i]->mem_read2_addr = 0;
786 | ilp_buffer[i]->mem_read_size = 0;
787 | ilp_buffer[i]->mem_write_addr = 0;
788 | ilp_buffer[i]->mem_write_size = 0;
789 | }
790 | }
791 |
792 | VOID ilp_buffer_instruction_only(void* _e){
793 | ilp_buffer[ilp_buffer_index]->e = (ins_buffer_entry*)_e;
794 | }
795 |
796 | VOID ilp_buffer_instruction_read(ADDRINT read1_addr, ADDRINT read_size){
797 | ilp_buffer[ilp_buffer_index]->mem_read1_addr = read1_addr;
798 | ilp_buffer[ilp_buffer_index]->mem_read_size = read_size;
799 | }
800 |
801 | VOID ilp_buffer_instruction_read2(ADDRINT read2_addr){
802 | ilp_buffer[ilp_buffer_index]->mem_read2_addr = read2_addr;
803 | }
804 |
805 | VOID ilp_buffer_instruction_write(ADDRINT write_addr, ADDRINT write_size){
806 | ilp_buffer[ilp_buffer_index]->mem_write_addr = write_addr;
807 | ilp_buffer[ilp_buffer_index]->mem_write_size = write_size;
808 | }
809 |
810 | ADDRINT ilp_buffer_instruction_next(){
811 | ilp_buffer_index++;
812 | return (ADDRINT)(ilp_buffer_index == ILP_BUFFER_SIZE || interval_ins_count_for_hpc_alignment == interval_size);
813 | }
814 |
815 | /* empty buffer for one given window size */
816 | VOID empty_buffer_one(){
817 | UINT32 i,j;
818 |
819 | for(i=0; i < ilp_buffer_index; i++){
820 |
821 | // register reads
822 | for(j=0; j < (UINT32)ilp_buffer[i]->e->regReadCnt; j++){
823 | readRegOp_ilp_one((UINT32)ilp_buffer[i]->e->regsRead[j]);
824 | }
825 |
826 | // memory reads
827 | if(ilp_buffer[i]->mem_read1_addr != 0){
828 | readMem_ilp_one(ilp_buffer[i]->mem_read1_addr, ilp_buffer[i]->mem_read_size);
829 | ilp_buffer[i]->mem_read1_addr = 0;
830 |
831 | if(ilp_buffer[i]->mem_read2_addr != 0){
832 | readMem_ilp_one(ilp_buffer[i]->mem_read2_addr, ilp_buffer[i]->mem_read_size);
833 | ilp_buffer[i]->mem_read2_addr = 0;
834 | }
835 |
836 | ilp_buffer[i]->mem_read_size = 0;
837 | }
838 |
839 | checkIssueTime_one();
840 |
841 | // register writes
842 | for(j=0; j < (UINT32)ilp_buffer[i]->e->regWriteCnt; j++){
843 | writeRegOp_ilp_one((UINT32)ilp_buffer[i]->e->regsWritten[j]);
844 | }
845 |
846 | // memory writes
847 | if(ilp_buffer[i]->mem_write_addr != 0){
848 | writeMem_ilp_one(ilp_buffer[i]->mem_write_addr, ilp_buffer[i]->mem_write_size);
849 | ilp_buffer[i]->mem_write_addr = 0;
850 | ilp_buffer[i]->mem_write_size = 0;
851 | }
852 |
853 | ilp_buffer[i]->e = (ins_buffer_entry*)NULL;
854 |
855 | if(interval_size == -1)
856 | ilp_instr_full_one();
857 | else
858 | ilp_instr_intervals_one();
859 | }
860 |
861 | ilp_buffer_index = 0;
862 | }
863 |
864 | /* empty buffer for all 4 (hardcoded) window sizes */
865 | VOID empty_ilp_buffer_all(){
866 | UINT32 i,j;
867 |
868 | for(i=0; i < ilp_buffer_index; i++){
869 |
870 | // register reads
871 | for(j=0; j < (UINT32)ilp_buffer[i]->e->regReadCnt; j++){
872 | readRegOp_ilp_all((UINT32)ilp_buffer[i]->e->regsRead[j]);
873 | }
874 |
875 | // memory reads
876 | if(ilp_buffer[i]->mem_read1_addr != 0){
877 | readMem_ilp_all(ilp_buffer[i]->mem_read1_addr, ilp_buffer[i]->mem_read_size);
878 | ilp_buffer[i]->mem_read1_addr = 0;
879 |
880 | if(ilp_buffer[i]->mem_read2_addr != 0){
881 | readMem_ilp_all(ilp_buffer[i]->mem_read2_addr, ilp_buffer[i]->mem_read_size);
882 | ilp_buffer[i]->mem_read2_addr = 0;
883 | }
884 |
885 | ilp_buffer[i]->mem_read_size = 0;
886 | }
887 |
888 | checkIssueTime_all();
889 |
890 | // register writes
891 | for(j=0; j < (UINT32)ilp_buffer[i]->e->regWriteCnt; j++){
892 | writeRegOp_ilp_all((UINT32)ilp_buffer[i]->e->regsWritten[j]);
893 | }
894 |
895 | // memory writes
896 | if(ilp_buffer[i]->mem_write_addr != 0){
897 | writeMem_ilp_all(ilp_buffer[i]->mem_write_addr, ilp_buffer[i]->mem_write_size);
898 | ilp_buffer[i]->mem_write_addr = 0;
899 | ilp_buffer[i]->mem_write_size = 0;
900 | }
901 |
902 | ilp_buffer[i]->e = (ins_buffer_entry*)NULL;
903 |
904 | if(interval_size == -1)
905 | ilp_instr_full_all();
906 | else
907 | ilp_instr_intervals_all();
908 | }
909 |
910 | ilp_buffer_index = 0;
911 | }
912 |
913 | /* instrumenting (instruction level) */
914 | VOID instrument_ilp_buffering_common(INS ins, ins_buffer_entry* e){
915 |
916 | UINT32 i, maxNumRegsProd, maxNumRegsCons, regReadCnt, regWriteCnt;
917 | REG reg;
918 |
919 | // buffer register reads per static instruction
920 | if(!e->setRead){
921 |
922 |
923 | // register reads and memory reads determine the issue time
924 | maxNumRegsCons = INS_MaxNumRRegs(ins);
925 |
926 | regReadCnt = 0;
927 | for(i=0; i < maxNumRegsCons; i++){
928 | reg = INS_RegR(ins, i);
929 | //assert((UINT32)reg < MAX_NUM_REGS);
930 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
931 | // i.e. exlude branch, segment and pin registers, among others
932 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
933 | regReadCnt++;
934 | }
935 | }
936 |
937 | e->regReadCnt = regReadCnt;
938 | e->regsRead = (REG*)checked_malloc(regReadCnt*sizeof(REG));
939 |
940 | regReadCnt = 0;
941 | for(i=0; i < maxNumRegsCons; i++){
942 |
943 | reg = INS_RegR(ins, i);
944 |
945 | //assert((UINT32)reg < MAX_NUM_REGS);
946 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
947 | // i.e. exlude branch, segment and pin registers, among others
948 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
949 | e->regsRead[regReadCnt++] = reg;
950 | }
951 | }
952 |
953 | e->setRead = true;
954 |
955 | }
956 |
957 | // buffer register writes per static instruction
958 | if(!e->setWritten){
959 | maxNumRegsProd = INS_MaxNumWRegs(ins);
960 |
961 | regWriteCnt = 0;
962 | for(i=0; i < maxNumRegsProd; i++){
963 |
964 | reg = INS_RegW(ins, i);
965 |
966 | //assert((UINT32)reg < MAX_NUM_REGS);
967 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
968 | // i.e. exlude branch, segment and pin registers, among others */
969 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
970 | regWriteCnt++;
971 | }
972 | }
973 |
974 | e->regWriteCnt = regWriteCnt;
975 | e->regsWritten = (REG*)checked_malloc(regWriteCnt*sizeof(REG));
976 |
977 | regWriteCnt = 0;
978 | for(i=0; i < maxNumRegsProd; i++){
979 |
980 | reg = INS_RegW(ins, i);
981 |
982 | //assert((UINT32)reg < MAX_NUM_REGS);
983 | // only consider valid general-purpose registers (any bit-width) and floating-point registers,
984 | // i.e. exlude branch, segment and pin registers, among others
985 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
986 | e->regsWritten[regWriteCnt++] = reg;
987 | }
988 | }
989 |
990 | e->setWritten = true;
991 | }
992 |
993 | // buffer memory operations (and instruction register buffer) with one single InsertCall
994 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_only, IARG_PTR, (void*)e, IARG_END);
995 |
996 | if(INS_IsMemoryRead(ins)){
997 |
998 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_read, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END);
999 |
1000 | if(INS_HasMemoryRead2(ins)){
1001 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_read2, IARG_MEMORYREAD2_EA, IARG_END);
1002 | }
1003 | }
1004 |
1005 | if(INS_IsMemoryWrite(ins)){
1006 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_write, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END);
1007 | }
1008 |
1009 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_next, IARG_END);
1010 |
1011 | }
1012 |
1013 | VOID instrument_ilp_one(INS ins, ins_buffer_entry* e){
1014 |
1015 | instrument_ilp_buffering_common(ins, e);
1016 | // only called if buffer is full
1017 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)empty_buffer_one, IARG_END);
1018 | }
1019 |
1020 | VOID instrument_ilp_all(INS ins, ins_buffer_entry* e){
1021 |
1022 | instrument_ilp_buffering_common(ins, e);
1023 | // only called if buffer is full
1024 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)empty_ilp_buffer_all, IARG_END);
1025 | }
1026 |
1027 | VOID fini_ilp_buffering_all(){
1028 |
1029 | if(ilp_buffer_index != 0)
1030 | empty_ilp_buffer_all();
1031 | }
1032 |
1033 | VOID fini_ilp_buffering_one(){
1034 |
1035 | if(ilp_buffer_index != 0)
1036 | empty_buffer_one();
1037 | }
1038 |
1039 |
--------------------------------------------------------------------------------
/mica_ilp.h:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "mica.h"
11 | #include "mica_utils.h"
12 |
13 | void init_ilp_all();
14 | void init_ilp_one();
15 |
16 | VOID instrument_ilp_all(INS ins, ins_buffer_entry* e);
17 | VOID instrument_ilp_one(INS ins, ins_buffer_entry* e);
18 |
19 | VOID fini_ilp_all(INT32 code, VOID* v);
20 | VOID fini_ilp_one(INT32 code, VOID* v);
21 |
22 | /* support for fast instrumentation of all characteristics in a single run (avoid multiple InsertCalls!) */
23 | //void ilp_buffer_instruction_only(void* _e);
24 | VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_only(void* _e);
25 | //void ilp_buffer_instruction_read(ADDRINT read1_addr, ADDRINT read_size);
26 | VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_read(ADDRINT read1_addr, ADDRINT read_size);
27 | //void ilp_buffer_instruction_read2(ADDRINT read2_addr);
28 | VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_read2(ADDRINT read2_addr);
29 | //void ilp_buffer_instruction_write(ADDRINT write_addr, ADDRINT write_size);
30 | VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_write(ADDRINT write_addr, ADDRINT write_size);
31 | ADDRINT ilp_buffer_instruction_next();
32 | /*ADDRINT ilp_buffer_instruction_2reads_write(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, ADDRINT write_addr, ADDRINT write_size);
33 | ADDRINT ilp_buffer_instruction_read_write(void* _e, ADDRINT read1_addr, ADDRINT read_size, ADDRINT write_addr, ADDRINT write_size);
34 | ADDRINT ilp_buffer_instruction_2reads(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size);
35 | ADDRINT ilp_buffer_instruction_read(void* _e, ADDRINT read1_addr, ADDRINT read_size);
36 | ADDRINT ilp_buffer_instruction_write(void* _e, ADDRINT write_addr, ADDRINT write_size);
37 | ADDRINT ilp_buffer_instruction(void* _e);*/
38 | VOID empty_ilp_buffer_all();
39 |
--------------------------------------------------------------------------------
/mica_init.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | //#include
11 | //#include
12 | //#include
13 |
14 | /* MICA includes */
15 | #include "mica_init.h"
16 |
17 | /*
18 | * Setup MICA log file.
19 | */
20 | void setup_mica_log(ofstream *log){
21 |
22 | (*log).open("mica.log", ios::out|ios::trunc);
23 | if(!(*log).is_open()){
24 | ERROR_MSG("Could not create log file, aborting.");
25 | exit(1);
26 | }
27 | }
28 |
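/*
 * Read mica.conf config file for MICA.
 *
 * analysis_type: 'all' | 'ilp' | 'ilp_one' | 'itypes' | 'ppm' | 'reg' | 'stride' | 'memfootprint' | 'memstackdist' | 'custom'
 * interval_size: 'full' | <number of instructions per interval>
 * ilp_size: <ILP window size>
 * block_size: <log2 of the block size in bytes>
 * page_size: <log2 of the page size in bytes>
 * itypes_spec_file: <file name>
 * append_pid: 'yes' | 'no'
 */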
/*
 * Config parameters understood in mica.conf. CONF_PAR_CNT is the number of
 * real parameters; config_params_str holds their names in enum order and
 * must have exactly one entry per parameter, because the table is printed
 * in full when an unknown parameter is encountered.
 */
enum CONFIG_PARAM {UNKNOWN_CONFIG_PARAM = -1, ANALYSIS_TYPE = 0, INTERVAL_SIZE, ILP_SIZE, _BLOCK_SIZE, _PAGE_SIZE, ITYPES_SPEC_FILE, APPEND_PID, CONF_PAR_CNT};
// "append_pid" used to be missing here, which left the last slot NULL
const char* config_params_str[CONF_PAR_CNT] = {"analysis_type", "interval_size", "ilp_size", "block_size", "page_size", "itypes_spec_file", "append_pid"};

/*
 * Analysis types selectable through the "analysis_type" parameter, with
 * their names in enum order.
 */
enum ANALYSIS_TYPE {UNKNOWN_ANALYSIS_TYPE = -1, ALL=0, ILP, ILP_ONE, ITYPES, PPM, MICA_REG, STRIDE, MEMFOOTPRINT, MEMSTACKDIST, CUSTOM, ANA_TYPE_CNT};
const char* analysis_types_str[ANA_TYPE_CNT] = { "all", "ilp", "ilp_one", "itypes", "ppm", "reg", "stride", "memfootprint", "memstackdist", "custom"};
41 |
42 | enum CONFIG_PARAM findConfigParam(char* s){
43 |
44 | if(strcmp(s, "analysis_type") == 0){ return ANALYSIS_TYPE; }
45 | if(strcmp(s, "interval_size") == 0){ return INTERVAL_SIZE; }
46 | if(strcmp(s, "ilp_size") == 0){ return ILP_SIZE; }
47 | if(strcmp(s, "block_size") == 0){ return _BLOCK_SIZE; }
48 | if(strcmp(s, "page_size") == 0){ return _PAGE_SIZE; }
49 | if(strcmp(s, "itypes_spec_file") == 0){ return ITYPES_SPEC_FILE; }
50 | if(strcmp(s, "append_pid") == 0){ return APPEND_PID; }
51 |
52 | return UNKNOWN_CONFIG_PARAM;
53 | }
54 |
55 | enum ANALYSIS_TYPE findAnalysisType(char* s){
56 |
57 | if(strcmp(s, "all") == 0){ return ALL; }
58 | if(strcmp(s, "ilp") == 0){ return ILP; }
59 | if(strcmp(s, "ilp_one") == 0){ return ILP_ONE; }
60 | if(strcmp(s, "itypes") == 0){ return ITYPES; }
61 | if(strcmp(s, "ppm") == 0){ return PPM; }
62 | if(strcmp(s, "reg") == 0){ return MICA_REG; }
63 | if(strcmp(s, "stride") == 0){ return STRIDE; }
64 | if(strcmp(s, "memfootprint") == 0){ return MEMFOOTPRINT; }
65 | if(strcmp(s, "memstackdist") == 0){ return MEMSTACKDIST; }
66 | if(strcmp(s, "custom") == 0){ return CUSTOM; }
67 |
68 | return UNKNOWN_ANALYSIS_TYPE;
69 | }
70 |
71 | void read_config(ofstream* log, INT64* intervalSize, MODE* mode, UINT32* _ilp_win_size, UINT32* _block_size, UINT32* _page_size, char** _itypes_spec_file, int* append_pid){
72 |
73 | int i;
74 | char* param;
75 | char* val;
76 | FILE* config_file = fopen("mica.conf","r");
77 |
78 | /* a config file named 'mica.conf' is required */
79 | if(config_file == (FILE*)NULL){
80 | cerr << "ERROR: No config file 'mica.conf' found, please create one!" << endl;
81 | (*log) << "ERROR: No config file 'mica.conf' found, please create one!" << endl;
82 | exit(1);
83 | }
84 |
85 | (*log) << "Reading config file ..." << endl;
86 |
87 | param = (char*)checked_malloc(1000*sizeof(char));
88 | val = (char*)checked_malloc(1000*sizeof(char));
89 |
90 | // default values
91 | *mode = UNKNOWN_MODE;
92 | *_ilp_win_size = 0;
93 | *_block_size = 6; // default block size = 64 bytes (2^6)
94 | *_page_size = 12; // default page size = 4KB (2^12)
95 |
96 | while(!feof(config_file)){
97 |
98 | if (fscanf(config_file, "%[^:]: %s\n", param, val) != 2)
99 | {
100 | cerr << "ERROR: invalid config entry found" << endl;
101 | (*log) << "ERROR: invalid config entry found" << endl;
102 | exit(1);
103 | }
104 |
105 | switch(findConfigParam(param)){
106 |
107 | case ANALYSIS_TYPE:
108 | // figure out mode we are running in
109 | cerr << "Analysis type: " << val << endl;
110 |
111 | switch(findAnalysisType(val)){
112 |
113 | case ALL:
114 | *mode = MODE_ALL;
115 | cerr << "Measuring ALL characteristics..." << endl;
116 | (*log) << "Measuring ALL characteristics..." << endl;
117 | break;
118 |
119 | case ILP:
120 | *mode = MODE_ILP;
121 | cerr << "Measuring ILP characteristics..." << endl;
122 | (*log) << "Measuring ILP characteristics..." << endl;
123 | break;
124 |
125 | case ILP_ONE:
126 | *mode = MODE_ILP_ONE;
127 | cerr << "Measuring ILP characteristics for a given window size..." << endl;
128 | (*log) << "Measuring ILP characteristics for a given window size..." << endl;
129 | break;
130 |
131 | case ITYPES:
132 | *mode = MODE_ITYPES;
133 | cerr << "Measuring ITYPES characteristics..." << endl;
134 | (*log) << "Measuring ITYPES characteristics..." << endl;
135 | break;
136 |
137 | case PPM:
138 | *mode = MODE_PPM;
139 | cerr << "Measuring PPM characteristics..." << endl;
140 | (*log) << "Measuring PPM characteristics..." << endl;
141 | break;
142 |
143 | case MICA_REG:
144 | *mode = MODE_REG;
145 | cerr << "Measuring REG characteristics..." << endl;
146 | (*log) << "Measuring REG characteristics..." << endl;
147 | break;
148 |
149 | case STRIDE:
150 | *mode = MODE_STRIDE;
151 | cerr << "Measuring STRIDE characteristics..." << endl;
152 | (*log) << "Measuring STRIDE characteristics..." << endl;
153 | break;
154 |
155 | case MEMFOOTPRINT:
156 | *mode = MODE_MEMFOOTPRINT;
157 | cerr << "Measuring MEMFOOTPRINT characteristics..." << endl;
158 | (*log) << "Measuring MEMFOOTPRINT characteristics..." << endl;
159 | break;
160 |
161 | case MEMSTACKDIST:
162 | *mode = MODE_MEMSTACKDIST;
163 | cerr << "Measuring MEMSTACKDIST characteristics..." << endl;
164 | (*log) << "Measuring MEMSTACKDIST characteristics..." << endl;
165 | break;
166 |
167 | case CUSTOM:
168 | *mode = MODE_CUSTOM;
169 | (*log) << "Measuring CUSTOM characteristics..." << endl;
170 | break;
171 |
172 | default:
173 | (*log) << endl << "ERROR: Unknown analysis type chosen!" << endl;
174 | cerr << "Known analysis types:" << endl;
175 | for(i=0; i < ANA_TYPE_CNT; i++){
176 | cerr << "\t" << analysis_types_str[i] << endl;
177 | }
178 | break;
179 | }
180 | break;
181 |
182 | case INTERVAL_SIZE:
183 | cerr << "interval size: " << val << endl;
184 | (*log) << "interval size: " << val << endl;
185 |
186 | if(strcmp(val, "full") == 0){
187 | *intervalSize = -1;
188 | cerr << "Returning data for full execution..." << endl;
189 | (*log) << "Returning data for full execution..." << endl;
190 | }
191 | else{
192 | *intervalSize = (INT64) atoll(val);
193 | cerr << "Returning data for each interval of " << *intervalSize << " instructions..." << endl;
194 | (*log) << "Returning data for each interval of " << *intervalSize << " instructions..." << endl;
195 | }
196 | break;
197 |
198 | case ILP_SIZE:
199 |
200 | *_ilp_win_size = (UINT32)atoi(val);
201 | cerr << "ILP window size: " << *_ilp_win_size << endl;
202 | (*log) << "ILP window size: " << *_ilp_win_size << endl;
203 | break;
204 |
205 | case _BLOCK_SIZE:
206 | *_block_size = (UINT32)atoi(val);
207 | cerr << "block size: 2^" << *_block_size << endl;
208 | (*log) << "block size: 2^" << *_block_size << endl;
209 | break;
210 |
211 | case _PAGE_SIZE:
212 | *_page_size = (UINT32)atoi(val);
213 | cerr << "page size: 2^" << *_page_size << endl;
214 | (*log) << "page size: 2^" << *_page_size << endl;
215 | break;
216 |
217 | case ITYPES_SPEC_FILE:
218 | *_itypes_spec_file = (char*)checked_malloc((strlen(val)+1)*sizeof(char));
219 | strcpy(*_itypes_spec_file, val);
220 | cerr << "ITYPES spec file: " << *_itypes_spec_file << endl;
221 | (*log) << "ITYPES spec file: " << *_itypes_spec_file << endl;
222 | break;
223 |
224 | case APPEND_PID:
225 | if (strcmp(val, "yes")==0)
226 | {
227 | *append_pid = 1;
228 | cerr << "append pid: yes" << endl;
229 | (*log) << "append pid: yes" << endl;
230 | }
231 | else if (strcmp(val, "no")==0)
232 | {
233 | *append_pid = 0;
234 | cerr << "append pid: no" << endl;
235 | (*log) << "append pid: no" << endl;
236 | }
237 | else
238 | {
239 | cerr << "ERROR! append_pid can be either yes or no" << endl;
240 | (*log) << "ERROR! append_pid can be either yes or no" << endl;
241 | exit(1);
242 | }
243 | break;
244 | default:
245 | cerr << "ERROR: Unknown config parameter specified: " << param << " (" << val << ")" << endl;
246 | cerr << "Known config parameters:" << endl;
247 | (*log) << "ERROR: Unknown config parameter specified: " << param << " (" << val << ")" << endl;
248 | (*log) << "Known config parameters:" << endl;
249 | for(i=0; i < CONF_PAR_CNT; i++){
250 | cerr << "\t" << config_params_str[i] << endl;
251 | (*log) << "\t" << config_params_str[i] << endl;
252 | }
253 | exit(1);
254 | break;
255 | }
256 | }
257 | cerr << "All done reading config" << endl;
258 | (*log) << "All done reading config" << endl;
259 |
260 | if(*mode == UNKNOWN_MODE){
261 | cerr << "ERROR! No mode specified, the mica.conf file should specify the \"analysis_type\" config parameter." << endl;
262 | (*log) << "ERROR! No mode specified, the mica.conf file should specify the \"analysis_type\" config parameter." << endl;
263 | exit(1);
264 | }
265 |
266 | if(*mode == MODE_ILP_ONE && *_ilp_win_size == 0){
267 | cerr << "ERROR! \"ilp_one\" mode was specified, but no window size (ilp_size) was found along with it!" << endl;
268 | (*log) << "ERROR! ERROR! \"ilp_one\" mode was specified, but no window size (ilp_size) was found along with it!" << endl;
269 | exit(1);
270 | }
271 |
272 | (*log).close();
273 |
274 | free(param);
275 | free(val);
276 |
277 | }
278 |
--------------------------------------------------------------------------------
/mica_init.h:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "mica.h"
11 | #include "mica_ilp.h"
12 | #include "mica_itypes.h"
13 | #include "mica_ppm.h"
14 | #include "mica_reg.h"
15 | #include "mica_stride.h"
16 | #include "mica_memfootprint.h"
17 | #include "mica_memstackdist.h"
18 |
19 | enum MODE { UNKNOWN_MODE, MODE_ALL, MODE_ILP, MODE_ILP_ONE, MODE_ITYPES, MODE_PPM, MODE_REG, MODE_STRIDE, MODE_MEMFOOTPRINT, MODE_MEMSTACKDIST, MODE_CUSTOM };
20 |
21 | void setup_mica_log(ofstream *log);
22 |
23 | void read_config(ofstream *log, INT64* interval_size, MODE* mode, UINT32* _ilp_win_size, UINT32* _block_size, UINT32* _page_size, char** _itypes_spec_file, int* append_pid);
24 |
--------------------------------------------------------------------------------
/mica_itypes.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "pin.H"
11 |
12 | /* MICA includes */
13 | #include "mica_utils.h"
14 | #include "mica_itypes.h"
15 |
16 | /* Global variables */
17 |
18 | extern INT64 interval_size;
19 | extern INT64 interval_ins_count;
20 | extern INT64 interval_ins_count_for_hpc_alignment;
21 | extern INT64 total_ins_count;
22 | extern INT64 total_ins_count_for_hpc_alignment;
23 | extern char* _itypes_spec_file;
24 |
25 | ofstream output_file_itypes;
26 |
27 | identifier** group_identifiers;
28 | INT64* group_ids_cnt;
29 | INT64* group_counts;
30 | INT64 number_of_groups;
31 |
32 | INT64 other_ids_cnt;
33 | INT64 other_ids_max_cnt;
34 | identifier* other_group_identifiers;
35 |
36 | /* counter functions */
37 | ADDRINT itypes_instr_intervals(){
38 | return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size);
39 | };
40 |
41 | VOID itypes_instr_interval_output(){
42 | int i;
43 | output_file_itypes.open(mkfilename("itypes_phases_int"), ios::out|ios::app);
44 | output_file_itypes << interval_size;
45 | for(i=0; i < number_of_groups+1; i++){
46 | output_file_itypes << " " << group_counts[i];
47 | }
48 | output_file_itypes << endl;
49 | output_file_itypes.close();
50 | }
51 |
52 | VOID itypes_instr_interval_reset(){
53 | int i;
54 | for(i=0; i < number_of_groups+1; i++){
55 | group_counts[i] = 0;
56 | }
57 | }
58 |
59 | VOID itypes_instr_interval(){
60 |
61 | itypes_instr_interval_output();
62 | itypes_instr_interval_reset();
63 | interval_ins_count = 0;
64 | interval_ins_count_for_hpc_alignment = 0;
65 | }
66 |
67 | VOID itypes_count(UINT32 gid){
68 | group_counts[gid]++;
69 | };
70 |
71 | // initialize default groups
72 | VOID init_itypes_default_groups(){
73 |
74 | number_of_groups = 12;
75 |
76 | group_identifiers = (identifier**)checked_malloc((number_of_groups+1)*sizeof(identifier*));
77 | group_ids_cnt = (INT64*)checked_malloc((number_of_groups+1)*sizeof(INT64));
78 | group_counts = (INT64*)checked_malloc((number_of_groups+1)*sizeof(INT64));
79 | for(int i=0; i < number_of_groups+1; i++){
80 | group_counts[i] = 0;
81 | }
82 |
83 | // memory reads
84 | group_ids_cnt[0] = 1;
85 | group_identifiers[0] = (identifier*)checked_malloc(group_ids_cnt[0]*sizeof(identifier));
86 | group_identifiers[0][0].type = identifier_type::ID_TYPE_SPECIAL;
87 | group_identifiers[0][0].str = checked_strdup("mem_read");
88 |
89 | // memory writes
90 | group_ids_cnt[1] = 1;
91 | group_identifiers[1] = (identifier*)checked_malloc(group_ids_cnt[1]*sizeof(identifier));
92 | group_identifiers[1][0].type = identifier_type::ID_TYPE_SPECIAL;
93 | group_identifiers[1][0].str = checked_strdup("mem_write");
94 |
95 | // control flow instructions
96 | group_ids_cnt[2] = 5;
97 | group_identifiers[2] = (identifier*)checked_malloc(group_ids_cnt[2]*sizeof(identifier));
98 | group_identifiers[2][0].type = identifier_type::ID_TYPE_CATEGORY;
99 | group_identifiers[2][0].str = checked_strdup("COND_BR");
100 | group_identifiers[2][1].type = identifier_type::ID_TYPE_CATEGORY;
101 | group_identifiers[2][1].str = checked_strdup("UNCOND_BR");
102 | group_identifiers[2][2].type = identifier_type::ID_TYPE_OPCODE;
103 | group_identifiers[2][2].str = checked_strdup("LEAVE");
104 | group_identifiers[2][3].type = identifier_type::ID_TYPE_OPCODE;
105 | group_identifiers[2][3].str = checked_strdup("RET_NEAR");
106 | group_identifiers[2][4].type = identifier_type::ID_TYPE_OPCODE;
107 | group_identifiers[2][4].str = checked_strdup("CALL_NEAR");
108 |
109 | // arithmetic instructions (integer)
110 | group_ids_cnt[3] = 5;
111 | group_identifiers[3] = (identifier*)checked_malloc(group_ids_cnt[3]*sizeof(identifier));
112 | group_identifiers[3][0].type = identifier_type::ID_TYPE_CATEGORY;
113 | group_identifiers[3][0].str = checked_strdup("LOGICAL");
114 | group_identifiers[3][1].type = identifier_type::ID_TYPE_CATEGORY;
115 | group_identifiers[3][1].str = checked_strdup("DATAXFER");
116 | group_identifiers[3][2].type = identifier_type::ID_TYPE_CATEGORY;
117 | group_identifiers[3][2].str = checked_strdup("BINARY");
118 | group_identifiers[3][3].type = identifier_type::ID_TYPE_CATEGORY;
119 | group_identifiers[3][3].str = checked_strdup("FLAGOP");
120 | group_identifiers[3][4].type = identifier_type::ID_TYPE_CATEGORY;
121 | group_identifiers[3][4].str = checked_strdup("BITBYTE");
122 |
123 | // floating point instructions
124 | group_ids_cnt[4] = 2;
125 | group_identifiers[4] = (identifier*)checked_malloc(group_ids_cnt[4]*sizeof(identifier));
126 | group_identifiers[4][0].type = identifier_type::ID_TYPE_CATEGORY;
127 | group_identifiers[4][0].str = checked_strdup("X87_ALU");
128 | group_identifiers[4][1].type = identifier_type::ID_TYPE_CATEGORY;
129 | group_identifiers[4][1].str = checked_strdup("FCMOV");
130 |
131 | // pop/push instructions (stack usage)
132 | group_ids_cnt[5] = 2;
133 | group_identifiers[5] = (identifier*)checked_malloc(group_ids_cnt[5]*sizeof(identifier));
134 | group_identifiers[5][0].type = identifier_type::ID_TYPE_CATEGORY;
135 | group_identifiers[5][0].str = checked_strdup("POP");
136 | group_identifiers[5][1].type = identifier_type::ID_TYPE_CATEGORY;
137 | group_identifiers[5][1].str = checked_strdup("PUSH");
138 |
139 | // [!] shift instructions (bitwise)
140 | group_ids_cnt[6] = 1;
141 | group_identifiers[6] = (identifier*)checked_malloc(group_ids_cnt[6]*sizeof(identifier));
142 | group_identifiers[6][0].type = identifier_type::ID_TYPE_CATEGORY;
143 | group_identifiers[6][0].str = checked_strdup("SHIFT");
144 |
145 | // [!] string instructions
146 | group_ids_cnt[7] = 1;
147 | group_identifiers[7] = (identifier*)checked_malloc(group_ids_cnt[7]*sizeof(identifier));
148 | group_identifiers[7][0].type = identifier_type::ID_TYPE_CATEGORY;
149 | group_identifiers[7][0].str = checked_strdup("STRINGOP");
150 |
151 | // [!] MMX/SSE instructions
152 | group_ids_cnt[8] = 2;
153 | group_identifiers[8] = (identifier*)checked_malloc(group_ids_cnt[8]*sizeof(identifier));
154 | group_identifiers[8][0].type = identifier_type::ID_TYPE_CATEGORY;
155 | group_identifiers[8][0].str = checked_strdup("MMX");
156 | group_identifiers[8][1].type = identifier_type::ID_TYPE_CATEGORY;
157 | group_identifiers[8][1].str = checked_strdup("SSE");
158 |
159 | // other (interrupts, rotate instructions, semaphore, conditional move, system)
160 | group_ids_cnt[9] = 8;
161 | group_identifiers[9] = (identifier*)checked_malloc(group_ids_cnt[9]*sizeof(identifier));
162 | group_identifiers[9][0].type = identifier_type::ID_TYPE_CATEGORY;
163 | group_identifiers[9][0].str = checked_strdup("INTERRUPT");
164 | group_identifiers[9][1].type = identifier_type::ID_TYPE_CATEGORY;
165 | group_identifiers[9][1].str = checked_strdup("ROTATE");
166 | group_identifiers[9][2].type = identifier_type::ID_TYPE_CATEGORY;
167 | group_identifiers[9][2].str = checked_strdup("SEMAPHORE");
168 | group_identifiers[9][3].type = identifier_type::ID_TYPE_CATEGORY;
169 | group_identifiers[9][3].str = checked_strdup("CMOV");
170 | group_identifiers[9][4].type = identifier_type::ID_TYPE_CATEGORY;
171 | group_identifiers[9][4].str = checked_strdup("SYSTEM");
172 | group_identifiers[9][5].type = identifier_type::ID_TYPE_CATEGORY;
173 | group_identifiers[9][5].str = checked_strdup("MISC");
174 | group_identifiers[9][6].type = identifier_type::ID_TYPE_CATEGORY;
175 | group_identifiers[9][6].str = checked_strdup("PREFETCH");
176 | group_identifiers[9][7].type = identifier_type::ID_TYPE_CATEGORY;
177 | group_identifiers[9][7].str = checked_strdup("SYSCALL");
178 |
179 | // [!] NOP instructions
180 | group_ids_cnt[10] = 2;
181 | group_identifiers[10] = (identifier*)checked_malloc(group_ids_cnt[10]*sizeof(identifier));
182 | group_identifiers[10][0].type = identifier_type::ID_TYPE_CATEGORY;
183 | group_identifiers[10][0].str = checked_strdup("WIDENOP");
184 | group_identifiers[10][1].type = identifier_type::ID_TYPE_CATEGORY;
185 | group_identifiers[10][1].str = checked_strdup("NOP");
186 |
187 | // register transfer instructions (move from a register to another register)
188 | group_ids_cnt[11] = 1;
189 | group_identifiers[11] = (identifier*)checked_malloc(group_ids_cnt[11]*sizeof(identifier));
190 | group_identifiers[11][0].type = identifier_type::ID_TYPE_SPECIAL;
191 | group_identifiers[11][0].str = checked_strdup("reg_transfer");
192 | }
193 |
194 | /* initializing */
195 | VOID init_itypes(){
196 |
197 | int i, j;
198 | int gid, sgid;
199 | char type[100];
200 | char str[100];
201 | string line;
202 |
203 | /* try and open instruction groups specification file */
204 | if(_itypes_spec_file != NULL){
205 | ifstream f(_itypes_spec_file);
206 | if(f){
207 | // count number of groups
208 | number_of_groups = 0;
209 | while( getline(f,line)){
210 | sscanf(line.c_str(), "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str);
211 | if(gid > number_of_groups)
212 | number_of_groups++;
213 | }
214 | f.close();
215 | number_of_groups++;
216 | cerr << "==> found " << number_of_groups << " groups" << endl;
217 |
218 | group_identifiers = (identifier**)checked_malloc((number_of_groups+1)*sizeof(identifier*));
219 | group_ids_cnt = (INT64*)checked_malloc((number_of_groups+1)*sizeof(INT64));
220 | group_counts = (INT64*)checked_malloc((number_of_groups+1)*sizeof(INT64));
221 | for(i=0; i < number_of_groups+1; i++){
222 | group_counts[i] = 0;
223 | }
224 |
225 | // count number of subgroups per group
226 | f.open(_itypes_spec_file);
227 | i=0;
228 | while( getline(f,line)){
229 | sscanf(line.c_str(), "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str);
230 | if(gid == i){
231 | group_ids_cnt[i]++;
232 | }
233 | else{
234 | group_identifiers[i] = (identifier*)checked_malloc(group_ids_cnt[i]*sizeof(identifier));
235 | i++;
236 | group_ids_cnt[i]++;
237 | }
238 | }
239 | group_identifiers[i] = (identifier*)checked_malloc(group_ids_cnt[i]*sizeof(identifier));
240 | f.close();
241 |
242 | // save subgroup types and identifiers
243 | f.open(_itypes_spec_file);
244 | i=0;
245 | while( getline(f,line)){
246 | sscanf(line.c_str(), "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str);
247 | if(strcmp(type, "CATEGORY") == 0){
248 | group_identifiers[gid][sgid].type = identifier_type::ID_TYPE_CATEGORY;
249 | }
250 | else{
251 | if(strcmp(type, "OPCODE") == 0){
252 | group_identifiers[gid][sgid].type = identifier_type::ID_TYPE_OPCODE;
253 | }
254 | else{
255 | if(strcmp(type, "SPECIAL") == 0){
256 | group_identifiers[gid][sgid].type = identifier_type::ID_TYPE_SPECIAL;
257 | }
258 | else{
259 | cerr << "ERROR! Unknown subgroup type found (\"" << type << "\")." << endl;
260 | cerr << " Known subgroup types: {CATEGORY, OPCODE, SPECIAL}." << endl;
261 | exit(-1);
262 | }
263 | }
264 | }
265 | group_identifiers[gid][sgid].str = checked_strdup(str);
266 | }
267 | f.close();
268 |
269 | // print out groups read
270 | for(i=0; i < number_of_groups; i++){
271 | cerr << " group " << i << " (#: " << group_ids_cnt[i] << "): ";
272 | for(j=0; j < group_ids_cnt[i]; j++){
273 | cerr << group_identifiers[i][j].str << " ";
274 | switch(group_identifiers[i][j].type){
275 | case identifier_type::ID_TYPE_CATEGORY:
276 | cerr << "[CAT]; ";
277 | break;
278 | case identifier_type::ID_TYPE_OPCODE:
279 | cerr << "[OPCODE]; ";
280 | break;
281 | case identifier_type::ID_TYPE_SPECIAL:
282 | cerr << "[SPECIAL]; ";
283 | break;
284 | default:
285 | cerr << "ERROR! Unknown subgroup type found for [" << i << "][" << j << "] (\"" << group_identifiers[i][j].type << "\")." << endl;
286 | cerr << " Known subgroup types: {CATEGORY, OPCODE, SPECIAL}." << endl;
287 | exit(-1);
288 | break;
289 | }
290 | }
291 | cerr << endl;
292 | }
293 | }
294 | else{
295 | cerr << "ERROR! Failed to open file \"" << _itypes_spec_file << "\" containing instruction groups specification." << endl;
296 | exit(-1);
297 | }
298 | }
299 | else{
300 | // if no specification file was found, just use defaults (compatible with MICA v0.23 and older)
301 | init_itypes_default_groups();
302 | }
303 |
304 | // allocate space for identifiers of 'other' group
305 | other_ids_cnt = 0;
306 | other_ids_max_cnt = 2;
307 | other_group_identifiers = (identifier*)checked_malloc(other_ids_max_cnt*sizeof(identifier));
308 |
309 | // (initializing total instruction counts is done in mica.cpp)
310 |
311 | if(interval_size != -1){
312 | output_file_itypes.open(mkfilename("itypes_phases_int"), ios::out|ios::trunc);
313 | output_file_itypes.close();
314 | }
315 | }
316 |
317 | /* instrumenting (instruction level) */
318 | VOID instrument_itypes(INS ins, VOID* v){
319 |
320 | int i,j;
321 | char cat[50];
322 | char opcode[50];
323 | strcpy(cat,CATEGORY_StringShort(INS_Category(ins)).c_str());
324 | strcpy(opcode,INS_Mnemonic(ins).c_str());
325 | BOOL categorized = false;
326 |
327 | // go over all groups, increase group count if instruction matches that group
328 | // group counts are increased at most once per instruction executed,
329 | // even if the instruction matches multiple identifiers in that group
330 | for(i=0; i < number_of_groups; i++){
331 | for(j=0; j < group_ids_cnt[i]; j++){
332 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_CATEGORY){
333 | if(strcmp(group_identifiers[i][j].str, cat) == 0){
334 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END);
335 | categorized = true;
336 | break;
337 | }
338 | }
339 | else{
340 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_OPCODE){
341 | if(strcmp(group_identifiers[i][j].str, opcode) == 0){
342 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END);
343 | categorized = true;
344 | break;
345 | }
346 | }
347 | else{
348 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_SPECIAL){
349 | if(strcmp(group_identifiers[i][j].str, "mem_read") == 0 && INS_IsMemoryRead(ins) ){
350 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END);
351 | categorized = true;
352 | break;
353 | }
354 | else{
355 | if(strcmp(group_identifiers[i][j].str, "mem_write") == 0 && INS_IsMemoryWrite(ins) ){
356 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END);
357 | categorized = true;
358 | break;
359 | }
360 | else if(strcmp(group_identifiers[i][j].str, "reg_transfer") == 0 && INS_IsMov(ins) ){
361 | UINT32 flag=0,n;
362 | n=INS_OperandCount(ins);
363 | for(UINT32 i=0;i= other_ids_max_cnt){
403 | other_ids_max_cnt *= 2;
404 | other_group_identifiers = (identifier*)checked_realloc(other_group_identifiers, other_ids_max_cnt*sizeof(identifier));
405 | }
406 | }
407 |
408 | /* inserting calls for counting instructions is done in mica.cpp */
409 | if(interval_size != -1){
410 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_instr_intervals,IARG_END);
411 | /* only called if interval is 'full' */
412 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_instr_interval,IARG_END);
413 | }
414 | }
415 |
416 | /* finishing... */
417 | VOID fini_itypes(INT32 code, VOID* v){
418 | int i;
419 |
420 | if(interval_size == -1){
421 | output_file_itypes.open(mkfilename("itypes_full_int"), ios::out|ios::trunc);
422 | output_file_itypes << total_ins_count_for_hpc_alignment << " " << total_ins_count;
423 | for(i=0; i < number_of_groups; i++){
424 | output_file_itypes << " " << group_counts[i];
425 | }
426 | output_file_itypes << endl;
427 | }
428 | else{
429 | output_file_itypes.open(mkfilename("itypes_phases_int"), ios::out|ios::app);
430 | output_file_itypes << interval_ins_count;
431 | for(i=0; i < number_of_groups+1; i++){
432 | output_file_itypes << " " << group_counts[i];
433 | }
434 | output_file_itypes << endl;
435 | }
436 | //output_file_itypes << "number of instructions: " << total_ins_count_for_hpc_alignment << endl;
437 | output_file_itypes << " ";
438 | output_file_itypes.close();
439 |
440 | // print instruction categories in 'other' group of instructions
441 | ofstream output_file_other_group_categories;
442 | output_file_other_group_categories.open("itypes_other_group_categories.txt", ios::out|ios::trunc);
443 | for(i=0; i < other_ids_cnt; i++){
444 | output_file_other_group_categories << other_group_identifiers[i].str << endl;
445 | }
446 | }
447 |
--------------------------------------------------------------------------------
/mica_itypes.h:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "mica.h"
11 |
12 | #ifndef MICA_ITYPES_H
13 | #define MICA_ITYPES_H
14 |
/* One identifier that an instruction can be matched against. */
typedef struct identifier_type{
    /* Kind of identifier; SPECIAL covers things like memory reads/writes.
     * Numbering starts at 1. */
    enum {
        ID_TYPE_CATEGORY = 1,
        ID_TYPE_OPCODE = 2,
        ID_TYPE_SPECIAL = 3
    } type;
    /* Name of the category/opcode (or special keyword) to match. */
    char* str;
} identifier;
22 |
23 | VOID init_itypes();
24 | VOID init_itypes_default_groups();
25 |
26 | VOID instrument_itypes(INS ins, VOID* v);
27 | VOID instrument_itypes_bbl(TRACE trace, VOID* v);
28 | VOID fini_itypes(INT32 code, VOID* v);
29 |
30 |
31 | VOID itypes_count(UINT32 gid);
32 |
33 | VOID itypes_instr_interval_output();
34 | VOID itypes_instr_interval_reset();
35 |
36 | #endif
37 |
--------------------------------------------------------------------------------
/mica_memfootprint.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "pin.H"
11 |
12 | /* MICA includes */
13 | #include "mica_utils.h"
14 | #include "mica_memfootprint.h"
15 |
16 | /* Global variables */
17 |
18 | extern INT64 interval_size;
19 | extern INT64 interval_ins_count;
20 | extern INT64 interval_ins_count_for_hpc_alignment;
21 | extern INT64 total_ins_count;
22 | extern INT64 total_ins_count_for_hpc_alignment;
23 |
24 | extern UINT32 _block_size;
25 | extern UINT32 _page_size;
26 |
27 | static UINT32 memfootprint_block_size;
28 | static UINT32 page_size;
29 |
30 | static ofstream output_file_memfootprint;
31 |
32 | static nlist* DmemCacheWorkingSetTable[MAX_MEM_TABLE_ENTRIES];
33 | static nlist* DmemPageWorkingSetTable[MAX_MEM_TABLE_ENTRIES];
34 | static nlist* ImemCacheWorkingSetTable[MAX_MEM_TABLE_ENTRIES];
35 | static nlist* ImemPageWorkingSetTable[MAX_MEM_TABLE_ENTRIES];
36 |
37 |
38 | static long long DmemCacheWSS() {
39 | long long DmemCacheWorkingSetSize = 0L;
40 | for (int i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) {
41 | for (nlist *np = DmemCacheWorkingSetTable [i]; np != (nlist*) NULL; np = np->next) {
42 | for (ADDRINT j = 0; j < MAX_MEM_BLOCK; j++) {
43 | if ((np->mem)->numReferenced [j]) {
44 | DmemCacheWorkingSetSize++;
45 | }
46 | }
47 | }
48 | }
49 | return DmemCacheWorkingSetSize;
50 | }
51 |
52 | static long long ImemCacheWSS() {
53 | long long ImemCacheWorkingSetSize = 0L;
54 | for (int i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) {
55 | for (nlist *np = ImemCacheWorkingSetTable [i]; np != (nlist*) NULL; np = np->next) {
56 | for (ADDRINT j = 0; j < MAX_MEM_BLOCK; j++) {
57 | if ((np->mem)->numReferenced [j]) {
58 | ImemCacheWorkingSetSize++;
59 | }
60 | }
61 | }
62 | }
63 | return ImemCacheWorkingSetSize;
64 | }
65 |
66 | static long long DmemPageWSS() {
67 | long long DmemPageWorkingSetSize = 0L;
68 | for (int i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) {
69 | for (nlist *np = DmemPageWorkingSetTable [i]; np != (nlist*) NULL; np = np->next) {
70 | for (ADDRINT j = 0; j < MAX_MEM_BLOCK; j++) {
71 | if ((np->mem)->numReferenced [j]) {
72 | DmemPageWorkingSetSize++;
73 | }
74 | }
75 | }
76 | }
77 | return DmemPageWorkingSetSize;
78 | }
79 |
80 | static long long ImemPageWSS() {
81 | long long ImemPageWorkingSetSize = 0L;
82 | for (int i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) {
83 | for (nlist *np = ImemPageWorkingSetTable [i]; np != (nlist*) NULL; np = np->next) {
84 | for (ADDRINT j = 0; j < MAX_MEM_BLOCK; j++) {
85 | if ((np->mem)->numReferenced [j]) {
86 | ImemPageWorkingSetSize++;
87 | }
88 | }
89 | }
90 | }
91 | return ImemPageWorkingSetSize;
92 | }
93 |
94 | /* initializing */
95 | void init_memfootprint(){
96 | int i;
97 |
98 | for (i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) {
99 | DmemCacheWorkingSetTable[i] = (nlist*) NULL;
100 | DmemPageWorkingSetTable[i] = (nlist*) NULL;
101 | ImemCacheWorkingSetTable[i] = (nlist*) NULL;
102 | ImemPageWorkingSetTable[i] = (nlist*) NULL;
103 | }
104 |
105 | memfootprint_block_size = _block_size;
106 | page_size = _page_size;
107 |
108 | if(interval_size != -1){
109 | output_file_memfootprint.open(mkfilename("memfootprint_phases_int"), ios::out|ios::trunc);
110 | output_file_memfootprint.close();
111 | }
112 | }
113 |
114 | VOID memOp(ADDRINT effMemAddr, ADDRINT size){
115 | if(size > 0){
116 | ADDRINT a;
117 | ADDRINT addr, endAddr, upperAddr, indexInChunk;
118 | memNode* chunk;
119 |
120 | /* D-stream (64-byte) cache block memory footprint */
121 |
122 | addr = effMemAddr >> memfootprint_block_size;
123 | endAddr = (effMemAddr + size - 1) >> memfootprint_block_size;
124 |
125 | for(a = addr; a <= endAddr; a++){
126 |
127 | upperAddr = a >> LOG_MAX_MEM_BLOCK;
128 | indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK);
129 |
130 | chunk = lookup(DmemCacheWorkingSetTable, upperAddr);
131 | if(chunk == (memNode*)NULL)
132 | chunk = install(DmemCacheWorkingSetTable, upperAddr);
133 |
134 | //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK);
135 | chunk->numReferenced[indexInChunk] = true;
136 |
137 | }
138 |
139 | /* D-stream (4KB) page block memory footprint */
140 |
141 | addr = effMemAddr >> page_size;
142 | endAddr = (effMemAddr + size - 1) >> page_size;
143 |
144 | for(a = addr; a <= endAddr; a++){
145 |
146 | upperAddr = a >> LOG_MAX_MEM_BLOCK;
147 | indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK);
148 |
149 | chunk = lookup(DmemPageWorkingSetTable, upperAddr);
150 | if(chunk == (memNode*)NULL)
151 | chunk = install(DmemPageWorkingSetTable, upperAddr);
152 |
153 | //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK);
154 | chunk->numReferenced[indexInChunk] = true;
155 |
156 | }
157 | }
158 | }
159 |
160 | VOID instrMem(ADDRINT instrAddr, ADDRINT size){
161 |
162 | if(size > 0){
163 | ADDRINT a;
164 | ADDRINT addr, endAddr, upperAddr, indexInChunk;
165 | memNode* chunk;
166 |
167 |
168 | /* I-stream (64-byte) cache block memory footprint */
169 |
170 | addr = instrAddr >> memfootprint_block_size;
171 | endAddr = (instrAddr + size - 1) >> memfootprint_block_size;
172 |
173 | for(a = addr; a <= endAddr; a++){
174 |
175 | upperAddr = a >> LOG_MAX_MEM_BLOCK;
176 | indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK);
177 |
178 | chunk = lookup(ImemCacheWorkingSetTable, upperAddr);
179 | if(chunk == (memNode*)NULL)
180 | chunk = install(ImemCacheWorkingSetTable, upperAddr);
181 |
182 | //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK);
183 | chunk->numReferenced[indexInChunk] = true;
184 |
185 | }
186 |
187 | /* I-stream (4KB) page block memory footprint */
188 |
189 | addr = instrAddr >> page_size;
190 | endAddr = (instrAddr + size - 1) >> page_size;
191 |
192 | for(a = addr; a <= endAddr; a++){
193 |
194 | upperAddr = a >> LOG_MAX_MEM_BLOCK;
195 | indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK);
196 |
197 | chunk = lookup(ImemPageWorkingSetTable, upperAddr);
198 | if(chunk == (memNode*)NULL)
199 | chunk = install(ImemPageWorkingSetTable, upperAddr);
200 |
201 | //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK);
202 | chunk->numReferenced[indexInChunk] = true;
203 | }
204 | }
205 | }
206 |
207 | static VOID memfootprint_instr_full(ADDRINT instrAddr, ADDRINT size){
208 |
209 | /* counting instructions is done in all_instr_full() */
210 |
211 | instrMem(instrAddr, size);
212 | }
213 |
214 | static ADDRINT memfootprint_instr_intervals(ADDRINT instrAddr, ADDRINT size){
215 |
216 | /* counting instructions is done in all_instr_intervals() */
217 |
218 | instrMem(instrAddr, size);
219 | return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size);
220 | }
221 |
222 | VOID memfootprint_instr_interval_output(){
223 |
224 | output_file_memfootprint.open(mkfilename("memfootprint_phases_int"), ios::out|ios::app);
225 |
226 | long long DmemCacheWorkingSetSize = DmemCacheWSS();
227 | long long DmemPageWorkingSetSize = DmemPageWSS();
228 | long long ImemCacheWorkingSetSize = ImemCacheWSS();
229 | long long ImemPageWorkingSetSize = ImemPageWSS();
230 |
231 | output_file_memfootprint << DmemCacheWorkingSetSize << " " << DmemPageWorkingSetSize << " " << ImemCacheWorkingSetSize << " " << ImemPageWorkingSetSize << endl;
232 | output_file_memfootprint.close();
233 | }
234 |
235 | VOID memfootprint_instr_interval_reset(){
236 | /* clean used memory, to avoid memory shortage for long (CPU2006) benchmarks */
237 | for(ADDRINT i=0; i < MAX_MEM_TABLE_ENTRIES; i++){
238 | free_nlist(DmemCacheWorkingSetTable[i]);
239 | free_nlist(DmemPageWorkingSetTable[i]);
240 | free_nlist(ImemCacheWorkingSetTable[i]);
241 | free_nlist(ImemPageWorkingSetTable[i]);
242 | }
243 | }
244 |
245 | static VOID memfootprint_instr_interval(){
246 |
247 | memfootprint_instr_interval_output();
248 | memfootprint_instr_interval_reset();
249 | interval_ins_count = 0;
250 | interval_ins_count_for_hpc_alignment = 0;
251 | }
252 |
253 | /* instrumenting (instruction level) */
254 | VOID instrument_memfootprint(INS ins, VOID* v){
255 |
256 | if(INS_IsMemoryRead(ins)){
257 |
258 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memOp, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END);
259 |
260 | if(INS_HasMemoryRead2(ins)){
261 |
262 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memOp, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END);
263 | }
264 | }
265 | if(INS_IsMemoryWrite(ins)){
266 |
267 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memOp, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END);
268 | }
269 |
270 | if(interval_size == -1)
271 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memfootprint_instr_full, IARG_ADDRINT, INS_Address(ins), IARG_ADDRINT, (ADDRINT)INS_Size(ins), IARG_END);
272 | else{
273 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)memfootprint_instr_intervals, IARG_ADDRINT, INS_Address(ins), IARG_ADDRINT, (ADDRINT)INS_Size(ins), IARG_END);
274 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)memfootprint_instr_interval, IARG_END);
275 | }
276 | }
277 |
278 |
279 | /* finishing... */
280 | VOID fini_memfootprint(INT32 code, VOID* v){
281 |
282 | long long DmemCacheWorkingSetSize = DmemCacheWSS();
283 | long long DmemPageWorkingSetSize = DmemPageWSS();
284 | long long ImemCacheWorkingSetSize = ImemCacheWSS();
285 | long long ImemPageWorkingSetSize = ImemPageWSS();
286 |
287 | if(interval_size == -1){
288 | output_file_memfootprint.open(mkfilename("memfootprint_full_int"), ios::out|ios::trunc);
289 | }
290 | else{
291 | output_file_memfootprint.open(mkfilename("memfootprint_phases_int"), ios::out|ios::app);
292 | }
293 |
294 | output_file_memfootprint << DmemCacheWorkingSetSize << " " << DmemPageWorkingSetSize << " " << ImemCacheWorkingSetSize << " " << ImemPageWorkingSetSize << endl;
295 | //output_file_memfootprint << "number of instructions: " << total_ins_count_for_hpc_alignment << endl;
296 | output_file_memfootprint << " ";
297 | output_file_memfootprint.close();
298 | }
299 |
--------------------------------------------------------------------------------
/mica_memfootprint.h:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "mica.h"
11 |
12 | void init_memfootprint();
13 | VOID instrument_memfootprint(INS ins, VOID* v);
14 | VOID fini_memfootprint(INT32 code, VOID* v);
15 |
16 | VOID memOp(ADDRINT effMemAddr, ADDRINT size);
17 | VOID instrMem(ADDRINT instrAddr, ADDRINT size);
18 |
19 | VOID memfootprint_instr_interval_output();
20 | VOID memfootprint_instr_interval_reset();
21 |
--------------------------------------------------------------------------------
/mica_memstackdist.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "pin.H"
11 |
12 | /* MICA includes */
13 | #include "mica_utils.h"
14 | #include "mica_memstackdist.h"
15 |
16 | /* Global variables */
17 |
18 | extern INT64 interval_size;
19 | extern INT64 interval_ins_count;
20 | extern INT64 interval_ins_count_for_hpc_alignment;
21 | extern INT64 total_ins_count;
22 | extern INT64 total_ins_count_for_hpc_alignment;
23 |
24 | extern UINT32 _block_size;
25 |
26 | static UINT32 memstackdist_block_size;
27 |
28 | static ofstream output_file_memstackdist;
29 |
/* A single entry of the cache line reference stack.
 * The stack is a doubly linked list; an entry that is referenced is moved to
 * (or created at) the top, so entries further down were referenced longer ago.
 *   below      points to the entry below us in the stack (NULL at the bottom)
 *   above      points to the entry above us in the stack (NULL at the top)
 *   block_addr is the cache line index of this entry
 *   bucket     is the number of the stack depth bucket where this entry belongs
 */
typedef struct stack_entry_type {
    struct stack_entry_type* below;
    struct stack_entry_type* above;
    ADDRINT block_addr;
    INT32 bucket;
} stack_entry;
42 |
/* A single entry of the hash table, contains an array of stack entries referenced by part of cache line index.
 *   id            is the upper part of the cache line index (the hash key)
 *   stack_entries holds one stack entry pointer per cache line of this chunk,
 *                 indexed by the lower part of the cache line index; NULL means
 *                 the cache line has not been referenced yet
 *   next          chains the entries that hash to the same table slot
 */
typedef struct block_type_fast {
    ADDRINT id;
    stack_entry* stack_entries[MAX_MEM_ENTRIES];
    struct block_type_fast* next;
} block_fast;
49 |
/* Most recently referenced entry of the LRU stack. */
static stack_entry* stack_top;
/* Number of entries on the stack, including the dummy entry created at init. */
static UINT64 stack_size;

/* Hash table mapping the upper part of a cache line index to its chunk. */
static block_fast* hashTableCacheBlocks_fast[MAX_MEM_TABLE_ENTRIES];
/* Number of cache line references counted since the last reset. */
static INT64 mem_ref_cnt;
/* Number of references to cache lines that were never referenced before. */
static INT64 cold_refs;

/* Counters of accesses into each bucket. */
static INT64 buckets[BUCKET_CNT];
/* References to stack entries that are the oldest entries belonging to the particular bucket.
 * This is used to update bucket attributes of stack entries efficiently. Since the last
 * bucket is the overflow bucket, the last borderline entry should never be set. */
static stack_entry* borderline_stack_entries[BUCKET_CNT];
63 |
64 | /* initializing */
65 | void init_memstackdist(){
66 |
67 | int i;
68 |
69 | /* initialize */
70 | cold_refs = 0;
71 | for(i=0; i < BUCKET_CNT; i++){
72 | buckets[i] = 0;
73 | borderline_stack_entries[i] = NULL;
74 | }
75 | mem_ref_cnt = 0;
76 | /* hash table */
77 | for (i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) {
78 | hashTableCacheBlocks_fast[i] = NULL;
79 | }
80 | /* access stack */
81 | /* a dummy entry is inserted on the stack top to save some checks later */
82 | /* since the dummy entry is not in the hash table, it should never be used */
83 | stack_top = (stack_entry*) checked_malloc(sizeof(stack_entry));
84 | stack_top->block_addr = 0;
85 | stack_top->above = NULL;
86 | stack_top->below = NULL;
87 | stack_top->bucket = 0;
88 | stack_size = 1;
89 |
90 | memstackdist_block_size = _block_size;
91 |
92 | if(interval_size != -1){
93 | output_file_memstackdist.open(mkfilename("memstackdist_phases_int"), ios::out|ios::trunc);
94 | output_file_memstackdist.close();
95 | }
96 | }
97 |
98 | /*VOID memstackdist_instr_full(){
99 | // counting instructions is done in all_instr_full()
100 |
101 | }*/
102 |
103 | static ADDRINT memstackdist_instr_intervals(){
104 |
105 | /* counting instructions is done in all_instr_intervals() */
106 |
107 | return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size);
108 | }
109 |
110 | VOID memstackdist_instr_interval_output(){
111 | int i;
112 | output_file_memstackdist.open(mkfilename("memstackdist_phases_int"), ios::out|ios::app);
113 | output_file_memstackdist << mem_ref_cnt << " " << cold_refs;
114 | for(i=0; i < BUCKET_CNT; i++){
115 | output_file_memstackdist << " " << buckets[i];
116 | }
117 | output_file_memstackdist << endl;
118 | output_file_memstackdist.close();
119 | }
120 |
121 | VOID memstackdist_instr_interval_reset(){
122 | int i;
123 | mem_ref_cnt = 0;
124 | cold_refs = 0;
125 | for(i=0; i < BUCKET_CNT; i++){
126 | buckets[i] = 0;
127 | }
128 | }
129 |
130 | static VOID memstackdist_instr_interval(){
131 |
132 | memstackdist_instr_interval_output();
133 | memstackdist_instr_interval_reset();
134 | interval_ins_count = 0;
135 | interval_ins_count_for_hpc_alignment = 0;
136 | }
137 |
138 | /* hash table support */
139 |
140 | /** entry_lookup
141 | *
142 | * Finds an arrray of stack entry references for a given address key (upper part of address) in a hash table.
143 | */
144 | stack_entry** entry_lookup(block_fast** table, ADDRINT key){
145 |
146 | block_fast* b;
147 |
148 | for (b = table[key % MAX_MEM_TABLE_ENTRIES]; b != NULL; b = b->next){
149 | if(b->id == key)
150 | return b->stack_entries;
151 | }
152 |
153 | return NULL;
154 | }
155 |
156 | /** entry_install
157 | *
158 | * Installs a new array of stack entry references for a given address key (upper part of address) in a hash table.
159 | */
160 | static stack_entry** entry_install(block_fast** table, ADDRINT key){
161 |
162 | block_fast* b;
163 |
164 | ADDRINT index = key % MAX_MEM_TABLE_ENTRIES;
165 |
166 | b = table[index];
167 |
168 | if(b == NULL) {
169 | b = (block_fast*)checked_malloc(sizeof(block_fast));
170 | table[index] = b;
171 | }
172 | else{
173 | while(b->next != NULL){
174 | b = b->next;
175 | }
176 | b->next = (block_fast*)checked_malloc(sizeof(block_fast));
177 | b = b->next;
178 | }
179 | b->next = NULL;
180 | b->id = key;
181 | for(ADDRINT i = 0; i < MAX_MEM_ENTRIES; i++){
182 | b->stack_entries[i] = NULL;
183 | }
184 | return b->stack_entries;
185 | }
186 |
187 |
188 | /* stack support */
189 |
190 | #if 0
/** stack_sanity_check
 *
 * Checks whether the stack structure is internally consistent.
 * Walks the stack from top to bottom and verifies the bucket number of each
 * entry, the above/below links, and the borderline entry of each bucket.
 * Reports the first inconsistency through ERROR_MSG and exits with status 1.
 * Debugging aid only: this code is compiled out and its single call site in
 * move_to_top_fast is commented out.
 */
static VOID stack_sanity_check(){

    UINT64 position = 0;    // depth of the current entry, 0 is the top of stack
    INT32 bucket = 0;       // bucket the current entry is expected to be in

    stack_entry *e = stack_top;

    if (e->above != NULL){
        ERROR_MSG("Item above top of stack.");
        exit(1);
    }

    while (e != NULL){

        // Check whether the stack entry has a correct bucket.
        if (e->bucket != bucket){
            ERROR_MSG("Stack entry with invalid bucket.");
            exit(1);
        }

        // Check whether the stack entry is linked correctly.
        if (e->above && (e->above->below != e)){
            ERROR_MSG("Incorrectly linked stack.");
            exit(1);
        }
        if (e->below && (e->below->above != e)){
            ERROR_MSG("Incorrectly linked stack.");
            exit(1);
        }

        // Calculate which bucket we should be in next.
        // Never spill over the overflow bucket though.
        if (bucket < BUCKET_CNT - 1)
        {
            // the entry at depth 2^bucket is the last (oldest) one of this bucket
            UINT64 borderline = ((UINT64) 1) << bucket;
            if (position == borderline){
                if (borderline_stack_entries [bucket] != e){
                    ERROR_MSG("Incorrect bucket borderline.");
                    exit(1);
                }
                bucket ++;
            }
        }

        // Go on through the entire stack.
        e = e->below;
        position++;
    }
}
244 | #endif
245 |
246 |
/** move_to_top_fast
 *
 * Moves the stack entry e corresponding to the address a to the top of stack.
 * The stack entry can be NULL, in which case a new stack entry is created.
 *
 * In both cases the bucket numbers of the entries that are pushed one position
 * down are kept up to date through borderline_stack_entries, so that no walk
 * over the stack is needed for an entry that is already known.
 * The hash table is not touched here; after a call with e == NULL the caller
 * finds the newly created entry in stack_top.
 */
static VOID move_to_top_fast(stack_entry *e, ADDRINT a){

    INT32 bucket;

    /* check if entry was accessed before */
    if(e != NULL){

        /* check to see if we already are at top of stack */
        if(e->above != NULL){

            // disconnect the entry from its current position on the stack
            if (e->below != NULL) e->below->above = e->above;
            e->above->below = e->below;

            // adjust all borderline entries above the entry touched (note that we can be sure those entries exist)
            // a borderline entry is an entry whose bucket will change when an item is inserted above it on the stack
            for(bucket=0; bucket < BUCKET_CNT && bucket < e->bucket; bucket++){
                borderline_stack_entries[bucket]->bucket++;
                borderline_stack_entries[bucket] = borderline_stack_entries[bucket]->above;
            }
            // if the entry touched was a borderline entry, new borderline entry is the one above the touched one
            // (e still carries its old above pointer at this point)
            if(e == borderline_stack_entries[e->bucket]){
                borderline_stack_entries[e->bucket] = borderline_stack_entries[e->bucket]->above;
            }

            // place new entry on top of LRU stack
            e->below = stack_top;
            e->above = NULL;
            stack_top->above = e;
            stack_top = e;
            e->bucket = 0;
        }
        /* else: if top of stack was referenced again, nothing to do! */

    }
    else{
        // allocate memory for new stack entry
        // (this local e deliberately hides the NULL parameter e)
        stack_entry* e = (stack_entry*) checked_malloc(sizeof(stack_entry));

        // initialize with address and refer prev to top of stack
        e->block_addr = a;
        e->above = NULL;
        e->below = stack_top;
        e->bucket = 0;

        // adjust top of stack
        stack_top->above = e;
        stack_top = e;

        stack_size++;

        // adjust all borderline entries that exist up until the overflow bucket
        // (which really has no borderline entry since there is no next bucket)
        // we retain the number of the first free bucket for next code
        for(bucket=0; bucket < BUCKET_CNT - 1; bucket++){
            if (borderline_stack_entries[bucket] == NULL) break;
            borderline_stack_entries[bucket]->bucket++;
            borderline_stack_entries[bucket] = borderline_stack_entries[bucket]->above;
        }

        // if the stack size has reached a boundary of a bucket, set the boundary entry for this bucket
        // the variable types are chosen deliberately large for overflow safety
        // at least they should not overflow sooner than stack_size anyway
        // the overflow bucket boundary is never set
        if (bucket < BUCKET_CNT - 1)
        {
            // stack_size counts the dummy entry as well, hence 2 << bucket
            UINT64 borderline_distance = ((UINT64) 2) << bucket;
            if(stack_size == borderline_distance){
                // find the bottom of the stack by traversing from somewhere close to it
                stack_entry *stack_bottom;
                if (bucket) stack_bottom = borderline_stack_entries [bucket-1];
                else stack_bottom = stack_top;
                while (stack_bottom->below) stack_bottom = stack_bottom->below;
                // the new borderline is the bottom of the stack
                borderline_stack_entries [bucket] = stack_bottom;
            }
        }
    }

    // stack_sanity_check();
}
333 |
334 | /* determine reuse distance (= number of unique cache blocks referenced since last time this cache was referenced)
335 | * reuse distance is tracked in move_to_top_fast (by climbing up the LRU stack entry-by-entry until top of stack is reached),
336 | * this function only returns the reuse distance calculated by move_to_top_fast */
337 |
338 | static INT64 det_reuse_dist_bucket(stack_entry* e){
339 |
340 | if(e != NULL)
341 | return e->bucket;
342 | else
343 | return -1;
344 | }
345 |
346 | /* register memory access (either read of write) determine which cache lines are touched */
347 | VOID memstackdist_memRead(ADDRINT effMemAddr, ADDRINT size){
348 |
349 | ADDRINT a, endAddr, addr, upperAddr, indexInChunk;
350 | stack_entry** chunk;
351 | stack_entry* entry_for_addr;
352 |
353 | /* Calculate index in cache addresses. The calculation does not
354 | * handle address overflows but those are unlikely to happen. */
355 | addr = effMemAddr >> memstackdist_block_size;
356 | endAddr = (effMemAddr + size - 1) >> memstackdist_block_size;
357 |
358 | /* The hit is counted for all cache lines involved. */
359 | for(a = addr; a <= endAddr; a++){
360 |
361 | /* split the cache line address into hash key of chunk and index in chunk */
362 | upperAddr = a >> LOG_MAX_MEM_ENTRIES;
363 | indexInChunk = a & MASK_MAX_MEM_ENTRIES;
364 |
365 | chunk = entry_lookup(hashTableCacheBlocks_fast, upperAddr);
366 | if(chunk == NULL) chunk = entry_install(hashTableCacheBlocks_fast, upperAddr);
367 |
368 | entry_for_addr = chunk[indexInChunk];
369 |
370 | /* determine reuse distance for this access (if it has been accessed before) */
371 | INT64 b = det_reuse_dist_bucket(entry_for_addr);
372 |
373 | if(b < 0)
374 | cold_refs++;
375 | else
376 | buckets[b]++;
377 |
378 | /* adjust LRU stack */
379 | /* as a side effect, can allocate new entry, which could have been NULL so far */
380 | move_to_top_fast(entry_for_addr, a);
381 |
382 | /* update hash table for new cache blocks */
383 | if(chunk[indexInChunk] == NULL) chunk[indexInChunk] = stack_top;
384 |
385 | mem_ref_cnt++;
386 | }
387 | }
388 |
389 | VOID instrument_memstackdist(INS ins, VOID *v){
390 |
391 | if( INS_IsMemoryRead(ins) ){
392 |
393 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_memRead, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END);
394 |
395 | if( INS_HasMemoryRead2(ins) )
396 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_memRead, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END);
397 | }
398 |
399 | if(interval_size != -1){
400 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_instr_intervals,IARG_END);
401 | /* only called if interval is 'full' */
402 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_instr_interval,IARG_END);
403 | }
404 | }
405 |
406 | /* finishing... */
407 | VOID fini_memstackdist(INT32 code, VOID* v){
408 |
409 | int i;
410 |
411 | if(interval_size == -1){
412 | output_file_memstackdist.open(mkfilename("memstackdist_full_int"), ios::out|ios::trunc);
413 | }
414 | else{
415 | output_file_memstackdist.open(mkfilename("memstackdist_phases_int"), ios::out|ios::app);
416 | }
417 | output_file_memstackdist << mem_ref_cnt << " " << cold_refs;
418 | for(i=0; i < BUCKET_CNT; i++){
419 | output_file_memstackdist << " " << buckets[i];
420 | }
421 | //output_file_memstackdist << endl << "number of instructions: " << total_ins_count_for_hpc_alignment << endl;
422 | output_file_memstackdist << " ";
423 | output_file_memstackdist.close();
424 | }
425 |
--------------------------------------------------------------------------------
/mica_memstackdist.h:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "mica.h"
11 |
/* Set up counters, hash table and LRU stack; call once before instrumenting. */
void init_memstackdist();
/* Instruction-level instrumentation routine (memory reads and interval check). */
VOID instrument_memstackdist(INS ins, VOID* v);
/* Write the final statistics to the output file. */
VOID fini_memstackdist(INT32 code, VOID* v);

/* Analysis routine: account for a read of size bytes at effMemAddr. */
VOID memstackdist_memRead(ADDRINT effMemAddr, ADDRINT size);
/* Append the statistics of the current interval to the per-interval file. */
VOID memstackdist_instr_interval_output();
/* Zero the per-interval statistics. */
VOID memstackdist_instr_interval_reset();
19 |
--------------------------------------------------------------------------------
/mica_ppm.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "pin.H"
11 |
12 | /* MICA includes */
13 | #include "mica_ppm.h"
14 | #include "mica_utils.h"
15 |
16 | /* Global variables */
17 |
18 | extern INT64 interval_size;
19 | extern INT64 interval_ins_count;
20 | extern INT64 interval_ins_count_for_hpc_alignment;
21 | extern INT64 total_ins_count;
22 | extern INT64 total_ins_count_for_hpc_alignment;
23 |
/* Output stream, reopened for every write of PPM statistics. */
ofstream output_file_ppm;

BOOL lastInstBr; // was the last instruction a cond. branch instruction?
ADDRINT nextAddr; // address of the instruction after the last cond.branch
UINT32 numStatCondBranchInst; // number of static cond. branch instructions up until now (-> unique id for the cond. branch); starts at 1 because index 0 means 'not found'
//UINT32 lastBrId; // index of last cond. branch instruction
/* per static branch (indexed by branch id, brHist_size entries each): */
INT64* transition_counts;   // number of times the direction differed from the previous execution
char* local_taken;          // direction of the previous execution, -1 if not executed yet
INT64* local_taken_counts;  // number of times the branch was taken
INT64* local_brCounts;      // number of times the branch was executed
/* translation of instruction address to branch id, and its capacity */
ADDRINT* indices_condBr;
UINT32 indices_condBr_size;
/* incorrect predictions counters */
INT64 GAg_incorrect_pred[NUM_HIST_LENGTHS];
INT64 GAs_incorrect_pred[NUM_HIST_LENGTHS];
INT64 PAg_incorrect_pred[NUM_HIST_LENGTHS];
INT64 PAs_incorrect_pred[NUM_HIST_LENGTHS];
/* prediction for each of the 4 predictors */
INT32 GAg_pred_taken[NUM_HIST_LENGTHS];
INT32 GAs_pred_taken[NUM_HIST_LENGTHS];
INT32 PAg_pred_taken[NUM_HIST_LENGTHS];
INT32 PAs_pred_taken[NUM_HIST_LENGTHS];
/* size of local pattern history */
INT64 brHist_size;
/* global/local history */
INT32 bhr;
INT32* local_bhr;
/* global/local pattern history tables */
/* a table value of 0 means 'not set'; positive predicts taken, negative not taken */
char*** GAg_pht;
char*** PAg_pht;
char**** GAs_pht;
char**** PAs_pht;
/* check if page entries were touched (memory efficiency) */
char* GAs_touched;
char* PAs_touched;
/* prediction history */
/* history length of the table that made the last prediction (used for update exclusion) */
int GAg_pred_hist[NUM_HIST_LENGTHS];
int PAg_pred_hist[NUM_HIST_LENGTHS];
int GAs_pred_hist[NUM_HIST_LENGTHS];
int PAs_pred_hist[NUM_HIST_LENGTHS];
64 |
65 | /* initializing */
66 | void init_ppm(){
67 |
68 | UINT32 i,j;
69 | int k;
70 |
71 | /* initializing total instruction counts is done in mica.cpp */
72 |
73 | brHist_size = 512;
74 |
75 | numStatCondBranchInst = 1;
76 |
77 | /* translation of instruction address to indices */
78 | indices_condBr_size = 1024;
79 | indices_condBr = (ADDRINT*) checked_malloc(indices_condBr_size*sizeof(ADDRINT));
80 |
81 | lastInstBr = false;
82 |
83 | /* global/local history */
84 | bhr = 0;
85 | local_bhr = (int*) checked_malloc(brHist_size * sizeof(int));
86 |
87 | /* GAg PPM predictor */
88 | GAg_pht = (char***) checked_malloc(NUM_HIST_LENGTHS * sizeof(char**));
89 | for(j = 0; j < NUM_HIST_LENGTHS; j++) {
90 | GAg_pht[j] = (char**) checked_malloc((history_lengths[j]+1)*sizeof(char*));
91 | for(i = 0; i <= history_lengths[j]; i++){
92 | GAg_pht[j][i] = (char*) checked_malloc((1 << i)*sizeof(char));
93 | for(k = 0; k < (1 << i); k++)
94 | GAg_pht[j][i][k] = 0;
95 | }
96 | }
97 |
98 | /* PAg PPM predictor */
99 | PAg_pht = (char***) checked_malloc(NUM_HIST_LENGTHS * sizeof(char**));
100 | for(j = 0; j < NUM_HIST_LENGTHS; j++) {
101 | PAg_pht[j] = (char**) checked_malloc((history_lengths[j]+1)*sizeof(char*));
102 | for(i = 0; i <= history_lengths[j]; i++){
103 | PAg_pht[j][i] = (char*) checked_malloc((1 << i)*sizeof(char));
104 | for(k = 0; k < (1 << i); k++)
105 | PAg_pht[j][i][k] = 0;
106 | }
107 | }
108 |
109 | /* GAs PPM predictor */
110 | GAs_touched = (char*) checked_malloc(brHist_size * sizeof(char));
111 | GAs_pht = (char****) checked_malloc(brHist_size * sizeof(char***));
112 |
113 | /* PAs PPM predictor */
114 | PAs_touched = (char*) checked_malloc(brHist_size * sizeof(char));
115 | PAs_pht = (char****) checked_malloc(brHist_size * sizeof(char***));
116 |
117 | transition_counts = (INT64*) checked_malloc(brHist_size * sizeof(INT64));
118 | local_taken = (char*) checked_malloc(brHist_size * sizeof(char));
119 | local_brCounts = (INT64*) checked_malloc(brHist_size * sizeof(INT64));
120 | local_taken_counts = (INT64*) checked_malloc(brHist_size * sizeof(INT64));
121 |
122 | for(i = 0; i < brHist_size; i++){
123 | transition_counts[i] = 0;
124 | local_taken[i] = -1;
125 | local_brCounts[i] = 0;
126 | local_taken_counts[i] = 0;
127 | GAs_touched[i] = 0;
128 | PAs_touched[i] = 0;
129 | }
130 |
131 | for(j=0; j < NUM_HIST_LENGTHS; j++){
132 | GAg_incorrect_pred[j] = 0;
133 | GAs_incorrect_pred[j] = 0;
134 | PAg_incorrect_pred[j] = 0;
135 | PAs_incorrect_pred[j] = 0;
136 | }
137 |
138 | if(interval_size != -1){
139 | output_file_ppm.open(mkfilename("ppm_phases_int"), ios::out|ios::trunc);
140 | output_file_ppm.close();
141 | }
142 |
143 | }
144 |
145 | /*VOID ppm_instr_full(){
146 | }*/
147 |
148 | ADDRINT ppm_instr_intervals(){
149 |
150 | return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size);
151 | }
152 |
153 | VOID ppm_instr_interval_output(){
154 | int i;
155 | INT64 total_transition_count = 0;
156 | INT64 total_taken_count = 0;
157 | INT64 total_brCount = 0;
158 |
159 | output_file_ppm.open(mkfilename("ppm_phases_int"), ios::out|ios::app);
160 |
161 | output_file_ppm << interval_size;
162 | for(i = 0; i < NUM_HIST_LENGTHS; i++)
163 | output_file_ppm << " " << GAg_incorrect_pred[i] << " " << PAg_incorrect_pred[i] << " " << GAs_incorrect_pred[i] << " " << PAs_incorrect_pred[i];
164 |
165 | for(i=0; i < brHist_size; i++){
166 | if(local_brCounts[i] > 0){
167 | if( transition_counts[i] > local_brCounts[i]/2)
168 | total_transition_count += local_brCounts[i]-transition_counts[i];
169 | else
170 | total_transition_count += transition_counts[i];
171 |
172 | if( local_taken_counts[i] > local_brCounts[i]/2)
173 | total_taken_count += local_brCounts[i] - local_taken_counts[i];
174 | else
175 | total_taken_count += local_taken_counts[i];
176 | total_brCount += local_brCounts[i];
177 | }
178 | }
179 | output_file_ppm << " " << total_brCount << " " << total_transition_count << " " << total_taken_count << endl;
180 | output_file_ppm.close();
181 | }
182 |
183 | VOID ppm_instr_interval_reset(){
184 |
185 | int i;
186 |
187 | for(i = 0; i < NUM_HIST_LENGTHS; i++){
188 | GAg_incorrect_pred[i] = 0;
189 | GAs_incorrect_pred[i] = 0;
190 | PAg_incorrect_pred[i] = 0;
191 | PAs_incorrect_pred[i] = 0;
192 | }
193 | for(i=0; i < brHist_size; i++){
194 | local_brCounts[i] = 0;
195 | local_taken_counts[i] = 0;
196 | transition_counts[i] = 0;
197 | }
198 | }
199 |
200 | VOID ppm_instr_interval(){
201 |
202 |
203 | ppm_instr_interval_output();
204 | ppm_instr_interval_reset();
205 |
206 | interval_ins_count = 0;
207 | interval_ins_count_for_hpc_alignment = 0;
208 | }
209 |
210 | /* double memory space for branch history size when needed */
211 | VOID reallocate_brHist(){
212 |
213 | INT32* int_ptr;
214 | char* char_ptr;
215 | char**** char4_ptr;
216 | INT64* int64_ptr;
217 |
218 | brHist_size = brHist_size*2;
219 |
220 | int_ptr = (INT32*) checked_realloc(local_bhr,brHist_size * sizeof(INT32));
221 | /*if(int_ptr == (INT32*) NULL) {
222 | cerr << "Could not allocate memory" << endl;
223 | exit(1);
224 | }*/
225 | local_bhr = int_ptr;
226 |
227 | char_ptr = (char*) checked_realloc(GAs_touched, brHist_size * sizeof(char));
228 | /*if(char_ptr == (char*) NULL){
229 | cerr << "Could not allocate memory" << endl;
230 | exit(1);
231 | }*/
232 | GAs_touched = char_ptr;
233 |
234 | char4_ptr = (char****) checked_realloc(GAs_pht,brHist_size * sizeof(char***));
235 | /*if(char4_ptr == (char****) NULL) {
236 | cerr << "Could not allocate memory" << endl;
237 | exit(1);
238 | }*/
239 | GAs_pht = char4_ptr;
240 |
241 | char_ptr = (char*) checked_realloc(PAs_touched,brHist_size * sizeof(char));
242 | /*if(char_ptr == (char*) NULL) {
243 | cerr << "Could not allocate memory" << endl;
244 | exit(1);
245 | }*/
246 | PAs_touched = char_ptr;
247 |
248 | char4_ptr = (char****) checked_realloc(PAs_pht,brHist_size * sizeof(char***));
249 | /*if(char4_ptr == (char****) NULL) {
250 | cerr << "Could not allocate memory" << endl;
251 | exit(1);
252 | }*/
253 | PAs_pht = char4_ptr;
254 |
255 | char_ptr = (char*) checked_realloc(local_taken,brHist_size * sizeof(char));
256 | /*if(char_ptr == (char*) NULL) {
257 | cerr << "Could not allocate memory" << endl;
258 | exit(1);
259 | }*/
260 | local_taken = char_ptr;
261 |
262 | int64_ptr = (INT64*) realloc(transition_counts, brHist_size * sizeof(INT64));
263 | /*if(int64_ptr == (INT64*)NULL) {
264 | cerr,"Could not allocate memory" << endl;
265 | exit(1);
266 | }*/
267 | transition_counts = int64_ptr;
268 |
269 | int64_ptr = (INT64*) realloc(local_brCounts, brHist_size * sizeof(INT64));
270 | /*if(int64_ptr == (INT64*)NULL) {
271 | cerr << "Could not allocate memory" << endl;
272 | exit(1);
273 | }*/
274 | local_brCounts = int64_ptr;
275 |
276 | int64_ptr = (INT64*) realloc(local_taken_counts, brHist_size * sizeof(INT64));
277 | /*if(int64_ptr == (INT64*)NULL) {
278 | cerr << "Could not allocate memory" << endl;
279 | exit(1);
280 | }*/
281 | local_taken_counts = int64_ptr;
282 | }
283 |
284 |
285 | VOID condBr(UINT32 id, BOOL _t){
286 |
287 | int i,j,k;
288 | int hist;
289 | BOOL taken = (_t != 0) ? 1 : 0;
290 |
291 | /* predict direction */
292 |
293 | /* GAs PPM predictor lookup */
294 | if(!GAs_touched[id]){
295 | /* allocate PPM predictor */
296 |
297 | GAs_touched[id] = 1;
298 |
299 | GAs_pht[id] = (char***) checked_malloc(NUM_HIST_LENGTHS * sizeof(char**));
300 | for(j = 0; j < NUM_HIST_LENGTHS; j++){
301 | GAs_pht[id][j] = (char**) checked_malloc((history_lengths[j]+1) * sizeof(char*));
302 | for(i = 0; i <= (int)history_lengths[j]; i++){
303 | GAs_pht[id][j][i] = (char*) checked_malloc((1 << i) * sizeof(char));
304 | for(k = 0; k < (1<= 0; i--){
332 |
333 | hist = bhr & (((int) 1 << i) -1);
334 | if(GAg_pht[j][i][hist] != 0){
335 | GAg_pred_hist[j] = i; // used to only update predictor doing the prediction and higher order predictors (update exclusion)
336 | if(GAg_pht[j][i][hist] > 0)
337 | GAg_pred_taken[j] = 1;
338 | else
339 | GAg_pred_taken[j] = 0;
340 | break;
341 | }
342 | }
343 |
344 | /* PAg PPM predictor lookup */
345 | for(i = (int)history_lengths[j]; i >= 0; i--){
346 | hist = local_bhr[id] & (((int) 1 << i) -1);
347 | if(PAg_pht[j][i][hist] != 0){
348 | PAg_pred_hist[j] = i;
349 | if(PAg_pht[j][i][hist] > 0)
350 | PAg_pred_taken[j] = 1;
351 | else
352 | PAg_pred_taken[j] = 0;
353 | break;
354 | }
355 | }
356 |
357 | /* GAs PPM predictor lookup */
358 | for(i = (int)history_lengths[j]; i >= 0; i--){
359 | hist = bhr & (((int) 1 << i) -1);
360 | if(GAs_pht[id][j][i][hist] != 0){
361 | GAs_pred_hist[j] = i;
362 | if(GAs_pht[id][j][i][hist] > 0)
363 | GAs_pred_taken[j] = 1;
364 | else
365 | GAs_pred_taken[j] = 0;
366 | break;
367 | }
368 | }
369 |
370 | /* PAs PPM predictor lookup */
371 | for(i = (int)history_lengths[j]; i >= 0; i--){
372 | hist = local_bhr[id] & (((int) 1 << i) -1);
373 | if(PAs_pht[id][j][i][hist] != 0){
374 | PAs_pred_hist[j] = i;
375 | if(PAs_pht[id][j][i][hist] > 0)
376 | PAs_pred_taken[j] = 1;
377 | else
378 | PAs_pred_taken[j] = 0;
379 | break;
380 | }
381 | }
382 | }
383 |
384 | /* transition/taken rate */
385 | if(local_taken[id] > -1){
386 | if(taken != local_taken[id])
387 | transition_counts[id]++;
388 | }
389 | local_taken[id] = taken;
390 | local_brCounts[id]++;
391 | if(taken)
392 | local_taken_counts[id]++;
393 |
394 | for(j=0; j < NUM_HIST_LENGTHS; j++){
395 | /* update statistics according to predictions */
396 | if(taken != GAg_pred_taken[j])
397 | GAg_incorrect_pred[j]++;
398 | if(taken != GAs_pred_taken[j])
399 | GAs_incorrect_pred[j]++;
400 | if(taken != PAg_pred_taken[j])
401 | PAg_incorrect_pred[j]++;
402 | if(taken != PAs_pred_taken[j])
403 | PAs_incorrect_pred[j]++;
404 |
405 | /* using update exclusion: only update predictor doing the prediction and higher order predictors */
406 |
407 | /* update GAg PPM pattern history tables */
408 | for(i = (int)GAg_pred_hist[j]; i <= (int)history_lengths[j]; i++){
409 | hist = bhr & ((1 << i) - 1);
410 | if(taken){
411 | if(GAg_pht[j][i][hist] < 127)
412 | GAg_pht[j][i][hist]++;
413 | }
414 | else{
415 | if(GAg_pht[j][i][hist] > -127)
416 | GAg_pht[j][i][hist]--;
417 | }
418 | /* avoid == 0 because that means 'not set' */
419 | if(GAg_pht[j][i][hist] == 0){
420 | if(taken){
421 | GAg_pht[j][i][hist]++;
422 | }
423 | else{
424 | GAg_pht[j][i][hist]--;
425 | }
426 | }
427 | }
428 | /* update PAg PPM pattern history tables */
429 | for(i = (int)PAg_pred_hist[j]; i <= (int)history_lengths[j]; i++){
430 | hist = local_bhr[id] & ((1 << i) - 1);
431 | if(taken){
432 | if(PAg_pht[j][i][hist] < 127)
433 | PAg_pht[j][i][hist]++;
434 | }
435 | else{
436 | if(PAg_pht[j][i][hist] > -127)
437 | PAg_pht[j][i][hist]--;
438 | }
439 | /* avoid == 0 because that means 'not set' */
440 | if(PAg_pht[j][i][hist] == 0){
441 | if(taken){
442 | PAg_pht[j][i][hist]++;
443 | }
444 | else{
445 | PAg_pht[j][i][hist]--;
446 | }
447 | }
448 | }
449 | /* update GAs PPM pattern history tables */
450 | for(i = (int)GAs_pred_hist[j]; i <= (int)history_lengths[j]; i++){
451 | hist = bhr & ((1 << i) - 1);
452 | if(taken){
453 | if(GAs_pht[id][j][i][hist] < 127)
454 | GAs_pht[id][j][i][hist]++;
455 | }
456 | else{
457 | if(GAs_pht[id][j][i][hist] > -127)
458 | GAs_pht[id][j][i][hist]--;
459 | }
460 | /* avoid == 0 because that means 'not set' */
461 | if(GAs_pht[id][j][i][hist] == 0){
462 | if(taken){
463 | GAs_pht[id][j][i][hist]++;
464 | }
465 | else{
466 | GAs_pht[id][j][i][hist]--;
467 | }
468 | }
469 | }
470 | /* update PAs PPM pattern history tables */
471 | for(i = (int)PAs_pred_hist[j]; i <= (int)history_lengths[j]; i++){
472 | hist = local_bhr[id] & ((1 << i) - 1);
473 | if(taken){
474 | if(PAs_pht[id][j][i][hist] < 127)
475 | PAs_pht[id][j][i][hist]++;
476 | }
477 | else{
478 | if(PAs_pht[id][j][i][hist] > -127)
479 | PAs_pht[id][j][i][hist]--;
480 | }
481 | /* avoid == 0 because that means 'not set' */
482 | if(PAs_pht[id][j][i][hist] == 0){
483 | if(taken){
484 | PAs_pht[id][j][i][hist]++;
485 | }
486 | else{
487 | PAs_pht[id][j][i][hist]--;
488 | }
489 | }
490 | }
491 | }
492 |
493 | /* update global history register */
494 | bhr = bhr << 1;
495 | bhr |= taken;
496 |
497 | /* update local history */
498 | local_bhr[id] = local_bhr[id] << 1;
499 | local_bhr[id] |= taken;
500 | }
501 |
502 | /* index for static conditional branch */
503 | UINT32 index_condBr(ADDRINT ins_addr){
504 |
505 | UINT64 i;
506 | for(i=0; i <= numStatCondBranchInst; i++){
507 | if(indices_condBr[i] == ins_addr)
508 | return i; /* found */
509 | }
510 | return 0; /* not found */
511 | }
512 |
513 | /* register static conditional branch with some index */
514 | void register_condBr(ADDRINT ins_addr){
515 |
516 | ADDRINT* ptr;
517 |
518 | /* reallocation needed */
519 | if(numStatCondBranchInst >= indices_condBr_size){
520 |
521 | indices_condBr_size *= 2;
522 | ptr = (ADDRINT*) realloc(indices_condBr, indices_condBr_size*sizeof(ADDRINT));
523 | /*if(ptr == (ADDRINT*)NULL){
524 | cerr << "Could not allocate memory (realloc in register_condBr)!" << endl;
525 | exit(1);
526 | }*/
527 | indices_condBr = ptr;
528 |
529 | }
530 |
531 | /* register instruction to index */
532 | indices_condBr[numStatCondBranchInst++] = ins_addr;
533 | }
534 |
535 | // static int _count = 0;
536 | VOID instrument_ppm_cond_br(INS ins){
537 | UINT32 index = index_condBr(INS_Address(ins));
538 | if(index < 1){
539 |
540 | /* We don't know the number of static conditional branch instructions up front,
541 | * so we double the size of the branch history tables as needed by calling this function */
542 | if(numStatCondBranchInst >= brHist_size)
543 | reallocate_brHist();
544 |
545 | index = numStatCondBranchInst;
546 |
547 | register_condBr(INS_Address(ins));
548 | register_condBr(INS_Address(ins));
549 | }
550 |
551 | const char* str = INS_Disassemble(ins).c_str();
552 | const char* substr = "xbegin";
553 | if (strncmp(str, substr, strlen(substr)) == 0){
554 | printf("as of pin 3.4 -- I don't think we can parse xbegin so skipping...\n");
555 | return;
556 | }
557 | substr = "xend";
558 | if (strncmp(str, substr, strlen(substr)) == 0){
559 | printf("as of pin 3.4 -- I don't think we can parse xend so skipping...\n");
560 | return;
561 | }
562 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)condBr,IARG_UINT32, index, IARG_BRANCH_TAKEN, IARG_END);
563 | }
564 |
565 | /* instrumenting (instruction level) */
566 | VOID instrument_ppm(INS ins, VOID* v){
567 |
568 | char cat[50];
569 | strcpy(cat,CATEGORY_StringShort(INS_Category(ins)).c_str());
570 |
571 | if(strcmp(cat,"COND_BR") == 0){
572 | instrument_ppm_cond_br(ins);
573 | }
574 |
575 | /* inserting calls for counting instructions (full) is done in mica.cpp */
576 |
577 | if(interval_size != -1){
578 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ppm_instr_intervals,IARG_END);
579 | /* only called if interval is 'full' */
580 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)ppm_instr_interval,IARG_END);
581 | }
582 | }
583 |
584 |
585 | /* finishing... */
586 | VOID fini_ppm(INT32 code, VOID* v){
587 |
588 | int i;
589 |
590 | if(interval_size == -1){
591 | output_file_ppm.open(mkfilename("ppm_full_int"), ios::out|ios::trunc);
592 | //output_file_ppm << total_ins_count;
593 | }
594 | else{
595 | output_file_ppm.open(mkfilename("ppm_phases_int"), ios::out|ios::app);
596 | //output_file_ppm << interval_ins_count;
597 | }
598 | for(i=0; i < NUM_HIST_LENGTHS; i++)
599 | output_file_ppm << GAg_incorrect_pred[i] << " " << PAg_incorrect_pred[i] << " " << GAs_incorrect_pred[i] << " " << PAs_incorrect_pred[i] << " ";
600 |
601 | INT64 total_transition_count = 0;
602 | INT64 total_taken_count = 0;
603 | INT64 total_brCount = 0;
604 | for(i=0; i < brHist_size; i++){
605 | if(local_brCounts[i] > 0){
606 | if( transition_counts[i] > local_brCounts[i]/2)
607 | total_transition_count += local_brCounts[i]-transition_counts[i];
608 | else
609 | total_transition_count += transition_counts[i];
610 |
611 | if( local_taken_counts[i] > local_brCounts[i]/2)
612 | total_taken_count += local_brCounts[i] - local_taken_counts[i];
613 | else
614 | total_taken_count += local_taken_counts[i];
615 | total_brCount += local_brCounts[i];
616 | }
617 | }
618 | output_file_ppm << total_brCount << " " << total_transition_count << " " << total_taken_count << endl;
619 | //output_file_ppm << "number of instructions: " << total_ins_count_for_hpc_alignment << endl;
620 | output_file_ppm << " ";
621 | output_file_ppm.close();
622 | }
623 |
--------------------------------------------------------------------------------
/mica_ppm.h:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "mica.h"
11 |
12 | void init_ppm();
13 | VOID instrument_ppm(INS ins, VOID* v);
14 | VOID fini_ppm(INT32 code, VOID* v);
15 |
16 | VOID instrument_ppm_cond_br(INS ins);
17 | VOID ppm_instr_interval_output();
18 | VOID ppm_instr_interval_reset();
19 |
--------------------------------------------------------------------------------
/mica_reg.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "pin.H"
11 |
12 | /* MICA includes */
13 | #include "mica_reg.h"
14 |
15 | /* Global variables */
16 |
17 | extern INT64 interval_size;
18 | extern INT64 interval_ins_count;
19 | extern INT64 interval_ins_count_for_hpc_alignment;
20 | extern INT64 total_ins_count;
21 | extern INT64 total_ins_count_for_hpc_alignment;
22 |
23 | ofstream output_file_reg;
24 |
25 | UINT64* opCounts; // array which keeps track of number-of-operands-per-instruction stats
26 | BOOL* regRef; // register references
27 | INT64* PCTable; // production addresses of registers
28 | INT64* regUseCnt; // usage counters for each register
29 | INT64* regUseDistr; // distribution of register usage
30 | INT64* regAgeDistr; // distribution of register ages
31 |
32 | /* initializing */
33 | void init_reg(){
34 |
35 | int i;
36 |
37 | /* initializing total instruction counts is done in mica.cpp */
38 |
39 | /* allocate memory */
40 | opCounts = (UINT64*) checked_malloc(MAX_NUM_OPER * sizeof(UINT64));
41 | regRef = (BOOL*) checked_malloc(MAX_NUM_REGS * sizeof(BOOL));
42 | PCTable = (INT64*) checked_malloc(MAX_NUM_REGS * sizeof(INT64));
43 | regUseCnt = (INT64*) checked_malloc(MAX_NUM_REGS * sizeof(INT64));
44 | regUseDistr = (INT64*) checked_malloc(MAX_REG_USE * sizeof(INT64));
45 | regAgeDistr = (INT64*) checked_malloc(MAX_COMM_DIST * sizeof(INT64));
46 |
47 | /* initialize */
48 | for(i = 0; i < MAX_NUM_OPER; i++){
49 | opCounts[i] = 0;
50 | }
51 | for(i = 0; i < MAX_NUM_REGS; i++){
52 | regRef[i] = false;
53 | PCTable[i] = 0;
54 | regUseCnt[i] = 0;
55 | }
56 | for(i = 0; i < MAX_REG_USE; i++){
57 | regUseDistr[i] = 0;
58 | }
59 | for(i = 0; i < MAX_COMM_DIST; i++){
60 | regAgeDistr[i] = 0;
61 | }
62 |
63 | if(interval_size != -1){
64 | output_file_reg.open(mkfilename("reg_phases_int"), ios::out|ios::trunc);
65 | output_file_reg.close();
66 | }
67 | }
68 |
69 | /* read register operand */
70 | VOID readRegOp_reg(UINT32 regId){
71 |
72 | /* *** REG *** */
73 |
74 |
75 | /* register age */
76 | INT64 age = total_ins_count - PCTable[regId]; // dependency distance
77 | if(age >= MAX_COMM_DIST){
78 | age = MAX_COMM_DIST - 1; // trim if needed
79 | }
80 | //assert(age >= 0);
81 | regAgeDistr[age]++;
82 |
83 | /* register usage */
84 | regUseCnt[regId]++;
85 | regRef[regId] = 1; // (operand) register was referenced
86 | }
87 |
88 | VOID writeRegOp_reg(UINT32 regId){
89 |
90 | /* *** REG *** */
91 | UINT32 num;
92 |
93 | /* if register was referenced before, adjust use distribution */
94 | if(regRef[regId]){
95 | num = regUseCnt[regId];
96 | if(num >= MAX_REG_USE) // trim if needed
97 | num = MAX_REG_USE - 1;
98 | //assert(num >= 0);
99 | regUseDistr[num]++;
100 | }
101 |
102 | /* reset register stuff because of new value produced */
103 |
104 | PCTable[regId] = total_ins_count; // last production = now
105 | regUseCnt[regId] = 0; // new value is never used (yet)
106 | regRef[regId] = true; // (destination) register was referenced (for tracking use distribution)
107 | }
108 |
109 | VOID reg_instr_full(VOID* _e){
110 |
111 | /* counting instructions is done in all_instr_full() */
112 |
113 | ins_buffer_entry* e = (ins_buffer_entry*)_e;
114 |
115 | INT32 i;
116 |
117 | for(i=0; i < e->regReadCnt; i++){
118 | readRegOp_reg((UINT32)e->regsRead[i]);
119 | }
120 | for(i=0; i < e->regWriteCnt; i++){
121 | writeRegOp_reg((UINT32)e->regsWritten[i]);
122 | }
123 |
124 | opCounts[e->regOpCnt]++;
125 | }
126 |
127 | ADDRINT reg_instr_intervals(VOID* _e) {
128 |
129 | /* counting instructions is done in all_instr_intervals() */
130 |
131 | ins_buffer_entry* e = (ins_buffer_entry*)_e;
132 |
133 | INT32 i;
134 |
135 | for(i=0; i < e->regReadCnt; i++){
136 | readRegOp_reg((UINT32)e->regsRead[i]);
137 | }
138 | for(i=0; i < e->regWriteCnt; i++){
139 | writeRegOp_reg((UINT32)e->regsWritten[i]);
140 | }
141 |
142 | opCounts[e->regOpCnt]++;
143 |
144 | return (ADDRINT) (interval_ins_count_for_hpc_alignment == interval_size);
145 | }
146 |
147 | VOID reg_instr_interval_output(){
148 | int i;
149 |
150 | output_file_reg.open(mkfilename("reg_phases_int"), ios::out|ios::app);
151 |
152 | UINT64 totNumOps = 0;
153 | UINT64 num;
154 |
155 | /* total number of operands */
156 | for(i = 1; i < MAX_NUM_OPER; i++){
157 | totNumOps += opCounts[i]*i;
158 | }
159 | output_file_reg << interval_size << " " << totNumOps;
160 |
161 | /* average degree of use */
162 | num = 0;
163 | for(i = 0; i < MAX_REG_USE; i++){
164 | num += regUseDistr[i];
165 | }
166 | output_file_reg << " " << num;
167 | num = 0;
168 | for(i = 0; i < MAX_REG_USE; i++){
169 | num += i * regUseDistr[i];
170 | }
171 | output_file_reg << " " << num;
172 |
173 | /* register dependency distributions */
174 | num = 0;
175 | for(i = 0; i < MAX_COMM_DIST; i++){
176 | num += regAgeDistr[i];
177 | }
178 | output_file_reg << " " << num;
179 | num = 0;
180 | for(i = 0; i < MAX_COMM_DIST; i++){
181 | num += regAgeDistr[i];
182 | if( (i == 1) || (i == 2) || (i == 4) || (i == 8) || (i == 16) || (i == 32) || (i == 64)){
183 | output_file_reg << " " << num;
184 | }
185 | }
186 | output_file_reg << endl;
187 |
188 | output_file_reg.close();
189 | }
190 |
191 | VOID reg_instr_interval_reset(){
192 |
193 | int i;
194 |
195 | for(i = 0; i < MAX_NUM_OPER; i++){
196 | opCounts[i] = 0;
197 | }
198 | /* do NOT reset register use counts or register definition addresses
199 | * that should only be done when the register is written to */
200 | /* for(i = 0; i < MAX_NUM_REGS; i++){
201 | regRef[i] = false;
202 | PCTable[i] = 0;
203 | regUseCnt[i] = 0;
204 | } */
205 | for(i = 0; i < MAX_REG_USE; i++){
206 | regUseDistr[i] = 0;
207 | }
208 | for(i = 0; i < MAX_COMM_DIST; i++){
209 | regAgeDistr[i] = 0;
210 | }
211 | }
212 |
213 | VOID reg_instr_interval() {
214 |
215 | reg_instr_interval_output();
216 | reg_instr_interval_reset();
217 | interval_ins_count = 0;
218 | interval_ins_count_for_hpc_alignment = 0;
219 |
220 | }
221 |
222 | VOID instrument_reg(INS ins, ins_buffer_entry* e){
223 |
224 |
225 | UINT32 i, maxNumRegsProd, maxNumRegsCons, regReadCnt, regWriteCnt, opCnt, regOpCnt;
226 | REG reg;
227 |
228 | if(!e->setRead){
229 |
230 | maxNumRegsCons = INS_MaxNumRRegs(ins); // maximum number of register consumations (reads)
231 |
232 | regReadCnt = 0;
233 | for(i = 0; i < maxNumRegsCons; i++){ // finding all register operands which are read
234 | reg = INS_RegR(ins,i);
235 | //assert(((UINT32)reg) < MAX_NUM_REGS);
236 | /* only consider valid general-purpose registers (any bit-width) and floating-point registers,
237 | * i.e. exlude branch, segment and pin registers, among others */
238 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
239 | regReadCnt++;
240 | }
241 | }
242 |
243 | e->regReadCnt = regReadCnt;
244 | e->regsRead = (REG*) checked_malloc(regReadCnt*sizeof(REG));
245 |
246 | regReadCnt = 0;
247 | for(i = 0; i < maxNumRegsCons; i++){ // finding all register operands which are read
248 | reg = INS_RegR(ins,i);
249 | //assert(((UINT32)reg) < MAX_NUM_REGS);
250 | /* only consider valid general-purpose registers (any bit-width) and floating-point registers,
251 | * i.e. exlude branch, segment and pin registers, among others */
252 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
253 | e->regsRead[regReadCnt++] = reg;
254 | }
255 | }
256 | e->setRead = true;
257 | }
258 | if(!e->setWritten){
259 |
260 | maxNumRegsProd = INS_MaxNumWRegs(ins);
261 |
262 | regWriteCnt = 0;
263 | for(i=0; i < maxNumRegsProd; i++){
264 |
265 | reg = INS_RegW(ins, i);
266 | //assert(((UINT32)reg) < MAX_NUM_REGS);
267 | /* only consider valid general-purpose registers (any bit-width) and floating-point registers,
268 | * i.e. exlude branch, segment and pin registers, among others */
269 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
270 | regWriteCnt++;
271 | }
272 | }
273 |
274 | e->regWriteCnt = regWriteCnt;
275 | e->regsWritten = (REG*)checked_malloc(regWriteCnt*sizeof(REG));
276 |
277 | regWriteCnt = 0;
278 | for(i=0; i < maxNumRegsProd; i++){
279 |
280 | reg = INS_RegW(ins, i);
281 | //assert(((UINT32)reg) < MAX_NUM_REGS);
282 | /* only consider valid general-purpose registers (any bit-width) and floating-point registers,
283 | * i.e. exlude branch, segment and pin registers, among others */
284 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){
285 | e->regsWritten[regWriteCnt++] = reg;
286 | }
287 | }
288 |
289 |
290 | e->setWritten = true;
291 | }
292 |
293 | if(!e->setRegOpCnt){
294 | regOpCnt = 0;
295 | opCnt = INS_OperandCount(ins);
296 | for(i = 0; i < opCnt; i++){
297 | if(INS_OperandIsReg(ins,i))
298 | regOpCnt++;
299 | }
300 | /*if(regOpCnt >= MAX_NUM_OPER){
301 | cerr << "BOOM! -> MAX_NUM_OPER is exceeded! (" << regOpCnt << ")" << endl;
302 | exit(1);
303 | }*/
304 | e->regOpCnt = regOpCnt;
305 | e->setRegOpCnt = true;
306 | }
307 |
308 | if(interval_size == -1){
309 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)reg_instr_full, IARG_PTR, (void*)e, IARG_END);
310 | }
311 | else{
312 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)reg_instr_intervals, IARG_PTR, (void*)e, IARG_END);
313 | /* only called if interval is full */
314 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)reg_instr_interval, IARG_END);
315 | }
316 | }
317 |
318 | /* finishing... */
319 | VOID fini_reg(INT32 code, VOID* v){
320 |
321 | if(interval_size == -1){
322 | output_file_reg.open(mkfilename("reg_full_int"), ios::out|ios::trunc);
323 | //output_file_reg << total_ins_count;
324 | }
325 | else{
326 | output_file_reg.open(mkfilename("reg_phases_int"), ios::out|ios::app);
327 | //output_file_reg << interval_ins_count;
328 | }
329 |
330 | int i;
331 | UINT64 totNumOps = 0;
332 | UINT64 num;
333 | /* total number of operands */
334 | for(i = 1; i < MAX_NUM_OPER; i++){
335 | totNumOps += opCounts[i]*i;
336 | }
337 | output_file_reg << totNumOps;
338 |
339 | // ** average degree of use **
340 | num = 0;
341 | for(i = 0; i < MAX_REG_USE; i++){
342 | num += regUseDistr[i];
343 | }
344 | output_file_reg << " " << num;
345 | num = 0;
346 | for(i = 0; i < MAX_REG_USE; i++){
347 | num += i * regUseDistr[i];
348 | }
349 | output_file_reg << " " << num;
350 |
351 | // ** register dependency distributions **
352 | num = 0;
353 | for(i = 0; i < MAX_COMM_DIST; i++){
354 | num += regAgeDistr[i];
355 | }
356 | output_file_reg << " " << num;
357 | num = 0;
358 | for(i = 0; i < MAX_COMM_DIST; i++){
359 | num += regAgeDistr[i];
360 | if( (i == 1) || (i == 2) || (i == 4) || (i == 8) || (i == 16) || (i == 32) || (i == 64)){
361 | output_file_reg << " " << num;
362 | }
363 | }
364 | output_file_reg << endl;
365 | //output_file_reg << "number of instructions: " << total_ins_count_for_hpc_alignment << endl;
366 | output_file_reg << " ";
367 | output_file_reg.close();
368 | }
369 |
--------------------------------------------------------------------------------
/mica_reg.h:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "mica.h"
11 | #include "mica_utils.h"
12 |
13 | void init_reg();
14 | VOID instrument_reg(INS ins, ins_buffer_entry* e);
15 | VOID fini_reg(INT32 code, VOID* v);
16 |
17 | VOID reg_instr_full(VOID* _e);
18 | ADDRINT reg_instr_intervals(VOID* _e);
19 | VOID reg_instr_interval_output();
20 | VOID reg_instr_interval_reset();
21 |
22 |
--------------------------------------------------------------------------------
/mica_stride.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "pin.H"
11 |
12 | /* MICA includes */
13 | #include "mica_utils.h"
14 | #include "mica_stride.h"
15 |
16 | /* Global variables */
17 |
18 | extern INT64 interval_size;
19 | extern INT64 interval_ins_count;
20 | extern INT64 interval_ins_count_for_hpc_alignment;
21 | extern INT64 total_ins_count;
22 | extern INT64 total_ins_count_for_hpc_alignment;
23 |
24 | ofstream output_file_stride;
25 |
26 | UINT64 numRead, numWrite;
27 | UINT32 readIndex;
28 | UINT32 writeIndex;
29 | ADDRINT* instrRead;
30 | ADDRINT* instrWrite;
31 | UINT64 numInstrsAnalyzed;
32 | UINT64 numReadInstrsAnalyzed;
33 | UINT64 numWriteInstrsAnalyzed;
34 | UINT64 localReadDistrib[MAX_DISTR];
35 | UINT64 globalReadDistrib[MAX_DISTR];
36 | UINT64 localWriteDistrib[MAX_DISTR];
37 | UINT64 globalWriteDistrib[MAX_DISTR];
38 | ADDRINT lastReadAddr;
39 | ADDRINT lastWriteAddr;
40 | ADDRINT* indices_memRead;
41 | UINT32 indices_memRead_size;
42 | ADDRINT* indices_memWrite;
43 | UINT32 indices_memWrite_size;
44 |
45 |
46 | /* initializing */
47 | void init_stride(){
48 |
49 | int i;
50 |
51 | /* initializing total instruction counts is done in mica.cpp */
52 |
53 | /* initial sizes */
54 | numRead = 1024;
55 | numWrite = 1024;
56 |
57 | /* allocate memory */
58 | instrRead = (ADDRINT*) checked_malloc(numRead * sizeof(ADDRINT));
59 | instrWrite = (ADDRINT*) checked_malloc(numWrite * sizeof(ADDRINT));
60 |
61 | /* initialize */
62 | readIndex = 1;
63 | writeIndex = 1;
64 | for (i = 0; i < (int)numRead; i++)
65 | instrRead[i] = 0;
66 | for (i = 0; i < (int)numWrite; i++)
67 | instrWrite[i] = 0;
68 | lastReadAddr = 0;
69 | lastWriteAddr = 0;
70 | for (i = 0; i < MAX_DISTR; i++) {
71 | localReadDistrib[i] = 0;
72 | localWriteDistrib[i] = 0;
73 | globalReadDistrib[i] = 0;
74 | globalWriteDistrib[i] = 0;
75 | }
76 | numInstrsAnalyzed = 0;
77 | numReadInstrsAnalyzed = 0;
78 | numWriteInstrsAnalyzed = 0;
79 |
80 | indices_memRead_size = 1024;
81 | indices_memRead = (ADDRINT*) checked_malloc(indices_memRead_size*sizeof(ADDRINT));
82 | for (i = 0; i < (int)indices_memRead_size; i++)
83 | indices_memRead[i] = 0;
84 |
85 | indices_memWrite_size = 1024;
86 | indices_memWrite = (ADDRINT*) checked_malloc(indices_memWrite_size*sizeof(ADDRINT));
87 | for (i = 0; i < (int)indices_memWrite_size; i++)
88 | indices_memWrite[i] = 0;
89 |
90 | if(interval_size != -1){
91 | output_file_stride.open(mkfilename("stride_phases_int"), ios::out|ios::trunc);
92 | output_file_stride.close();
93 | }
94 | }
95 |
96 | /*VOID stride_instr_full(){
97 | }*/
98 |
99 | ADDRINT stride_instr_intervals(){
100 | /* counting instructions is done in all_instr_intervals() */
101 |
102 | return (ADDRINT) (interval_ins_count_for_hpc_alignment == interval_size);
103 | }
104 |
105 | VOID stride_instr_interval_output(){
106 | int i;
107 |
108 | UINT64 cum;
109 |
110 | output_file_stride.open(mkfilename("stride_phases_int"), ios::out|ios::app);
111 |
112 | output_file_stride << numReadInstrsAnalyzed;
113 | /* local read distribution */
114 | cum = 0;
115 | for(i = 0; i < MAX_DISTR; i++){
116 | cum += localReadDistrib[i];
117 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){
118 | output_file_stride << " " << cum;
119 | }
120 | if(i == 262144)
121 | break;
122 | }
123 | /* global read distribution */
124 | cum = 0;
125 | for(i = 0; i < MAX_DISTR; i++){
126 | cum += globalReadDistrib[i];
127 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){
128 | output_file_stride << " " << cum;
129 | }
130 | if(i == 262144)
131 | break;
132 | }
133 | output_file_stride << " " << numWriteInstrsAnalyzed;
134 | /* local write distribution */
135 | cum = 0;
136 | for(i = 0; i < MAX_DISTR; i++){
137 | cum += localWriteDistrib[i];
138 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){
139 | output_file_stride << " " << cum;
140 | }
141 | if(i == 262144)
142 | break;
143 | }
144 | /* global write distribution */
145 | cum = 0;
146 | for(i = 0; i < MAX_DISTR; i++){
147 | cum += globalWriteDistrib[i];
148 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) ){
149 | output_file_stride << " " << cum;
150 | }
151 | if(i == 262144){
152 | output_file_stride << " " << cum << endl;
153 | break;
154 | }
155 | }
156 | output_file_stride.close();
157 | }
158 |
159 | VOID stride_instr_interval_reset(){
160 | int i;
161 |
162 | for (i = 0; i < MAX_DISTR; i++) {
163 | localReadDistrib [i] = 0;
164 | localWriteDistrib [i] = 0;
165 | globalReadDistrib [i] = 0;
166 | globalWriteDistrib [i] = 0;
167 | }
168 | numInstrsAnalyzed = 0;
169 | numReadInstrsAnalyzed = 0;
170 | numWriteInstrsAnalyzed = 0;
171 | interval_ins_count = 0;
172 | interval_ins_count_for_hpc_alignment = 0;
173 | }
174 |
175 | void stride_instr_interval(){
176 |
177 | stride_instr_interval_output();
178 | stride_instr_interval_reset();
179 | }
180 |
181 | /* Finds indices for instruction at some address, given some list of index-instruction pairs
182 | * Note: the 'nth_occur' argument is needed because a single instruction can have two read memory operands (which both have a different index) */
183 | UINT32 index_memRead_stride(int nth_occur, ADDRINT ins_addr){
184 |
185 | UINT32 i;
186 | int j=0;
187 | for(i=1; i <= readIndex; i++){
188 | if(indices_memRead[i] == ins_addr)
189 | j++;
190 | if(j==nth_occur)
191 | return i; /* found */
192 | }
193 | return 0; /* not found */
194 | }
195 |
196 | /* We don't know the static number of read/write operations until
197 | * the entire program has executed, hence we dynamically allocate the arrays */
198 | VOID reallocate_readArray_stride(){
199 |
200 | ADDRINT* ptr;
201 |
202 | numRead *= 2;
203 |
204 | ptr = (ADDRINT*) checked_realloc(instrRead, numRead * sizeof(ADDRINT));
205 | /*if (ptr == (ADDRINT*) NULL) {
206 | cerr << "Not enough memory (in reallocate_readArray_stride)" << endl;
207 | exit(1);
208 | }*/
209 | instrRead = ptr;
210 | }
211 |
212 | UINT32 index_memWrite_stride(ADDRINT ins_addr){
213 |
214 | UINT32 i;
215 | for(i=1; i <= writeIndex; i++){
216 | if(indices_memWrite[i] == ins_addr)
217 | return i; /* found */
218 | }
219 | return 0; /* not found */
220 | }
221 |
222 |
223 | VOID reallocate_writeArray_stride(){
224 |
225 | ADDRINT* ptr;
226 |
227 | numWrite *= 2;
228 |
229 | ptr = (ADDRINT*) checked_realloc(instrWrite, numWrite * sizeof(ADDRINT));
230 | /*if (ptr == (ADDRINT*) NULL) {
231 | cerr << "Not enough memory (in reallocate_writeArray_stride)" << endl;
232 | exit(1);
233 | }*/
234 | instrWrite = ptr;
235 | }
236 |
237 | void register_memRead_stride(ADDRINT ins_addr){
238 |
239 | ADDRINT* ptr;
240 |
241 | /* reallocation needed */
242 | if(readIndex >= indices_memRead_size){
243 |
244 | indices_memRead_size *= 2;
245 | ptr = (ADDRINT*) realloc(indices_memRead, indices_memRead_size*sizeof(ADDRINT));
246 | /*if(ptr == (ADDRINT*)NULL){
247 | cerr << "Could not allocate memory (realloc in register_readMem)!" << endl;
248 | exit(1);
249 | }*/
250 | indices_memRead = ptr;
251 |
252 | }
253 |
254 | /* register instruction to index */
255 | indices_memRead[readIndex++] = ins_addr;
256 | }
257 |
258 | void register_memWrite_stride(ADDRINT ins_addr){
259 |
260 | ADDRINT* ptr;
261 |
262 | /* reallocation needed */
263 | if(writeIndex >= indices_memWrite_size){
264 |
265 | indices_memWrite_size *= 2;
266 | ptr = (ADDRINT*) realloc(indices_memWrite, indices_memWrite_size*sizeof(ADDRINT));
267 | /*if(ptr == (ADDRINT*)NULL){
268 | cerr << "Could not allocate memory (realloc in register_writeMem)!" << endl;
269 | exit(1);
270 | }*/
271 | indices_memWrite = ptr;
272 |
273 | }
274 |
275 | /* register instruction to index */
276 | indices_memWrite[writeIndex++] = ins_addr;
277 | }
278 |
279 | VOID readMem_stride(UINT32 index, ADDRINT effAddr, ADDRINT size){
280 |
281 | ADDRINT stride;
282 |
283 | numReadInstrsAnalyzed++;
284 |
285 | /* local stride */
286 | /* avoid negative values, has to be done like this (not stride < 0 => stride = -stride (avoid problems with unsigned values)) */
287 | if(effAddr > instrRead[index])
288 | stride = effAddr - instrRead[index];
289 | else
290 | stride = instrRead[index] - effAddr;
291 | if(stride >= MAX_DISTR){
292 | stride = MAX_DISTR-1; // trim if needed
293 | }
294 |
295 | localReadDistrib[stride]++;
296 | instrRead[index] = effAddr + size - 1;
297 |
298 | /* global stride */
299 | /* avoid negative values, has to be done like this (not stride < 0 => stride = -stride (avoid problems with unsigned values)) */
300 | if(effAddr > lastReadAddr)
301 | stride = effAddr - lastReadAddr;
302 | else
303 | stride = lastReadAddr - effAddr;
304 | if(stride >= MAX_DISTR){
305 | stride = MAX_DISTR-1; // trim if needed
306 | }
307 |
308 | globalReadDistrib[stride]++;
309 | lastReadAddr = effAddr + size - 1;
310 | }
311 |
312 | VOID writeMem_stride(UINT32 index, ADDRINT effAddr, ADDRINT size){
313 |
314 | ADDRINT stride;
315 |
316 | numWriteInstrsAnalyzed++;
317 |
318 | /* local stride */
319 | /* avoid negative values, has to be doen like this (not stride < 0 => stride = -stride) */
320 | if(effAddr > instrWrite[index])
321 | stride = effAddr - instrWrite[index];
322 | else
323 | stride = instrWrite[index] - effAddr;
324 | if(stride >= MAX_DISTR){
325 | stride = MAX_DISTR-1; // trim if needed
326 | }
327 |
328 | localWriteDistrib[stride]++;
329 | instrWrite[index] = effAddr + size - 1;
330 |
331 | /* global stride */
332 | /* avoid negative values, has to be doen like this (not stride < 0 => stride = -stride) */
333 | if(effAddr > lastWriteAddr)
334 | stride = effAddr - lastWriteAddr;
335 | else
336 | stride = lastWriteAddr - effAddr;
337 | if(stride >= MAX_DISTR){
338 | stride = MAX_DISTR-1; // trim if needed
339 | }
340 |
341 | globalWriteDistrib[stride]++;
342 | lastWriteAddr = effAddr + size - 1;
343 | }
344 |
345 | UINT32 stride_index_memRead1(ADDRINT a){
346 |
347 | UINT32 index = index_memRead_stride(1, a);
348 | if(index < 1){
349 | if(readIndex >= numRead){
350 | reallocate_readArray_stride();
351 | }
352 | index = readIndex;
353 |
354 | register_memRead_stride(a);
355 | }
356 | return index;
357 | }
358 |
359 | UINT32 stride_index_memRead2(ADDRINT a){
360 | UINT32 index = index_memRead_stride(2, a);
361 | if(index < 1){
362 | if(readIndex >= numRead){
363 | reallocate_readArray_stride();
364 | }
365 | index = readIndex;
366 |
367 | register_memRead_stride(a);
368 | }
369 | return index;
370 | }
371 |
372 | UINT32 stride_index_memWrite(ADDRINT a){
373 | UINT32 index = index_memWrite_stride(a);
374 | if(index < 1){
375 | if(writeIndex >= numWrite)
376 | reallocate_writeArray_stride();
377 | index = writeIndex;
378 | register_memWrite_stride(a);
379 | }
380 | return index;
381 | }
382 |
383 | /* instrumenting (instruction level) */
384 | VOID instrument_stride(INS ins, VOID* v){
385 |
386 | UINT32 index;
387 |
388 | if( INS_IsMemoryRead(ins) ){ // instruction has memory read operand
389 |
390 | index = stride_index_memRead1(INS_Address(ins));
391 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_stride, IARG_UINT32, index, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END);
392 |
393 | if( INS_HasMemoryRead2(ins) ){ // second memory read operand
394 |
395 | index = stride_index_memRead2(INS_Address(ins));
396 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_stride, IARG_UINT32, index, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END);
397 | }
398 | }
399 |
400 | if( INS_IsMemoryWrite(ins) ){ // instruction has memory write operand
401 | index = stride_index_memWrite(INS_Address(ins));
402 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)writeMem_stride, IARG_UINT32, index, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END);
403 |
404 | }
405 |
406 | /* inserting calls for counting instructions (full) is done in mica.cpp */
407 |
408 | if(interval_size != -1){
409 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)stride_instr_intervals, IARG_END);
410 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)stride_instr_interval, IARG_END);
411 | }
412 | }
413 |
414 | /* finishing... */
415 | VOID fini_stride(INT32 code, VOID* v){
416 |
417 | int i;
418 |
419 | UINT64 cum;
420 |
421 | if(interval_size == -1){
422 | output_file_stride.open(mkfilename("stride_full_int"), ios::out|ios::trunc);
423 | }
424 | else{
425 | output_file_stride.open(mkfilename("stride_phases_int"), ios::out|ios::app);
426 | }
427 | output_file_stride << numReadInstrsAnalyzed;
428 | /* local read distribution */
429 | cum = 0;
430 | for(i = 0; i < MAX_DISTR; i++){
431 | cum += localReadDistrib[i];
432 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){
433 | output_file_stride << " " << cum;
434 | }
435 | if(i == 262144)
436 | break;
437 | }
438 | /* global read distribution */
439 | cum = 0;
440 | for(i = 0; i < MAX_DISTR; i++){
441 | cum += globalReadDistrib[i];
442 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){
443 | output_file_stride << " " << cum;
444 | }
445 | if(i == 262144)
446 | break;
447 | }
448 | output_file_stride << " " << numWriteInstrsAnalyzed;
449 | /* local write distribution */
450 | cum = 0;
451 | for(i = 0; i < MAX_DISTR; i++){
452 | cum += localWriteDistrib[i];
453 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){
454 | output_file_stride << " " << cum;
455 | }
456 | if(i == 262144)
457 | break;
458 | }
459 | /* global write distribution */
460 | cum = 0;
461 | for(i = 0; i < MAX_DISTR; i++){
462 | cum += globalWriteDistrib[i];
463 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) ){
464 | output_file_stride << " " << cum;
465 | }
466 | if(i == 262144){
467 | output_file_stride << " " << cum << endl;
468 | break;
469 | }
470 | }
471 | //output_file_stride << "number of instructions: " << total_ins_count_for_hpc_alignment << endl;
472 | output_file_stride.close();
473 | }
474 |
--------------------------------------------------------------------------------
/mica_stride.h:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "mica.h"
11 |
12 | void init_stride();
13 | VOID instrument_stride(INS ins, VOID* v);
14 | VOID fini_stride(INT32 code, VOID* v);
15 |
16 | UINT32 stride_index_memRead1(ADDRINT a);
17 | UINT32 stride_index_memRead2(ADDRINT a);
18 | UINT32 stride_index_memWrite(ADDRINT a);
19 |
20 | VOID readMem_stride(UINT32 index, ADDRINT effAddr, ADDRINT size);
21 | VOID writeMem_stride(UINT32 index, ADDRINT effAdrr, ADDRINT size);
22 |
23 | VOID stride_instr_interval_output();
24 | VOID stride_instr_interval_reset();
25 |
--------------------------------------------------------------------------------
/mica_utils.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | /* MICA includes */
11 | #include "mica_utils.h"
12 |
13 | /* lookup memNode for key in table
14 | * returns NULL is no such memNode is found
15 | */
16 | memNode* lookup(nlist** table, ADDRINT key){
17 |
18 | nlist* np;
19 |
20 | for (np = table[key % MAX_MEM_TABLE_ENTRIES]; np != (nlist*)NULL; np = np->next){
21 | if(np-> id == key)
22 | return np->mem;
23 | }
24 |
25 | return (memNode*)NULL;
26 | }
27 |
28 | /* install new memNode in table */
29 | memNode* install(nlist** table, ADDRINT key){
30 |
31 | nlist* np;
32 | ADDRINT index;
33 |
34 | index = key % MAX_MEM_TABLE_ENTRIES;
35 |
36 | np = table[index];
37 |
38 | if(np == (nlist*)NULL) {
39 | np = (nlist*)checked_malloc(sizeof(nlist));
40 | table[index] = np;
41 | }
42 | else{
43 | while(np->next != (nlist*)NULL){
44 | np = np->next;
45 | }
46 | np->next = (nlist*)checked_malloc(sizeof(nlist));
47 | np = np->next;
48 | }
49 | np->next = (nlist*)NULL;
50 | np->id = key;
51 | np->mem = (memNode*)checked_malloc(sizeof(memNode));
52 | for(ADDRINT i = 0; i < MAX_MEM_ENTRIES; i++){
53 | (np->mem)->timeAvailable[i] = 0;
54 | }
55 | for(ADDRINT i = 0; i < MAX_MEM_BLOCK; i++){
56 | (np->mem)->numReferenced[i] = false;
57 | }
58 | return (np->mem);
59 | }
60 |
61 | /**
62 | * Free a nlist and set the pointer to NULL.
63 | */
64 | void free_nlist(nlist*& np) {
65 | nlist* np_rm;
66 | while(np != (nlist*)NULL){
67 | np_rm = np;
68 | np = np->next;
69 | free(np_rm->mem);
70 | free(np_rm);
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/mica_utils.h:
--------------------------------------------------------------------------------
1 | /*
2 | * This file is part of MICA, a Pin tool to collect
3 | * microarchitecture-independent program characteristics using the Pin
4 | * instrumentation framework.
5 | *
6 | * Please see the README.txt file distributed with the MICA release for more
7 | * information.
8 | */
9 |
10 | #include "mica.h"
11 |
12 | #ifndef MICA_UTILS
13 |
14 | #define MICA_UTILS
15 |
16 |
17 | /* *** utility functions *** */
18 |
/* Stringification helpers: WRAP(x) turns its argument into a string literal;
 * REWRAP(x) lets x be macro-expanded first, so REWRAP(__LINE__) yields the
 * line number as a string literal. */
#define WRAP(x) #x
#define REWRAP(x) WRAP(x)
/* "<base file>:<current file>:<line>" of the place where it is expanded. */
#define LOCATION __BASE_FILE__ ":" __FILE__ ":" REWRAP(__LINE__)

/* Checked allocation wrappers (GNU statement expressions).
 *
 * Each one evaluates to the pointer returned by the wrapped call. If that
 * pointer is null, "Out of memory at <LOCATION>." is reported through
 * ERROR_MSG and the process terminates with exit (1).
 *
 * The temporaries carry macro-specific names on purpose: with a generic name
 * such as `result`, a call like checked_realloc (result, n) would expand to
 * `void *result = realloc (result, n)` and read the uninitialised temporary
 * instead of the caller's variable.
 *
 * NOTE(review): realloc (ptr, 0) may legitimately return a null pointer, which
 * checked_realloc treats as out-of-memory - confirm no caller passes size 0.
 */
#define checked_malloc(size) ({ \
	void *checked_malloc_result_ = malloc (size); \
	if (__builtin_expect (!checked_malloc_result_, false)) { \
		ERROR_MSG ("Out of memory at " LOCATION "."); \
		exit (1); \
	}; \
	checked_malloc_result_; })
#define checked_strdup(string) ({ \
	char *checked_strdup_result_ = strdup (string); \
	if (__builtin_expect (!checked_strdup_result_, false)) { \
		ERROR_MSG ("Out of memory at " LOCATION "."); \
		exit (1); \
	}; \
	checked_strdup_result_; })
#define checked_realloc(ptr, size) ({ \
	void *checked_realloc_result_ = realloc (ptr, size); \
	if (__builtin_expect (!checked_realloc_result_, false)) { \
		ERROR_MSG ("Out of memory at " LOCATION "."); \
		exit (1); \
	}; \
	checked_realloc_result_; })
26 |
27 |
28 | /* *** struct definitions *** */
29 |
/* memory node struct
 *
 * Payload attached to a hash-table entry (the `mem` member of nlist).
 * install() allocates one per new entry and clears both arrays.
 */
typedef struct memNode_type{
	/* ilp: MAX_MEM_ENTRIES slots, each set to 0 by install()
	 * (presumably a "time available" stamp per slot - confirm in mica_ilp.cpp) */
	INT32 timeAvailable[MAX_MEM_ENTRIES];
	/* memfootprint: MAX_MEM_BLOCK flags, each set to false by install()
	 * (presumably "already referenced" markers - confirm in mica_memfootprint.cpp) */
	bool numReferenced [MAX_MEM_BLOCK];
} memNode;
37 |
/* linked list struct
 *
 * One entry of a singly linked hash-bucket chain, as used by lookup(),
 * install() and free_nlist().
 */
typedef struct nlist_type {
	/* key of this entry; lookup() compares it against the requested key */
	ADDRINT id;
	/* payload owned by this entry; released together with it by free_nlist() */
	memNode* mem;
	/* following entry in the chain, or NULL for the last entry */
	struct nlist_type* next;
} nlist;
44 |
/* Return the memNode stored for key in table, or NULL if there is none. */
memNode* lookup(nlist** table, ADDRINT key);
/* Append a new, cleared memNode for key to table and return it
 * (no check is made for an already existing entry with the same key). */
memNode* install(nlist** table, ADDRINT key);
/* Free every entry of the chain starting at np (including each memNode);
 * np is NULL afterwards. */
void free_nlist(nlist*& np);
48 |
/* Singly linked record holding per-instruction register operand information.
 * NOTE(review): the users of this struct are not visible in this header; the
 * field notes below are inferred from the names only - confirm against the
 * analyses that fill it in. */
typedef struct ins_buffer_entry_type {
	/* presumably the address of the instruction this record describes */
	ADDRINT insAddr;
	/* presumably "already filled in" flags for regsRead, regsWritten and
	 * regOpCnt respectively - TODO confirm */
	BOOL setRead;
	BOOL setWritten;
	BOOL setRegOpCnt;
	/* register operand count (exact definition not visible here) */
	INT32 regOpCnt;
	/* presumably the number of elements in regsRead */
	INT32 regReadCnt;
	REG* regsRead;
	/* presumably the number of elements in regsWritten */
	INT32 regWriteCnt;
	REG* regsWritten;
	/* next record in the list */
	ins_buffer_entry_type* next;
} ins_buffer_entry;
61 |
62 | #endif
63 |
--------------------------------------------------------------------------------
/tableGen.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Amir H. Ashouri - 2017
# (www.eecg.toronto.edu/~aashouri/)
# This script looks for all MICA output files that correspond to a pid and
# generates a MICA table (micaTable.txt). The first row is the header.
# Tested with MICA v0.40
#
# Run it from the directory that contains one sub-directory per benchmark.
# For every sub-directory, each file whose name contains "ilp_full_int_"
# yields one table row: "<dir> dataset<N> " followed by the concatenated
# contents (newlines removed) of all files in that directory whose name
# contains the pid taken from the ilp file name.

benchmarks=*

# Absolute path of the table, so rows land in the same file no matter which
# directory is current (a relative ../micaTable.txt goes astray for symlinked
# benchmark directories or after a failed cd).
table="$PWD/micaTable.txt"

echo -n "APPLICATION_NAME DATASET totInstruction ILP32 ILP64 ILP128 ILP256 total_ins_count_for_hpc_alignment totInstruction mem-read mem-write control-flow arithmetic floating-point stack shift string sse other nop InstrFootprint64 InstrFootprint4k DataFootprint64 DataFootprint4k mem_access memReuseDist0-2 memReuseDist2-4 memReuseDist4-8 memReuseDist8-16 memReuseDist16-32 memReuseDist32-64 memReuseDist64-128 memReuseDist128-256 memReuseDist256-512 memReuseDist512-1k memReuseDist1k-2k memReuseDist2k-4k memReuseDist4k-8k memReuseDist8k-16k memReuseDist16k-32k memReuseDist32k-64k memReuseDist64k-128k memReuseDist128k-256k memReuseDist256k-512k memReuseDist512k-00 GAg_mispred_cnt_4bits PAg_mispred_cnt_4bits GAs_mispred_cnt_4bits PAs_mispred_cnt_4bits GAg_mispred_cnt_8bits PAg_mispred_cnt_8bits GAs_mispred_cnt_8bits PAs_mispred_cnt_8bits GAg_mispred_cnt_12bits PAg_mispred_cnt_12bits GAs_mispred_cnt_12bits PAs_mispred_cnt_12bits total_brCount total_transactionCount total_takenCount total_num_ops instr_reg_cnt total_reg_use_cnt total_reg_age reg_age_cnt_1 reg_age_cnt_2 reg_age_cnt_4 reg_age_cnt_8 reg_age_cnt_16 reg_age_cnt_32 reg_age_cnt_64 mem_read_cnt mem_read_local_stride_0 mem_read_local_stride_8 mem_read_local_stride_64 mem_read_local_stride_512 mem_read_local_stride_4096 mem_read_local_stride_32768 mem_read_local_stride_262144 mem_read_global_stride_0 mem_read_global_stride_8 mem_read_global_stride_64 mem_read_global_stride_512 mem_read_global_stride_4096 mem_read_global_stride_32768 mem_read_global_stride_262144 mem_write_cnt mem_write_local_stride_0 mem_write_local_stride_8 mem_write_local_stride_64 mem_write_local_stride_512 mem_write_local_stride_4096 mem_write_local_stride_32768 mem_write_local_stride_262144 mem_write_global_stride_0 mem_write_global_stride_8 mem_write_global_stride_64 mem_write_global_stride_512 mem_write_global_stride_4096 mem_write_global_stride_32768 mem_write_global_stride_262144" > "$table"

# $benchmarks is intentionally unquoted here: this is where the glob expands.
for i in $benchmarks
do
    # progress marker: prints the literal pattern once per entry
    # ('%s' keeps the value from being interpreted as a printf format)
    printf '%s' "$benchmarks"

    [ -d "$i" ] || continue

    # Process the directory in a subshell: the working directory of the
    # main loop never changes, and a failed cd only skips this directory.
    (
        cd "./$i" || exit 1

        echo "**********************************************************"
        printf '%s\n' "$i"

        j_pid=1
        # Glob instead of parsing `ls *`, which would also list the contents
        # of sub-directories and break on unusual file names.
        for ilp_file in *ilp_full_int_*
        do
            [ -f "$ilp_file" ] || continue

            # pid = file name without "ilp_full_int_" and "_pin.out"
            i_pid=${ilp_file/ilp_full_int_/}
            i_pid=${i_pid/_pin.out/}
            [ -n "$i_pid" ] || continue

            printf '\n%s dataset%d ' "$i" "$j_pid" >> "$table"
            # NOTE(review): this is a substring match, so pid 123 also picks up
            # files of pid 1234 - tighten once the naming of all outputs is confirmed.
            cat -- *"$i_pid"* | tr -d "\n" >> "$table"
            j_pid=$((j_pid + 1))
        done
        echo ""
        echo ""
    )

done
39 |
40 |
41 |
--------------------------------------------------------------------------------