├── LICENSE ├── Makefile ├── README.md ├── RELEASE_NOTES ├── index.html ├── itypes_default.spec ├── itypes_default_available ├── mica.conf.example ├── mica.cpp ├── mica.h ├── mica_all.cpp ├── mica_all.h ├── mica_ilp.cpp ├── mica_ilp.h ├── mica_init.cpp ├── mica_init.h ├── mica_itypes.cpp ├── mica_itypes.h ├── mica_memfootprint.cpp ├── mica_memfootprint.h ├── mica_memstackdist.cpp ├── mica_memstackdist.h ├── mica_ppm.cpp ├── mica_ppm.h ├── mica_reg.cpp ├── mica_reg.h ├── mica_stride.cpp ├── mica_stride.h ├── mica_utils.cpp ├── mica_utils.h └── tableGen.sh /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2007-2011, Kenneth Hoste and Lieven Eeckhout (Ghent University, Belgium) 2 | kehoste@elis.ugent.be, leeckhou@elis.ugent.be 3 | 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 10 | * Neither the name of the organization nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 13 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 14 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 15 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 16 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 17 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 18 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 19 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 20 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 21 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ifdef PIN_ROOT 2 | CONFIG_ROOT := $(PIN_ROOT)/source/tools/Config 3 | else 4 | CONFIG_ROOT := ../Config 5 | endif 6 | 7 | include $(CONFIG_ROOT)/makefile.config 8 | include $(TOOLS_ROOT)/Config/makefile.default.rules 9 | CXXFLAGS = -std=gnu++11 -DVERBOSE -Wall -Werror -Wno-unknown-pragmas $(DBG) $(OPT) 10 | 11 | SRC_DIR := . 12 | SRC_FILES := $(wildcard $(SRC_DIR)/*.cpp) 13 | OBJ_FILES := $(patsubst $(SRC_DIR)/%.cpp,$(OBJDIR)%$(OBJ_SUFFIX),$(SRC_FILES)) 14 | 15 | all: $(OBJDIR)mica$(PINTOOL_SUFFIX) 16 | 17 | # Build the intermediate object file. 
18 | $(OBJDIR)%$(OBJ_SUFFIX): %.cpp 19 | $(CXX) $(CXXFLAGS) $(TOOL_CXXFLAGS_NOOPT) $(COMP_OBJ)$@ $< 20 | 21 | $(OBJDIR)mica$(PINTOOL_SUFFIX): $(OBJ_FILES) 22 | $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $^ $(TOOL_LPATHS) $(TOOL_LIBS) 23 | 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MICA: Microarchitecture-Independent Characterization of Applications 2 | ==================================================================== 3 | version 1.0 4 | 5 | [Kenneth Hoste](http://kejo.be/ELIS/) & [Lieven Eeckhout](http://users.elis.ugent.be/~leeckhou/) (Ghent University, Belgium) 6 | 7 | Current maintainer: 8 | [Amir H. Ashouri](http://www.eecg.toronto.edu/~aashouri/) (University of Toronto, Canada) 9 | 10 | with contributions by: 11 | - Hamid Fadishei (multi-process support) 12 | - Petr Tuma (code cleanup) 13 | - Maxime Chéramy (cleanup, bug fixes, additional features) 14 | 15 | Websites: 16 | (http://boegel.kejo.be/ELIS/MICA) 17 | (http://www.elis.ugent.be/~kehoste/mica) 18 | 19 | A set of tutorial slides on MICA, which were presented at IISWC-2007 are 20 | available from the MICA website. 21 | 22 | # Disclaimer 23 | ------------ 24 | 25 | Currently, this software is only tested on Linux/x86 and [Pin.2.10](https://drive.google.com/file/d/0B-AkmAlNRsymNVl1RndzbFVpZEU/view?usp=drivesdk&resourcekey=0-YSVKSR2SXSpFSaYZvAA4Cg) We had users reporting the corect installation on [Pin-3.4](https://software.intel.com/en-us/articles/pin-a-binary-instrumentation-tool-downloads) and the details are [here](https://github.com/boegel/MICA/commit/1293082a05e97854e3ccc48490d5b72e765b48bf). Anyone who wants to use it on a different 26 | platform supported by Pin is free to do so, but should expect problems. We work on adapting MICA on newer Pin versions. 27 | 28 | Any problem reports or questions are welcome at kenneth.hoste@ugent.be . 
29 | 30 | # Compilation 31 | -------------- 32 | 33 | The easiest way to compile MICA is to add unzip/untar mica_vXYZ.tar.gz to the source/tools 34 | directory of the Pin kit you are using. If you wish to place mica in a different 35 | directory, you'll have to adjust the makefile included accordingly. 36 | Running 'make' should produce the 'mica_v0-X' shared library. 37 | 38 | By default, MICA is built using the GCC C++ compiler (g++). 39 | Since Pin kit 39599 (March 2nd 2011), building Pin tools with the Intel compilers is 40 | also supported. To build MICA using the Intel C++ compiler, run "make CXX=icpc". 41 | Make sure /opt/intel/lib is added to the LD_LIBRARY_PATH environment variable to 42 | use MICA built using the Intel compilers. 43 | 44 | # Specifying type of analysis 45 | ----------------------------- 46 | 47 | MICA supports various types of microarchitecture-independent characteristics. 48 | It also allows to measure the characteristics either for the entire execution, or 49 | per interval of N dynamic instructions. 50 | 51 | Specifying the parameters is done using the mica.conf configuration file. 52 | A sample mica.conf file is provided with the distribution, and details 53 | on how to specify the parameters are found below. 
54 | ``` 55 | analysis_type: all | ilp | ilp_one | itypes | ppm | reg | stride | memfootprint | memstackdist | custom 56 | interval_size: full | 57 | [ilp_size: ] 58 | [block_size: <2^size>] 59 | [page_size: <2^size>] 60 | [itypes_spec_file: ] 61 | ``` 62 | ## example: 63 | ``` 64 | analysis_type: all 65 | interval_size: 100000000 66 | block_size: 6 67 | page_size: 12 68 | itypes_spec_file: itypes_default.spec 69 | ``` 70 | 71 | specifies to measure all supported characteristics per interval of 100,000,000 instructions, 72 | with block size of 64 (2^6), page size of 4K (2^12), and using the instruction mix categories 73 | described in the file itypes_default.spec 74 | 75 | ## Usage 76 | ------- 77 | 78 | Using MICA is very easy; just run: 79 | ``` 80 | pin -t mica.so -- [] 81 | ``` 82 | The type of analysis is specified in the mica.conf file, and some 83 | logging is written to mica.log. 84 | 85 | ## Output files 86 | --------------- 87 | 88 | (I realize the output file names are a bit strange, but that's just the way I 89 | chose them... It's easy to adjust them yourself! ). 
90 | ``` 91 | ilp: 92 | full: ilp_full_int_pin.out 93 | interval: ilp_phases_int_pin.out 94 | ilp_one: 95 | full: ilp_full_int_pin.out 96 | interval: ilp_phases_int_pin.out 97 | itypes: 98 | full: itypes_full_int_pin.out 99 | interval: itypes_phases_int_pin.out 100 | ppm: 101 | full: ppm_full_int_pin.out 102 | interval: ppm_phases_int_pin.out 103 | reg: 104 | full: reg_full_int_pin.out 105 | interval: reg_phases_int_pin.out 106 | stride: 107 | full: stride_full_int_pin.out 108 | interval: stride_phases_int_pin.out 109 | memfootprint: 110 | full: memfootprint_full_int_pin.out 111 | interval: memfootprint_phases_int_pin.out 112 | memstackdist: 113 | full: memstackdist_full_int_pin.out 114 | interval: memstackdist_phases_int_pin.out 115 | ``` 116 | 117 | ## Full execution metrics 118 | ----------------------------------- 119 | 120 | ### +++ ilp +++ 121 | 122 | Instruction-Level Parallellism (ILP) available for four different instruction 123 | window sizes (32, 64, 128, 256). 124 | This is measured by assuming perfect caches, perfect branch prediction, etc. 125 | The only limitations are the instruction window size and the data dependences. 126 | ``` 127 | analysis_type: ilp 128 | ``` 129 | Besides measuring these four window sizes at once, MICA also supports 130 | specifying a single window size, which is specified as follows (for 131 | characterizing the full run using an instruction window of 32 entries): 132 | ``` 133 | analysis_type: ilp_one 134 | interval_size: full 135 | ilp_size: 32 136 | ``` 137 | You can tweak the block size used using the block_size configuration parameter. 138 | 139 | ### +++ itypes +++ 140 | ``` 141 | analysis_type: itypes 142 | ``` 143 | ### +++ Instruction mix +++ 144 | 145 | The instruction mix is evaluated by categorizing the executed instructions. 146 | Because the x86 architecture isn't a load-store architecture, we count memory 147 | reads/writes seperately. 
The following categories are used by default (in order 148 | of output): 149 | ``` 150 | - memory read (instructions which read from memory) 151 | - memory write (instructions which write to memory) 152 | - control flow 153 | - arithmetic 154 | - floating-point 155 | - stack 156 | - shift 157 | - string 158 | - sse 159 | - other 160 | - nop 161 | ``` 162 | It is possible to redefine the instruction mix categories, by creating a specification 163 | file and mentioning it in the mica.conf file (itypes_spec_file). 164 | 165 | ### +++ ppm +++ 166 | ``` 167 | analysis_type: ppm 168 | ``` 169 | Branch predictability. 170 | 171 | The branch predictability of the conditional branches in the program is 172 | evaluated using a Prediction-by-Partial-Match (PPM) predictor, in 4 different 173 | configurations (global/local branch history, shared/separate prediction 174 | table(s)), using 3 different history lengths (4,8,12 bits). Additionally, 175 | average taken and transition count are also being measured. 176 | 177 | ### +++ reg +++ 178 | ``` 179 | analysis_type: reg 180 | ``` 181 | ### Register traffic. 182 | 183 | The register traffic is analyzed in different aspects: 184 | ``` 185 | - average number of register operands 186 | - average degree of use 187 | - dependency distances (prob. <= D) 188 | 189 | Dependency distances are chosen in powers of 2, i.e. 1, 2, 4, 8, 16, 32, 64 190 | ``` 191 | ### +++ stride +++ 192 | ``` 193 | analysis_type: stride 194 | ``` 195 | Data stream strides. 196 | 197 | The distances between subsequent memory accesses are characterised by: 198 | ``` 199 | - local load (memory read) strides 200 | - global load (memory read) strides 201 | - local store (memory write) strides 202 | - global store (memory write) strides 203 | ``` 204 | Local means per static instruction accesses, global means over all 205 | instructions. The strides are characterized by powers of 8 (prob. 
<= 0, 8, 64, 206 | 512, 4096, 32768, 262144) 207 | 208 | ### +++ memfootprint +++ 209 | ``` 210 | analysis_type: memfootprint 211 | ``` 212 | Instruction and data memory footprint. 213 | 214 | The size of the instruction and data memory footprint is characterized by 215 | counting the number of blocks (64-byte) and pages (4KB) touched. This 216 | is done separately for data and instruction addresses. 217 | 218 | ### +++ memstackdist +++ 219 | ``` 220 | analysis_type: memstackdist 221 | ``` 222 | Memory reuse distances. 223 | 224 | This is a highly valuable set of numbers to characterize the cache behavior 225 | of the application of interest. For each memory read, the corresponding 226 | 64-byte cache block is determined. For each cache block accessed, the number 227 | of unique cache blocks accessed since the last time it was referenced is 228 | determined, using a LRU stack. 229 | The reuse distances for all memory reads are reported in buckets. The first 230 | bucket is used for so-called 'cold references'. The subsequent buckets capture reuse 231 | distances of [2^n, 2^(n+1)[, where n ranges from 0 to 18. The first of these 232 | actually captures [0,2[ (not [1,2[), while the last bucket, [2^18, 2^19[, captures all 233 | reuse distances larger than or equal to 2^18, so it's in fact [2^18, oo[. 234 | In total, this delivers 20 buckets, and the total number of memory accesses 235 | (the first number in the output), thus 21 numbers. 236 | 237 | For example: the fifth bucket corresponds to accesses with reuse distance 238 | between 2^3 and 2^4 (or 8 64-byte cache blocks to 16 64-byte cache blocks). 239 | 240 | Note: because memory addresses vary over different executions of the same 241 | program, these numbers may vary slightly across multiple runs. Please be aware 242 | of this when using these metrics for research purposes. 
243 | 244 | To track the progress of the MICA analysis being run, see the mica_progress.txt file 245 | which shows how many dynamic instructions have been analyzed. Disabling this can be 246 | done by removing the -DVERBOSE flag in the Makefile and rebuilding MICA. 247 | 248 | ### * Interval metrics 249 | ------------------- 250 | 251 | Besides characterizing total program execution, the tool is also capable of 252 | characterizing interval behavior. The analysis is identical to the tools 253 | above, but flushes the state for each new interval. 254 | 255 | ### +++ ilp +++ 256 | 257 | RESET: instruction and cycle counters (per interval), free memory used for 258 | memory address stuff (to avoid huge memory requirements for large workloads) 259 | 260 | DON'T TOUCH: instruction window contents; global instruction and cycle counters 261 | 262 | +++ itypes +++ 263 | 264 | RESET: instruction type counters 265 | 266 | +++ ppm +++ 267 | 268 | RESET: misprediction counts, taken/transition counts 269 | 270 | DON'T TOUCH: branch history tables 271 | 272 | +++ reg +++ 273 | 274 | RESET: operand counts, register use distribution and register age distribution 275 | 276 | DON'T TOUCH: register use counts (i.e. 
keep track of register use counts across 277 | interval boundaries); register definition addresses 278 | 279 | +++ stride +++ 280 | 281 | RESET: instruction counts (mem.read, mem.write, interval), distribution counts 282 | 283 | DON'T TOUCH: last (global/local) read/write memory addresses 284 | 285 | +++ memfootprint +++ 286 | 287 | RESET: reference counters, free memory used for memory address stuff (to avoid 288 | huge memory requirements for large workloads) 289 | 290 | DON'T TOUCH: - 291 | 292 | +++ memstackdist +++ 293 | 294 | RESET: bucket counts (including cold reference and memory access counts) 295 | 296 | DON't TOUCH: LRU stack (keep track of reuse distances over interval boundaries) 297 | 298 | * Measured in integer values, convert to floating-point 299 | ------------------------------------------------------- 300 | 301 | Because of historical reasons (problems with printing out floating-point 302 | numbers in certain situations with previous Pin kits), we only print out 303 | integer values and convert to floating-point metrics offline. This also allows 304 | aggregating data measured per interval to larger intervals or full execution 305 | for most characteristics. 306 | ``` 307 | S: interval size 308 | N: number of intervals 309 | I: number of instructions 310 | ``` 311 | +++ ilp +++ 312 | 313 | FORMAT: 314 | 315 | instruction_countcycle_count_win_size_1cycle_count_win_size_2...cycle_count_win_size_n 316 | 317 | CONVERSION: 318 | 319 | instruction_count/cycle_count 320 | ``` 321 | i.e. 322 | 1 to (N-1)th line: S/cycle_count_win_size_i 323 | Nth line: (I-N*S)/cycle_count_win_size_i 324 | ``` 325 | +++ itypes +++ 326 | 327 | FORMAT: 328 | 329 | instruction_cntmem_read_cntmem_write_cntcontrol_cntarith_cntfp_cntstack_cntshift_cntstring_cntsse_cntsystem_cntnop_cntother_cnt 330 | 331 | CONVERSION: 332 | ``` 333 | mem_write_cnt/instruction_cnt 334 | ... 
335 | other_cnt/instruction_cnt 336 | ``` 337 | NOTE 338 | 339 | Note that simply adding the (n-1) last numbers won't necessarily yield the first number. 340 | First of all, the memory read and write counts shouldn't be added to the total, because 341 | the x86 architecture is not a load/store architecture (e.g. an instruction can both read 342 | memory and be a floating-point instruction). 343 | Secondly, some instructions may fit in multiple categories, and therefore simply adding the 344 | counts for the various categories will cause instructions to be counted double. 345 | 346 | Also note that the (sum of) instruction_cnt value(s) will not match the instruction count 347 | printed at the last line of the output file ("number of instructions: "). This is because 348 | in the former, each iteration of a REP-prefixed instruction is counted, while in the latter 349 | a REP-prefixed instruction is only counted once. 350 | 351 | The other_cnt contains the number of instructions that did not fit in any of the other categories 352 | (excluding mem_read and mem_write). More details on which kind of instructions this includes can 353 | be found in the itypes_other_group_categories.txt output file. 354 | 355 | +++ ppm +++ 356 | 357 | FORMAT: 358 | 359 | instr_cntGAg_mispred_cnt_4bitsPAg_mispred_cnt_4bitsGAs_mispred_cnt_4bitsPAs_mispred_cnt_4bits...PAs_mispred_cnt_12bits 360 | 361 | CONVERSION: 362 | ``` 363 | GAg_mispred_cnt_Kbits/instr_cnt 364 | ... 365 | PAs_mispred_cnt_Kbits/instr_cnt 366 | ``` 367 | +++ reg +++ 368 | 369 | FORMAT: 370 | 371 | instr_cnttotal_oper_cntinstr_reg_cnttotal_reg_use_cnttotal_reg_agereg_age_cnt_1reg_age_cnt_2reg_age_cnt_4...reg_age_cnt_64 372 | 373 | CONVERSION: 374 | ``` 375 | total_oper_cnt/instr_cnt 376 | total_reg_use_cnt/instr_reg_cnt 377 | reg_age_cnt_1/total_reg_age 378 | reg_age_cnt_2/total_reg_age 379 | ... 
380 | reg_age_cnt_64/total_reg_age 381 | ``` 382 | +++ stride +++ 383 | 384 | FORMAT: 385 | 386 | mem_read_cntmem_read_local_stride_0mem_read_local_stride_8...mem_read_local_stride_262144mem_read_global_stride_0...mem_read_global_stride_262144mem_write_cntmem_write_local_stride_0...mem_write_global_stride_262144 387 | 388 | CONVERSION: 389 | 390 | mem_read_local_stride_0/mem_read_cnt 391 | ... 392 | mem_read_global_stride_262144/mem_read_cnt 393 | mem_write_local_stride_0/mem_write_cnt 394 | ... 395 | mem_write_global_stride_262144/mem_write_cnt 396 | 397 | +++ memfootprint +++ 398 | 399 | Integer output (no conversion needed). 400 | 401 | FORMAT: 402 | 403 | num_64-byte_blocks_datanum_4KB_pages_datanum_64-byte_blocks_instrnum_4KB_pages_instr 404 | 405 | +++ memstackdist +++ 406 | 407 | FORMAT: 408 | 409 | mem_access_cntcold_ref_cntacc_cnt_0-2acc_cnt_2-2^2acc_cnt_2^2-2^3...acc_cnt_2^17-2^18acc_cnt_over_2^18 410 | 411 | CONVERSION: 412 | ``` 413 | cold_ref_cnt/mem_access_cnt 414 | acc_cnt_0/mem_access_cnt 415 | ... 416 | acc_cnt_2^18-2^19/mem_access_cnt 417 | acc_cnt_rest/mem_access_cnt 418 | ``` 419 | * Multi-process binaries 420 | ----------------------------------- 421 | 422 | If you want to use MICA on multiprocess binaries which call fork and execv, you should specify this entry in the MICA configuration file: 423 | ``` 424 | append_pid: yes 425 | ``` 426 | This will tell MICA to append the current process ID to the report file names so multiple processes do not overwrite each other's output. 427 | Additionally, you should pass "-follow_execv 1" parameter to pin in order to trace multiprocess applications. 428 | 429 | ------------------------------------------------------------------ 430 | # Complete list of Headers - Table Generation 431 | For ease of use, we provide tableGen.sh to automatically look for all mica instrumented output files beloging to a unique Pid. It generates a CSV file having the first row as the headers. 
Please refer to the headers in the script for the complete set of names. 432 | 433 | ------------------------------------------------------------------ 434 | # Examples of using MICA in the recent literature 435 | 436 | You can see an example of using MICA in building prediction models targetted to Compiler optimization problems here at [COBAYN's github page](https://github.com/amirjamez/COBAYN). There is also a provided dataset located at: 437 | ``` 438 | >>COBAYN/data/ft_MICA_cbench.csv 439 | ``` 440 | -------------------------------------------------------------------------------- /RELEASE_NOTES: -------------------------------------------------------------------------------- 1 | March 26th 2012 2 | --------------- 3 | 4 | MICA v0.40 5 | 6 | - contributions by Hamid Fadishei: 7 | * append_pid config entry added by Hamid Fadishei, mainly for multiprocess binaries 8 | * some warning messages resolved 9 | 10 | Aug. 29th 2011 11 | --------------- 12 | 13 | MICA v0.32 14 | 15 | - significant code cleanup by Petr Tuma; notes: 16 | * cleaned up some redundant NULL pointer casts 17 | * cleaned up names of LRU stack entry references 18 | * systematic testing of malloc return value in most of the tool code 19 | * added warning on presence of multiple threads 20 | * replaced sprintf with string streams 21 | * added branch prediction hints using __builtin_expect where appropriate 22 | - guard tracking progress in mica_progress.txt with a preprocessor flag (-DVERBOSE) 23 | - test MICA built using the Intel C++ compiler, and document how to build it using 24 | icpc in the README (TODO: benchmark the performance difference) 25 | 26 | February 28th 2011 27 | ------------------ 28 | 29 | MICA v0.31 30 | 31 | - improved config file parsing, i.e. 
remove dependency on order of entries 32 | 33 | - updated README file 34 | - describe some details regarding instruction mix 35 | 36 | February 27th 2011 37 | ------------------ 38 | 39 | MICA v0.3 40 | 41 | - increased flexibility of itypes analysis significantly 42 | - instruction groups used in itypes analysis can be specified by the user now, 43 | using a itypes.spec file; specify the filename in mica.conf using an entry like: 44 | itypes_spec_file: 45 | - by default, the old instruction groups are used 46 | (except for SYSCALL, which was added to the group formely known as 'other') 47 | - made block size in ilp, memfootprint and memreusedist flexible 48 | - size can be set by specifying 'block_size: ' in the mica.conf file 49 | - default block size is 2^6 (64) bytes, which is a change compared to MICA v0.23 for ilp 50 | - made page size in memfootprint flexible 51 | - size can be set by specifying 'page_size: ' in the mica.conf file 52 | - default page size is 4096 (2^12) bytes 53 | - possibly expensive assert statements and other sanity checks were removed 54 | - bug fixes: 55 | - memory read size wasn't being used 100% correctly 56 | in ilp, memfootprint, memreusedist and stride analysis, 57 | the size was being added to the start address of the read, 58 | while (size-1) should be added; otherwise, e.g. for memfootprint, 59 | we count an extra block being touched if the access is near a block boundary 60 | - a small problem with an assert statement was fixed in memfootprint (>= 0 instead of >) 61 | - fprintf statements were fixed for 64-bit systems 62 | 63 | 64 | September 22th 2009 65 | ------------------- 66 | 67 | MICA v0.23 68 | 69 | Several people have reported small problems when MICA is being used with 70 | recent Pin kits. This small patch release should resolve these issues. 
71 | 72 | - bug fixes: 73 | * adjusted makefile and README according to Pin kit directory tree changes 74 | * adjust mica_itypes.cpp to recognize both NOP and WIDENOP categories 75 | 76 | 77 | June 13th 2008 78 | -------------- 79 | 80 | MICA v0.22 81 | 82 | - bug fixes in itypes: 83 | * fixed issue with instructions in MISC category being counted double (both in 'control flow' and 'other' buckets) 84 | (thanks to Ahmed S. Al-Zawawi for bringing my attention to this issue) 85 | * added NOP instructions category 86 | - adjusted README file to make meaning of different buckets in memreusedist more clear 87 | (thanks to Kshitij Sudan for reporting this) 88 | 89 | May 20th 2008 90 | -------------- 91 | 92 | MICA v0.21 93 | 94 | - removed -static from makefile, to avoid issues when linking MICA 95 | (thanks to J. K. Rai and Ahmed S. Al-Zawawi for reporting this) 96 | 97 | Dec. 3rd 2007 98 | -------------- 99 | 100 | MICA v0.2: 101 | 102 | - various bug fixes, including: 103 | * reg: include non-full-width registers 104 | * ilp: not all registers were included in analysis (stopped after first non-valid register) 105 | * stride: fixed faulty use of readIndex/writeIndex in readMem/writeMem 106 | * reset interval_ins_count for all characteristics 107 | - added features: 108 | * taking size of memory read/write into account 109 | * implementation of memreusedist characteristics, useful for characterizing cache behavior 110 | - adjusted: 111 | * memory footprint measured for 64-byte blocks instead of 32-byte blocks (because most modern processors have 64-byte cache blocks) 112 | * configuring MICA is done using a mica.conf configuration file instead of command line parameters 113 | - speed: 114 | * used InsertIfCall/InsertThenCall to make more analysis routines inlineable 115 | * buffering for ilp implementation, which yields roughly 10% speedup 116 | 117 | Sept. 
29th 2007 118 | --------------- 119 | 120 | Initial release: MICA v0.1 121 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | Kenneth Hoste @ ELIS (UGent) -- MICA 10 | 11 | 12 | 14 | 18 | 19 | 20 | 21 | 22 |

MICA: Microarchitecture-Independent Characterization of Applications

23 | 24 | 25 | 26 | 37 | 38 | 39 |

Ghent University, Belgium

27 |

Kenneth Hoste and Lieven Eeckhout

28 |
kenneth.hoste@ugent.be , leeckhou@elis.ugent.be
29 |

30 | [ what? ] :: 31 | [ news ] :: 32 | [ download ] :: 33 | [ how? ] :: 34 | [ publications ] :: 35 | [ links ] 36 |

40 | 41 | 42 | 43 |
44 | 45 | 46 |

What is MICA?

47 | 48 |

MICA is short for Microarchitecture-Independent 49 | Characterization of Applications.

50 | 51 |

MICA is a Pin 52 | tool which allows the user to collect a number of program characteristics to 53 | quantify runtime program behavior.

54 | 55 |

These program characteristics are totally independent of the 56 | microarchitecture (cache configuration, branch predictor, ...) on which the 57 | measurements are done, in contrast to other workload characterization 58 | techniques using simulation or hardware performance counters.

59 | 60 |
61 | 62 | 63 |

News

64 | 65 |

66 | (Feb. 27th 2011)
67 | I am no longer actively working in the field of computer architecture research.
68 | Nevertheless, I will try and support MICA in the coming years.
69 | If you notice that the last release of MICA isn't working for you, e.g. with the latest Pin kit, please contact me (kenneth.hoste@ugent.be)

70 | 71 |

March 26th 2012

72 |

Release of MICA v0.40

73 |
    74 |
  • contributions by Hamid Fadishei: 75 |
      76 |
    • add support for multi-process binaries, see append_pid entry for config file
    • 77 |
    • resolve some warning messages
    • 78 |
    79 |
  • 80 |
81 |

82 | 83 |

Aug. 29th 2011

84 |

Release of MICA v0.32

85 |
    86 |
  • significant code cleanup by Petr Tuma
    notes: 87 |
      88 |
    • cleaned up some redundant NULL pointer casts
    • 89 |
    • cleaned up names of LRU stack entry references
    • 90 |
    • systematic testing of malloc return value in most of the tool code
    • 91 |
    • added warning on presence of multiple threads
    • 92 |
    • replaced sprintf with string streams
    • 93 |
    • added branch prediction hints using __builtin_expect where appropriate
    • 94 |
    95 |
  • guard tracking progress in mica_progress.txt with a preprocessor flag (-DVERBOSE)
  • 96 |
  • test MICA built using the Intel C++ compiler, and document how to build it using 97 | icpc in the README (TODO: benchmark the performance difference)
  • 98 |
99 | 100 |

Feb. 28th 2011

101 |

Release of MICA v0.31

102 |
    103 |
  • improved config file parsing, i.e. remove dependency on order of entries
  • 104 |
  • updated README file 105 |
      106 |
    • describe some details regarding instruction mix
    • 107 |
    108 |
  • 109 |
110 | 111 | 112 |

Feb. 27th 2011

113 |

Release of MICA v0.3

114 |
    115 |
  • increased flexibility of itypes analysis significantly 116 |
      117 |
    • instruction groups used in itypes analysis can be specified by the user now, 118 | using a itypes.spec file; specify the filename in mica.conf using an entry like:
      119 | 'itypes_spec_file: <filename>'
    • 120 |
    • by default, the old instruction groups are used
      121 | (except for SYSCALL, which was added to the group formerly known as 'other')
    • 122 |
    123 |
  • 124 |
  • made block size in ilp, memfootprint and memreusedist flexible 125 |
      126 |
    • size can be set by specifying 'block_size: <power of 2>' in the mica.conf file
    • 127 |
    • default block size is 2^6 (64) bytes, which is a change compared to MICA v0.23 for ilp
    • 128 |
    129 |
  • 130 |
  • made page size in memfootprint flexible 131 |
      132 |
    • size can be set by specifying 'page_size: <power of 2>' in the mica.conf file
    • 133 |
    • default page size is 4096 (2^12) bytes
    • 134 |
    135 |
  • 136 |
  • possibly expensive assert statements and other sanity checks were removed
  • 137 |
  • bug fixes: 138 |
      139 |
    • memory read size wasn't being used 100% correctly 140 | in ilp, memfootprint, memreusedist and stride analysis,
      141 | the size was being added to the start address of the read, 142 | while (size-1) should be added;
      otherwise, e.g. for memfootprint, 143 | we count an extra block being touched if the access is near a block boundary
    • 144 |
    • a small problem with an assert statement was fixed in memfootprint (>= 0 instead of >)
    • 145 |
    • fprintf statements were fixed for 64-bit systems
    • 146 |
    147 |
  • 148 |
149 | 150 |

Sep. 22nd 2009

151 |

152 |

Release of MICA v0.23:

153 |
    154 |
  • bug fixes w.r.t. changes in Pin kit:
  • 155 |
      156 |
    • adjusted makefile and README to reflect changed Pin kit directory tree
    • 157 |
    • adjusted mica_itypes.cpp to recognize both NOP and WIDENOP categories
    • 158 |
    159 |
160 | 161 | 162 |

Jun. 13th 2008

163 |

164 |

Release of MICA v0.22, including:

165 |
    166 |
  • bug fixes in itypes:
  • 167 |
      168 | fixed issue with instructions in MISC category being counted double (both in 'control flow' and 'other' buckets) 169 | (thanks to Ahmed S. Al-Zawawi for bringing my attention to this issue) 170 |
    • added NOP instructions category
    • 171 |
    172 |
  • adjusted README file to make meaning of different buckets in memreusedist more clear 173 | (thanks to Kshitij Sudan for reporting this)
  • 174 |
175 |

176 | 177 |

May 20th 2008

178 |

179 |

Release of MICA v0.21, including:

180 |
  • removed -static from makefile, to avoid issues when linking MICA 181 | (thanks to J. K. Rai and Ahmed S. Al-Zawawi for reporting this)
182 |

183 | 184 |

Mar. 26th 2008

185 |

Fixed some faulty hyperlinks in the publication section, and added a new paper on phase-level workload characterization, to be presented at ISPASS-2008 (Austin (TX), April 2008). This is the first paper that actually uses MICA... 186 |

187 |

Dec. 3rd 2007

188 |

Release of MICA v0.2, which includes some important bug fixes and added features: 189 |

    190 |
  • bugs fixed: reg (not including non-full-width registers), ilp (stop including after first non-valid register), stride (faulty use of readIndex/writeIndex in readMem/writeMem), not resetting interval_ins_count for all characteristics
  • 191 |
  • added features: taking size of memory reads/writes into account in ilp, stride and memfootprint, added memreusedist characteristics (for characterizing cache behavior)
  • 192 |
  • adjustments: using 64-byte cache blocks for memfootprint (instead of 32-byte), configuring MICA is done using a mica.conf configuration file instead of using command line parameters
  • 193 |
  • speed: used InsertIfCall/InsertThenCall to make more analysis routines inlineable, used buffering for ilp, which leads to a ~10% speedup
  • 194 |
195 | The new release is available here. 196 |

197 | 198 |

Sept. 29th 2007

199 |

Official introduction of MICA at the Pin tutorial at IISWC-2007. Slides for the presentation are available here, full code examples used in the tutorial are available here (gzipped tarball here).

200 | 201 |

Sept. 26th 2007

202 |

A sneak preview of MICA was shown as part of a presentation I was giving at Intel Hudson (near Boston (MA), US). The Pin development team gave a lot of great feedback on how MICA could be improved and extended, and they were interested in using part of the code for their ongoing research.

203 | 204 |
205 | 206 | 207 |

Download

208 | 209 |

You can download MICA below. The Pin tool is released under a BSD license, which basically means "do what you want with it, just don't pretend it's yours".

210 |

If you are using MICA for a paper, please refer to the IEEE Micro article below for the microarchitecture-independent characterization methodology.

211 |

Download MICA v0.40

212 | 213 |
214 | 215 | 216 |

How do I ...?

217 | 218 |
    219 |
  • 220 |

    learn more about it?

    221 | 222 |

    A README-file containing information about using the tool and the outputs it produces is available in the release.

    223 |

    A good place to start is the IISWC-2007 Pin tutorial presentation, available here.

    224 |

    For further questions about the use and implementation of MICA, please mail Kenneth Hoste (kehoste@elis.ugent.be).

    225 |
  • 226 |
  • 227 |

    use it?

    228 |

    MICA is very easy to use, just like any other Pin tool. To analyze the /bin/ls program in Linux, measuring all available characteristics for the full run of the program, execute:

    229 |
    pin -t mica.so -- ls
    230 | using a MICA config file that contains: 231 |
    analysis_type: all
    232 | interval_size: full
    233 |
  • 234 |
  • 235 |

    get support for it?

    236 | 237 |

    For now, please contact Kenneth Hoste (kenneth.hoste@ugent.be) if you experience any problems using MICA. A MICA mailing list will probably be set up some time soon.

    238 |
  • 239 |
  • 240 |

    contribute to it?

    241 |

    If you have improved MICA (fixed bugs, added features, ...), and want to contribute your efforts, please contact Kenneth Hoste (kenneth.hoste@ugent.be).

    242 |
  • 243 |
244 | 245 |
246 | 247 | 248 |

Related publications

249 | 250 |

Methodology:

251 | 252 | 259 | 260 |

Applications:

261 | 262 |
    263 |
  • 264 |

    Performance Prediction based on Inherent Program Similarity 265 |
    [abstract; paper: PDF, PS; presentation]

    266 | Aashish Phansalkar, 267 | Lieven Eeckhout, 268 | Andy Georges, 269 | Lizy K. John and 270 | Koen De Bosschere 271 |

    PACT-2006, Sept. 2006; Seattle, WA (US)

    272 |
  • 273 |
  • 274 |

    Comparing Benchmarks Using Key Microarchitecture-Independent Characteristics 275 |
    [abstract; paper: PDF; presentation ]

    276 | by 277 | Kenneth Hoste and 278 | Lieven Eeckhout 279 |

    IISWC2006, Oct. 2006; San Jose, CA (US)

    280 |
  • 281 |
  • 282 |

    Analyzing Commercial Processor Performance Numbers for Predicting Performance of Applications of Interest 283 |
    [abstract; paper: PDF; poster ]

    284 | by 285 | Kenneth Hoste, Lieven Eeckhout and 286 | Hendrik Blockeel 287 |

    SIGMETRICS'07, June 2007; San Diego, CA (US)

    288 |
  • 289 |
  • Characterizing the Unique and Diverse Behaviors in Existing and Emerging General-Purpose and Domain-Specific Benchmark Suites
    [PDF]

    290 | by Kenneth Hoste and Lieven Eeckhout 291 |

    ISPASS-2008, April 2008; Austin, TX (US)

    292 |
  • 293 |
  • Scheduling on Heterogeneous Multicore Processors Using Architectural Signatures
    [PDF]

    294 | by Daniel Shelepov and Alexandra Fedorova (Simon Fraser University, Vancouver, Canada) 295 |

    WIOSCA-2008 (ISCA workshop), June 2008; Beijing, China

    296 |
  • 297 |
  • HASS: A Scheduler for Heterogeneous Multicore Systems
    [PDF]

    298 | by Daniel Shelepov, Juan Carlos Saez", Stacey Jeffery°, Alexandra Fedorova, 299 | Nestor Perez, Zhi Feng Huang, Sergey Blagodurov and Viren Kumar
    300 | (Simon Fraser University, Vancouver, Canada)
    301 | (° University of Waterloo, Ontario, Canada)
    302 | (" University of Madrid, Spain) 303 |

    Operating Systems Review, vol. 43, issue 2, pp. 66-75, April 2009
    304 | (Special Issue on the Interaction among the OS, Compilers and Multicore processors)

    305 |
  • 306 |
  • 307 |

    Analysis, Estimation and Optimization of Computer System Performance Using Machine Learning
    308 | [PDF]

    309 | Kenneth Hoste, PhD dissertation
    310 | Ghent University (Belgium), September 2010 311 |
  • 312 |
313 |
314 | 315 | 316 |

Links

317 | 318 | 323 | 324 | 325 | 326 | 327 | 328 | -------------------------------------------------------------------------------- /itypes_default.spec: -------------------------------------------------------------------------------- 1 | 0, 0, SPECIAL, mem_read 2 | 1, 0, SPECIAL, mem_write 3 | 2, 0, CATEGORY, COND_BR 4 | 2, 1, CATEGORY, UNCOND_BR 5 | 2, 2, OPCODE, LEAVE 6 | 2, 3, OPCODE, RET_NEAR 7 | 2, 4, OPCODE, CALL_NEAR 8 | 3, 0, CATEGORY, LOGICAL 9 | 3, 1, CATEGORY, DATAXFER 10 | 3, 2, CATEGORY, BINARY 11 | 3, 3, CATEGORY, FLAGOP 12 | 3, 4, CATEGORY, BITBYTE 13 | 4, 0, CATEGORY, X87_ALU 14 | 4, 1, CATEGORY, FCMOV 15 | 4, 2, CATEGORY, LOGICAL_FP 16 | 5, 0, CATEGORY, WIDENOP 17 | 5, 1, CATEGORY, NOP 18 | 6, 0, SPECIAL, reg_transfer 19 | -------------------------------------------------------------------------------- /itypes_default_available: -------------------------------------------------------------------------------- 1 | 0, 0, SPECIAL, mem_read 2 | 1, 0, SPECIAL, mem_write 3 | 2, 0, CATEGORY, COND_BR 4 | 2, 1, CATEGORY, UNCOND_BR 5 | 2, 2, OPCODE, LEAVE 6 | 2, 3, OPCODE, RET_NEAR 7 | 2, 4, OPCODE, CALL_NEAR 8 | 3, 0, CATEGORY, LOGICAL 9 | 3, 1, CATEGORY, DATAXFER 10 | 3, 2, CATEGORY, BINARY 11 | 3, 3, CATEGORY, FLAGOP 12 | 3, 4, CATEGORY, BITBYTE 13 | 4, 0, CATEGORY, X87_ALU 14 | 4, 1, CATEGORY, FCMOV 15 | 5, 0, CATEGORY, POP 16 | 5, 1, CATEGORY, PUSH 17 | 6, 0, CATEGORY, SHIFT 18 | 7, 0, CATEGORY, STRINGOP 19 | 8, 0, CATEGORY, MMX 20 | 8, 1, CATEGORY, SSE 21 | 9, 0, CATEGORY, INTERRUPT 22 | 9, 1, CATEGORY, ROTATE 23 | 9, 2, CATEGORY, SEMAPHORE 24 | 9, 3, CATEGORY, CMOV 25 | 9, 4, CATEGORY, SYSTEM 26 | 9, 5, CATEGORY, MISC 27 | 9, 6, CATEGORY, PREFETCH 28 | 9, 7, CATEGORY, SYSCALL 29 | 10, 0, CATEGORY, WIDENOP 30 | 10, 1, CATEGORY, NOP 31 | 11, 0, SPECIAL, reg_transfer 32 | -------------------------------------------------------------------------------- /mica.conf.example: -------------------------------------------------------------------------------- 1 | 
analysis_type: ilp_one 2 | interval_size: full 3 | ilp_size: 32 4 | page_size: 12 5 | block_size: 6 6 | itypes_spec_file: itypes_default.spec 7 | append_pid: yes -------------------------------------------------------------------------------- /mica.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 8 | */ 9 | 10 | /************************************************************************* 11 | * * 12 | * MICA: Microarchitecture-Independent Characterization of Workloads * 13 | * * 14 | ************************************************************************* 15 | * 16 | * implementation by Kenneth Hoste (Ghent University, Belgium), December 2006 - March 2011 17 | * based on code written by Lieven Eeckhout (for ATOM on Alpha) 18 | * 19 | * PLEASE DO NOT REDISTRIBUTE THIS CODE WITHOUT INFORMING THE AUTHORS. 
20 | * 21 | * contact: kenneth.hoste@ugent.be , lieven.eeckhout@elis.ugent.be 22 | * 23 | */ 24 | 25 | /* MICA includes */ 26 | #include "mica.h" 27 | #include "mica_init.h" 28 | #include "mica_utils.h" 29 | 30 | #include "mica_all.h" 31 | #include "mica_ilp.h" 32 | #include "mica_itypes.h" 33 | #include "mica_ppm.h" 34 | #include "mica_reg.h" 35 | #include "mica_stride.h" 36 | #include "mica_memfootprint.h" 37 | #include "mica_memstackdist.h" 38 | 39 | #include 40 | #include 41 | #include 42 | #include 43 | using namespace std; 44 | 45 | /* *** Variables *** */ 46 | 47 | /* global */ 48 | INT64 interval_size; // interval size chosen 49 | INT64 interval_ins_count; 50 | INT64 interval_ins_count_for_hpc_alignment; 51 | INT64 total_ins_count; 52 | INT64 total_ins_count_for_hpc_alignment; 53 | 54 | ins_buffer_entry* ins_buffer[MAX_MEM_TABLE_ENTRIES]; 55 | 56 | /* ILP */ 57 | UINT32 _ilp_win_size; 58 | char* _itypes_spec_file; 59 | 60 | /* ILP, MEMFOOTPRINT, MEMSTACKDIST */ 61 | UINT32 _block_size; 62 | 63 | /* MEMFOOTPRINT */ 64 | UINT32 _page_size; 65 | 66 | /* for multiprocess binaries */ 67 | int append_pid; 68 | 69 | /* helper */ 70 | int thread_count = 0; 71 | 72 | /********************************************** 73 | * MAIN * 74 | **********************************************/ 75 | 76 | //FILE* _log; 77 | ofstream _log; 78 | 79 |

/* append _pin.out to name if necessary */
/*
 * Builds the output file name "<name>_pin.out", or "<name>_<pid>_pin.out"
 * when the global append_pid is non-zero.
 *
 * name:    base name, NUL-terminated.
 * returns: a malloc()'ed, NUL-terminated string; this function never frees it.
 *
 * The buffer is sized from strlen(name), so arbitrarily long base names are
 * handled (the previous fixed 100-byte sprintf buffer could overflow).
 */
const char *mkfilename(const char *name)
{
	/* "_" + decimal pid (at most 11 chars) + "_pin.out" + NUL fits in 32 extra bytes */
	size_t cap = strlen(name) + 32;

	char *out = (char*)malloc(cap);
	if(out == NULL){
		cerr << "FATAL ERROR: Could not allocate memory for output file name!" << endl;
		exit(1);
	}

	if(append_pid){
		snprintf(out, cap, "%s_%d_pin.out", name, (int)getpid());
	}
	else{
		snprintf(out, cap, "%s_pin.out", name);
	}

	return out;
}

// find buffer entry for instruction at given address in a hash table
/*
 * Looks up the ins_buffer_entry for instruction address a in the chained hash
 * table ins_buffer (bucket index: a % MAX_MEM_TABLE_ENTRIES). If no entry with
 * that address exists, a new zero-initialised entry is allocated with
 * checked_malloc, appended to the end of the bucket's chain and returned.
 *
 * a:       instruction address to look up.
 * returns: the existing or newly installed entry for a.
 */
ins_buffer_entry* findInsBufferEntry(ADDRINT a){

	INT64 key = a % MAX_MEM_TABLE_ENTRIES;

	/* walk the chain; remember the last node so a new entry can be appended */
	ins_buffer_entry* tail = (ins_buffer_entry*)NULL;
	for(ins_buffer_entry* cur = ins_buffer[key]; cur != (ins_buffer_entry*)NULL; cur = cur->next){
		if(cur->insAddr == a)
			return cur;
		tail = cur;
	}

	/* ins address not found, installing */
	ins_buffer_entry* e = (ins_buffer_entry*)checked_malloc(sizeof(ins_buffer_entry));
	e->insAddr = a;
	e->regOpCnt = 0;
	e->regReadCnt = 0;
	e->regsRead = NULL;
	e->regWriteCnt = 0;
	e->regsWritten = NULL;
	e->next = NULL;
	e->setRead = false;
	e->setWritten = false;
	e->setRegOpCnt = false;

	if(tail == (ins_buffer_entry*)NULL){
		/* new entry in hash table: the bucket was empty, so this becomes its head */
		ins_buffer[key] = e;
	}
	else{
		tail->next = e;
	}

	return e;
}

145 | 146 | /* ALL */ 147 | VOID Instruction_all(INS ins, VOID* v){ 148 | if(interval_size == -1) { 149 | if(INS_HasRealRep(ins)){ 150 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 151 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 152 | } 153 | else{ 154 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); 155 | } 156 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); 157 | } 158 | else{ 159 | if(INS_HasRealRep(ins)){ 160 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 161 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 162 | } 163 | else{ 164 |
INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); 165 | } 166 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); 167 | } 168 | 169 | ADDRINT insAddr = INS_Address(ins); 170 | ins_buffer_entry* e = findInsBufferEntry(insAddr); 171 | 172 | //instrument_ilp_all(ins, e); 173 | //instrument_itypes(ins, v); 174 | //instrument_ppm(ins, v); 175 | //instrument_reg(ins, e); 176 | //instrument_stride(ins, v); 177 | //instrument_memfootprint(ins, v); 178 | //instrument_memstackdist(ins, v); 179 | instrument_all(ins, v, e); 180 | } 181 | 182 | VOID Fini_all(INT32 code, VOID* v){ 183 | fini_ilp_all(code, v); 184 | fini_itypes(code, v); 185 | fini_ppm(code, v); 186 | fini_reg(code, v); 187 | fini_stride(code, v); 188 | fini_memfootprint(code, v); 189 | fini_memstackdist(code, v); 190 | } 191 | 192 | /* ILP */ 193 | VOID Instruction_ilp_all_only(INS ins, VOID* v){ 194 | if(interval_size == -1){ 195 | if(INS_HasRealRep(ins)){ 196 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 197 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 198 | } 199 | else{ 200 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); 201 | } 202 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); 203 | } 204 | else{ 205 | if(INS_HasRealRep(ins)){ 206 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 207 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 208 | } 209 | else{ 210 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); 211 | } 212 | INS_InsertCall(ins, IPOINT_BEFORE, 
(AFUNPTR)all_instr_intervals_count_always, IARG_END); 213 | } 214 | 215 | ADDRINT insAddr = INS_Address(ins); 216 | 217 | ins_buffer_entry* e = findInsBufferEntry(insAddr); 218 | instrument_ilp_all(ins, e); 219 | } 220 | 221 | VOID Fini_ilp_all_only(INT32 code, VOID* v){ 222 | fini_ilp_all(code, v); 223 | } 224 | 225 | /* ILP_ONE */ 226 | VOID Instruction_ilp_one_only(INS ins, VOID* v){ 227 | if(interval_size == -1){ 228 | if(INS_HasRealRep(ins)){ 229 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 230 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 231 | } 232 | else{ 233 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); 234 | } 235 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); 236 | } 237 | else{ 238 | if(INS_HasRealRep(ins)){ 239 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 240 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 241 | } 242 | else{ 243 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); 244 | } 245 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); 246 | } 247 | 248 | ADDRINT insAddr = INS_Address(ins); 249 | 250 | ins_buffer_entry* e = findInsBufferEntry(insAddr); 251 | instrument_ilp_one(ins, e); 252 | } 253 | 254 | VOID Fini_ilp_one_only(INT32 code, VOID* v){ 255 | fini_ilp_one(code, v); 256 | } 257 | 258 | /* ITYPES */ 259 | VOID Instruction_itypes_only(INS ins, VOID* v){ 260 | if(interval_size == -1){ 261 | if(INS_HasRealRep(ins)){ 262 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 263 | 
INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 264 | } 265 | else{ 266 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); 267 | } 268 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); 269 | } 270 | else{ 271 | if(INS_HasRealRep(ins)){ 272 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 273 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 274 | } 275 | else{ 276 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); 277 | } 278 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); 279 | } 280 | 281 | instrument_itypes(ins, v); 282 | } 283 | 284 | VOID Fini_itypes_only(INT32 code, VOID* v){ 285 | fini_itypes(code, v); 286 | } 287 | 288 | /* PPM */ 289 | VOID Instruction_ppm_only(INS ins, VOID* v){ 290 | if(interval_size == -1){ 291 | if(INS_HasRealRep(ins)){ 292 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 293 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 294 | } 295 | else{ 296 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); 297 | } 298 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); 299 | } 300 | else{ 301 | if(INS_HasRealRep(ins)){ 302 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 303 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), 
IARG_END); 304 | } 305 | else{ 306 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); 307 | } 308 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); 309 | } 310 | 311 | instrument_ppm(ins, v); 312 | } 313 | 314 | VOID Fini_ppm_only(INT32 code, VOID* v){ 315 | fini_ppm(code, v); 316 | } 317 | 318 | /* REG */ 319 | VOID Instruction_reg_only(INS ins, VOID* v){ 320 | if(interval_size == -1){ 321 | if(INS_HasRealRep(ins)){ 322 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 323 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 324 | } 325 | else{ 326 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); 327 | } 328 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); 329 | } 330 | else{ 331 | if(INS_HasRealRep(ins)){ 332 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 333 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 334 | } 335 | else{ 336 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); 337 | } 338 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); 339 | } 340 | 341 | ADDRINT insAddr = INS_Address(ins); 342 | 343 | ins_buffer_entry* e = findInsBufferEntry(insAddr); 344 | 345 | instrument_reg(ins, e); 346 | } 347 | 348 | VOID Fini_reg_only(INT32 code, VOID* v){ 349 | fini_reg(code, v); 350 | } 351 | 352 | /* STRIDE */ 353 | VOID Instruction_stride_only(INS ins, VOID* v){ 354 | if(interval_size == -1){ 355 | if(INS_HasRealRep(ins)){ 356 | INS_InsertIfCall(ins, IPOINT_BEFORE, 
(AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 357 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 358 | } 359 | else{ 360 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); 361 | } 362 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); 363 | } 364 | else{ 365 | if(INS_HasRealRep(ins)){ 366 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 367 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 368 | } 369 | else{ 370 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); 371 | } 372 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); 373 | } 374 | 375 | instrument_stride(ins, v); 376 | } 377 | 378 | VOID Fini_stride_only(INT32 code, VOID* v){ 379 | fini_stride(code, v); 380 | } 381 | 382 | /* MEMFOOTPRINT */ 383 | VOID Instruction_memfootprint_only(INS ins, VOID* v){ 384 | if(interval_size == -1){ 385 | if(INS_HasRealRep(ins)){ 386 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 387 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 388 | } 389 | else{ 390 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); 391 | } 392 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); 393 | } 394 | else{ 395 | if(INS_HasRealRep(ins)){ 396 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 397 | INS_InsertThenCall(ins, IPOINT_BEFORE, 
(AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 398 | } 399 | else{ 400 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); 401 | } 402 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); 403 | } 404 | 405 | instrument_memfootprint(ins, v); 406 | } 407 | 408 | VOID Fini_memfootprint_only(INT32 code, VOID* v){ 409 | fini_memfootprint(code, v); 410 | } 411 | 412 | /* MEMSTACKDIST */ 413 | VOID Instruction_memstackdist_only(INS ins, VOID* v){ 414 | if(interval_size == -1){ 415 | if(INS_HasRealRep(ins)){ 416 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 417 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 418 | } 419 | else{ 420 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); 421 | } 422 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); 423 | } 424 | else{ 425 | if(INS_HasRealRep(ins)){ 426 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 427 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 428 | } 429 | else{ 430 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); 431 | } 432 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); 433 | } 434 | 435 | instrument_memstackdist(ins, v); 436 | } 437 | 438 | VOID Fini_memstackdist_only(INT32 code, VOID* v){ 439 | fini_memstackdist(code, v); 440 | } 441 | 442 | /* MY TYPE */ 443 | VOID Instruction_custom(INS ins, VOID* v){ 444 | 445 | if(interval_size == -1){ 446 | 
if(INS_HasRealRep(ins)){ 447 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 448 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 449 | } 450 | else{ 451 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_for_hpc_alignment_no_rep, IARG_END); 452 | } 453 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full_count_always, IARG_END); 454 | } 455 | else{ 456 | if(INS_HasRealRep(ins)){ 457 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)returnArg, IARG_FIRST_REP_ITERATION, IARG_END); 458 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_with_rep, IARG_REG_VALUE, INS_RepCountRegister(ins), IARG_END); 459 | } 460 | else{ 461 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_for_hpc_alignment_no_rep, IARG_END); 462 | } 463 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals_count_always, IARG_END); 464 | } 465 | 466 | cerr << "Please choose a subset of characteristics you want to use, and remove this message (along with the exit call)" << endl; 467 | exit(1); 468 | // Choose subset of characteristics, and make the same adjustments in Fini_custom and init_custom below 469 | 470 | //ADDRINT insAddr = INS_Address(ins); 471 | //ins_buffer_entry* e = findInsBufferEntry(insAddr); 472 | 473 | //instrument_ilp_all(ins, e); 474 | //instrument_ilp_one(ins, e); 475 | //instrument_itypes(ins, v); 476 | //instrument_ppm(ins, v); 477 | //instrument_reg(ins, e); 478 | //instrument_stride(ins, v); 479 | //instrument_memfootprint(ins, v); 480 | //instrument_memstackdist(ins, v); 481 | } 482 | 483 | VOID Fini_custom(INT32 code, VOID* v){ 484 | //fini_ilp_all(code, v); 485 | //fini_ilp_one(code, v); 486 | //fini_itypes(code, v); 487 | //fini_ppm(code, v); 488 | //fini_reg(code, v); 489 | //fini_stride(code, v); 490 | 
//fini_memfootprint(code, v); 491 | //fini_memstackdist(code, v); 492 | } 493 | 494 | void init_custom(){ 495 | //init_ilp_all(); 496 | //init_ilp_one(); 497 | //init_itypes(); 498 | //init_ppm(); 499 | //init_reg(); 500 | //init_stride(); 501 | //init_memfootprint(); 502 | //init_memstackdist(); 503 | } 504 | 505 | 506 | VOID ThreadStart(THREADID id, CONTEXT *context, INT32 flags, VOID *data) 507 | { 508 | if (__sync_fetch_and_add(&thread_count, 1)) 509 | { 510 | LOG_MSG("WARNING: Thread creation detected, results can be corrupted!\n"); 511 | WARNING_MSG("Thread creation detected, results can be corrupted!"); 512 | } 513 | } 514 | 515 | 516 | /************ 517 | * MAIN * 518 | ************/ 519 | int main(int argc, char* argv[]){ 520 | 521 | int i; 522 | MODE mode; 523 | 524 | setup_mica_log(&_log); 525 | 526 | read_config(&_log, &interval_size, &mode, &_ilp_win_size, &_block_size, &_page_size, &_itypes_spec_file, &append_pid); 527 | 528 | cerr << "interval_size: " << interval_size << ", mode: " << mode << endl; 529 | 530 | interval_ins_count = 0; 531 | interval_ins_count_for_hpc_alignment = 0; 532 | total_ins_count = 0; 533 | total_ins_count_for_hpc_alignment = 0; 534 | 535 | for(i=0; i < MAX_MEM_TABLE_ENTRIES; i++){ 536 | ins_buffer[i] = (ins_buffer_entry*)NULL; 537 | } 538 | 539 | switch(mode){ 540 | case MODE_ALL: 541 | init_all(); 542 | PIN_Init(argc, argv); 543 | INS_AddInstrumentFunction(Instruction_all, 0); 544 | PIN_AddFiniFunction(Fini_all, 0); 545 | break; 546 | case MODE_ILP: 547 | init_ilp_all(); 548 | PIN_Init(argc, argv); 549 | INS_AddInstrumentFunction(Instruction_ilp_all_only, 0); 550 | PIN_AddFiniFunction(Fini_ilp_all_only, 0); 551 | break; 552 | case MODE_ILP_ONE: 553 | init_ilp_one(); 554 | PIN_Init(argc, argv); 555 | INS_AddInstrumentFunction(Instruction_ilp_one_only, 0); 556 | PIN_AddFiniFunction(Fini_ilp_one_only, 0); 557 | break; 558 | case MODE_ITYPES: 559 | init_itypes(); 560 | PIN_Init(argc, argv); 561 | 
INS_AddInstrumentFunction(Instruction_itypes_only, 0); 562 | PIN_AddFiniFunction(Fini_itypes_only, 0); 563 | break; 564 | case MODE_PPM: 565 | init_ppm(); 566 | PIN_Init(argc, argv); 567 | INS_AddInstrumentFunction(Instruction_ppm_only, 0); 568 | PIN_AddFiniFunction(Fini_ppm_only, 0); 569 | break; 570 | case MODE_REG: 571 | init_reg(); 572 | PIN_Init(argc, argv); 573 | INS_AddInstrumentFunction(Instruction_reg_only, 0); 574 | PIN_AddFiniFunction(Fini_reg_only, 0); 575 | break; 576 | case MODE_STRIDE: 577 | init_stride(); 578 | PIN_Init(argc, argv); 579 | INS_AddInstrumentFunction(Instruction_stride_only, 0); 580 | PIN_AddFiniFunction(Fini_stride_only, 0); 581 | break; 582 | case MODE_MEMFOOTPRINT: 583 | init_memfootprint(); 584 | PIN_Init(argc, argv); 585 | INS_AddInstrumentFunction(Instruction_memfootprint_only, 0); 586 | PIN_AddFiniFunction(Fini_memfootprint_only, 0); 587 | break; 588 | case MODE_MEMSTACKDIST: 589 | init_memstackdist(); 590 | PIN_Init(argc, argv); 591 | INS_AddInstrumentFunction(Instruction_memstackdist_only, 0); 592 | PIN_AddFiniFunction(Fini_memstackdist_only, 0); 593 | break; 594 | case MODE_CUSTOM: 595 | init_custom(); 596 | PIN_Init(argc, argv); 597 | INS_AddInstrumentFunction(Instruction_custom, 0); 598 | PIN_AddFiniFunction(Fini_custom, 0); 599 | break; 600 | default: 601 | cerr << "FATAL ERROR: Unknown mode while trying to allocate memory for Pin tool!" << endl; 602 | _log << "FATAL ERROR: Unknown mode while trying to allocate memory for Pin tool!" << endl; 603 | exit(1); 604 | } 605 | 606 | // The tool does not handle multithreaded programs. 607 | // Since results might be bogus, we print a warning 608 | // when presence of multiple threads is detected by PIN. 
609 | PIN_AddThreadStartFunction(ThreadStart, NULL); 610 | 611 | // starts program, never returns 612 | PIN_StartProgram(); 613 | } 614 | -------------------------------------------------------------------------------- /mica.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 8 | */ 9 | 10 | /* standard library includes */ 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | using namespace std; 18 | 19 | /* Pin includes */ 20 | #include "pin.H" 21 | 22 | 23 | #ifndef MICA 24 | #define MICA 25 | 26 | /* *** global configurations *** */ 27 | extern int append_pid; 28 | 29 | /* *** conditional debugging *** */ 30 | 31 | #define LOG_MSG(x) _log << x << endl; 32 | #define DEBUG_MSG(x) cerr << "DEBUG: " << x << endl; 33 | 34 | #define WARNING_MSG(x) cerr << "WARNING: " << x << endl; 35 | #define ERROR_MSG(x) cerr << "ERROR: " << x << endl; 36 | 37 | 38 | /* *** utility macros *** */ 39 | 40 | #define BITS_TO_MASK(x) ((1ull << (x)) - 1ull) 41 | #define BITS_TO_COUNT(x) (1ull << (x)) 42 | 43 | 44 | /* *** defines *** */ 45 | 46 | #define CHAR_CNT 69 47 | 48 | /* ILP/MEMFOOTPRINT */ 49 | 50 | #define ILP_WIN_SIZE_BASE 32 51 | 52 | // number of stack entries in single hash table item 53 | #define LOG_MAX_MEM_ENTRIES 16 54 | #define MAX_MEM_ENTRIES BITS_TO_COUNT(LOG_MAX_MEM_ENTRIES) 55 | #define MASK_MAX_MEM_ENTRIES BITS_TO_MASK(LOG_MAX_MEM_ENTRIES) 56 | 57 | #define LOG_MAX_MEM_BLOCK LOG_MAX_MEM_ENTRIES 58 | #define MAX_MEM_BLOCK MAX_MEM_ENTRIES 59 | 60 | #define MAX_MEM_BLOCK_ENTRIES 65536 61 | #define MAX_MEM_TABLE_ENTRIES 12289 // hash table size, should be a prime number (769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 393241, 
786433) 62 | 63 | /* PPM */ 64 | #define MAX_HIST_LENGTH 12 65 | #define NUM_HIST_LENGTHS 3 66 | const UINT32 history_lengths[NUM_HIST_LENGTHS] = {4,8,12}; 67 | 68 | /* REG */ 69 | #define MAX_NUM_REGS 4096 70 | #define MAX_NUM_OPER 7 71 | #define MAX_DIST 128 72 | #define MAX_COMM_DIST MAX_DIST 73 | #define MAX_REG_USE MAX_DIST 74 | 75 | /* STRIDE */ 76 | #define MAX_DISTR 524288 // 2^19 77 | 78 | /* MEMREUSEDIST */ 79 | 80 | #define BUCKET_CNT 19 // number of reuse distance buckets to use 81 | 82 | const char *mkfilename(const char *name); 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /mica_all.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information.
8 | */ 9 | 10 | /* MICA includes */ 11 | #include "mica_all.h" 12 | #include "mica_ilp.h" // needed for empty_all_buffer_all 13 | #include "mica_itypes.h" // needed for itypes_count , itypes_instr_interval_output and itypes_instr_interval_reset 14 | #include "mica_ppm.h" // needed for instrument_ppm_cond_br, ppm_instr_interval_output and ppm_instr_interval_reset 15 | #include "mica_reg.h" // needed for reg_instr_full, reg_instr_intervals, reg_instr_interval_output and reg_instr_interval_reset 16 | #include "mica_stride.h" // needed for stride_index_mem*, readMem_stride, writeMem_stride, stride_instr_interval_output and stride_instr_interval_reset 17 | #include "mica_memfootprint.h" // needed for memOp, memfootprint_instr_interval_output and memfootprint_instr_interval_reset 18 | #include "mica_memstackdist.h" // needed for memstackdist_memRead, memstackdist_instr_interval_output and memstackdist_instr_interval_reset 19 | 20 | #define PROGRESS_THRESHOLD 10000000 // 10M 21 | 22 | extern INT64 total_ins_count; 23 | extern INT64 total_ins_count_for_hpc_alignment; 24 | extern INT64 interval_ins_count; 25 | extern INT64 interval_ins_count_for_hpc_alignment; // one count for REP prefixed instructions 26 | 27 | extern INT64 interval_size; 28 | 29 | extern identifier** group_identifiers; 30 | extern INT64* group_ids_cnt; 31 | extern INT64* group_counts; 32 | extern INT64 number_of_groups; 33 | 34 | extern INT64 other_ids_cnt; 35 | extern INT64 other_ids_max_cnt; 36 | extern identifier* other_group_identifiers; 37 | 38 | void init_all(){ 39 | 40 | init_ilp_all(); 41 | init_itypes(); 42 | init_ppm(); 43 | init_reg(); 44 | init_stride(); 45 | init_memfootprint(); 46 | init_memstackdist(); 47 | } 48 | 49 | ADDRINT returnArg(BOOL arg){ 50 | 51 | return arg; 52 | } 53 | 54 | VOID all_instr_full_count_always(){ 55 | 56 | total_ins_count++; 57 | 58 | #ifdef VERBOSE 59 | if (__builtin_expect (total_ins_count % PROGRESS_THRESHOLD == 0, false)) { 60 | ofstream progress_file; 61 | 
progress_file.open ("mica_progress.txt", ios::out | ios::trunc); 62 | progress_file << total_ins_count << " instructions analyzed" << endl; 63 | progress_file.close (); 64 | } 65 | #endif 66 | } 67 | 68 | VOID all_instr_full_count_for_hpc_alignment_no_rep(){ 69 | total_ins_count_for_hpc_alignment++; 70 | } 71 | 72 | VOID all_instr_full_count_for_hpc_alignment_with_rep(UINT32 repCnt){ 73 | if(repCnt > 0){ 74 | total_ins_count_for_hpc_alignment++; 75 | } 76 | } 77 | 78 | VOID all_instr_intervals_count_always(){ 79 | total_ins_count++; 80 | interval_ins_count++; 81 | 82 | #ifdef VERBOSE 83 | if (__builtin_expect(total_ins_count % PROGRESS_THRESHOLD == 0, false)) { 84 | ofstream progress_file; 85 | progress_file.open("mica_progress.txt", ios::out | ios::trunc); 86 | progress_file << total_ins_count << " instructions analyzed" << endl; 87 | progress_file.close(); 88 | } 89 | #endif 90 | } 91 | 92 | VOID all_instr_intervals_count_for_hpc_alignment_no_rep(){ 93 | total_ins_count_for_hpc_alignment++; 94 | interval_ins_count_for_hpc_alignment++; 95 | } 96 | 97 | VOID all_instr_intervals_count_for_hpc_alignment_with_rep(UINT32 repCnt){ 98 | if(repCnt > 0){ 99 | total_ins_count_for_hpc_alignment++; 100 | interval_ins_count_for_hpc_alignment++; 101 | } 102 | } 103 | 104 | ADDRINT all_buffer_instruction_2reads_write(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, UINT32 stride_index_memread1, UINT32 stride_index_memread2, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){ 105 | 106 | //itypes_count_mem_read(); 107 | //itypes_count_mem_write(); 108 | readMem_stride(stride_index_memread1, read1_addr, read_size); 109 | readMem_stride(stride_index_memread2, read2_addr, read_size); 110 | writeMem_stride(stride_index_memwrite, write_addr, write_size); 111 | memOp(read1_addr, read_size); // memfootprint 112 | memOp(read2_addr, read_size); 113 | memOp(write_addr, write_size); 114 | memstackdist_memRead(read1_addr, read_size); // memstackdist 115 
| memstackdist_memRead(read2_addr, read_size); 116 | //return ilp_buffer_instruction_2reads_write(_e, read1_addr, read2_addr, read_size, write_addr, write_size); 117 | ilp_buffer_instruction_only(_e); 118 | ilp_buffer_instruction_read(read1_addr, read_size); 119 | ilp_buffer_instruction_read2(read2_addr); 120 | ilp_buffer_instruction_write(write_addr, write_size); 121 | return ilp_buffer_instruction_next(); 122 | } 123 | 124 | ADDRINT all_buffer_instruction_read_write(void* _e, ADDRINT read1_addr, ADDRINT read_size, UINT32 stride_index_memread1, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){ 125 | 126 | //itypes_count_mem_read(); 127 | //itypes_count_mem_write(); 128 | readMem_stride(stride_index_memread1, read1_addr, read_size); 129 | writeMem_stride(stride_index_memwrite, write_addr, write_size); 130 | memOp(read1_addr, read_size); // memfootprint 131 | memOp(write_addr, write_size); 132 | memstackdist_memRead(read1_addr, read_size); // memstackdist 133 | //return ilp_buffer_instruction_read_write(_e, read1_addr, read_size, write_addr, write_size); 134 | ilp_buffer_instruction_only(_e); 135 | ilp_buffer_instruction_read(read1_addr, read_size); 136 | ilp_buffer_instruction_write(write_addr, write_size); 137 | return ilp_buffer_instruction_next(); 138 | } 139 | 140 | ADDRINT all_buffer_instruction_2reads(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, UINT32 stride_index_memread1, UINT32 stride_index_memread2){ 141 | 142 | //itypes_count_mem_read(); 143 | readMem_stride(stride_index_memread1, read1_addr, read_size); 144 | readMem_stride(stride_index_memread2, read2_addr, read_size); 145 | memOp(read1_addr, read_size); // memfootprint 146 | memOp(read2_addr, read_size); 147 | memstackdist_memRead(read1_addr, read_size); // memstackdist 148 | memstackdist_memRead(read2_addr, read_size); 149 | //return ilp_buffer_instruction_2reads(_e, read1_addr, read2_addr, read_size); 150 | ilp_buffer_instruction_only(_e); 151 | 
ilp_buffer_instruction_read(read1_addr, read_size); 152 | ilp_buffer_instruction_read2(read2_addr); 153 | return ilp_buffer_instruction_next(); 154 | } 155 | 156 | ADDRINT all_buffer_instruction_read(void* _e, ADDRINT read1_addr, ADDRINT read_size, UINT32 stride_index_memread1){ 157 | 158 | //itypes_count_mem_read(); 159 | readMem_stride(stride_index_memread1, read1_addr, read_size); 160 | memOp(read1_addr, read_size); // memfootprint 161 | memstackdist_memRead(read1_addr, read_size); // memstackdist 162 | //return ilp_buffer_instruction_read(_e, read1_addr, read_size); 163 | ilp_buffer_instruction_only(_e); 164 | ilp_buffer_instruction_read(read1_addr, read_size); 165 | return ilp_buffer_instruction_next(); 166 | } 167 | 168 | ADDRINT all_buffer_instruction_write(void* _e, ADDRINT write_addr, ADDRINT write_size, UINT32 stride_index_memwrite){ 169 | 170 | //itypes_count_mem_write(); 171 | writeMem_stride(stride_index_memwrite, write_addr, write_size); 172 | memOp(write_addr, write_size); // memfootprint 173 | //return ilp_buffer_instruction_write(_e, write_addr, write_size); 174 | ilp_buffer_instruction_only(_e); 175 | ilp_buffer_instruction_write(write_addr, write_size); 176 | return ilp_buffer_instruction_next(); 177 | } 178 | 179 | ADDRINT all_buffer_instruction(void* _e){ 180 | 181 | //return ilp_buffer_instruction(_e); 182 | ilp_buffer_instruction_only(_e); 183 | return ilp_buffer_instruction_next(); 184 | } 185 | 186 | VOID all_instr_full(VOID* _e, ADDRINT instrAddr, ADDRINT size){ 187 | reg_instr_full(_e); 188 | instrMem(instrAddr, size); 189 | } 190 | 191 | ADDRINT all_instr_intervals(VOID* _e, ADDRINT instrAddr, ADDRINT size){ 192 | reg_instr_intervals(_e); 193 | instrMem(instrAddr, size); 194 | return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size); 195 | }; 196 | 197 | VOID all_instr_interval(){ 198 | 199 | /* output per interval for ILP is done by ilp-buffering functions */ 200 | 201 | itypes_instr_interval_output(); 202 | 
itypes_instr_interval_reset(); 203 | 204 | ppm_instr_interval_output(); 205 | ppm_instr_interval_reset(); 206 | 207 | reg_instr_interval_output(); 208 | reg_instr_interval_reset(); 209 | 210 | stride_instr_interval_output(); 211 | stride_instr_interval_reset(); 212 | 213 | memfootprint_instr_interval_output(); 214 | memfootprint_instr_interval_reset(); 215 | 216 | memstackdist_instr_interval_output(); 217 | memstackdist_instr_interval_reset(); 218 | 219 | interval_ins_count = 0; 220 | interval_ins_count_for_hpc_alignment = 0; 221 | } 222 | 223 | VOID all_instr_interval_for_ilp(){ 224 | 225 | // save these, because empty_ilp_buffer_all resets them 226 | INT64 interval_ins_count_backup = interval_ins_count; 227 | INT64 interval_ins_count_for_hpc_alignment_backup = interval_ins_count_for_hpc_alignment; 228 | 229 | empty_ilp_buffer_all(); 230 | 231 | // restore 232 | interval_ins_count = interval_ins_count_backup; 233 | interval_ins_count_for_hpc_alignment = interval_ins_count_for_hpc_alignment_backup; 234 | } 235 | 236 | VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e){ 237 | 238 | UINT32 i, j, maxNumRegsProd, maxNumRegsCons, regReadCnt, regWriteCnt, opCnt, regOpCnt; 239 | REG reg; 240 | BOOL categorized = false; 241 | char cat[50]; 242 | char opcode[50]; 243 | 244 | UINT32 stride_index_memread1; 245 | UINT32 stride_index_memread2; 246 | UINT32 stride_index_memwrite; 247 | 248 | /* fetch category and opcode for this instruction */ 249 | strcpy(cat,CATEGORY_StringShort(INS_Category(ins)).c_str()); 250 | strcpy(opcode,INS_Mnemonic(ins).c_str()); 251 | 252 | // buffer register reads per static instruction 253 | if(!e->setRead){ 254 | 255 | 256 | // register reads and memory reads determine the issue time 257 | maxNumRegsCons = INS_MaxNumRRegs(ins); 258 | 259 | regReadCnt = 0; 260 | for(i=0; i < maxNumRegsCons; i++){ 261 | reg = INS_RegR(ins, i); 262 | //assert((UINT32)reg < MAX_NUM_REGS); 263 | // only consider valid general-purpose registers (any bit-width)
and floating-point registers, 264 | // i.e. exclude branch, segment and pin registers, among others 265 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 266 | regReadCnt++; 267 | } 268 | } 269 | 270 | e->regReadCnt = regReadCnt; 271 | e->regsRead = (REG*)checked_malloc(regReadCnt*sizeof(REG)); 272 | 273 | regReadCnt = 0; 274 | for(i=0; i < maxNumRegsCons; i++){ 275 | 276 | reg = INS_RegR(ins, i); 277 | 278 | //assert((UINT32)reg < MAX_NUM_REGS); 279 | // only consider valid general-purpose registers (any bit-width) and floating-point registers, 280 | // i.e. exclude branch, segment and pin registers, among others 281 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 282 | e->regsRead[regReadCnt++] = reg; 283 | } 284 | } 285 | 286 | e->setRead = true; 287 | 288 | } 289 | 290 | // buffer register writes per static instruction 291 | if(!e->setWritten){ 292 | maxNumRegsProd = INS_MaxNumWRegs(ins); 293 | 294 | regWriteCnt = 0; 295 | for(i=0; i < maxNumRegsProd; i++){ 296 | 297 | reg = INS_RegW(ins, i); 298 | 299 | //assert((UINT32)reg < MAX_NUM_REGS); 300 | // only consider valid general-purpose registers (any bit-width) and floating-point registers, 301 | // i.e.
exlude branch, segment and pin registers, among others */ 302 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 303 | regWriteCnt++; 304 | } 305 | } 306 | 307 | e->regWriteCnt = regWriteCnt; 308 | e->regsWritten = (REG*)checked_malloc(regWriteCnt*sizeof(REG)); 309 | 310 | regWriteCnt = 0; 311 | for(i=0; i < maxNumRegsProd; i++){ 312 | 313 | reg = INS_RegW(ins, i); 314 | 315 | //assert((UINT32)reg < MAX_NUM_REGS); 316 | // only consider valid general-purpose registers (any bit-width) and floating-point registers, 317 | // i.e. exclude branch, segment and pin registers, among others 318 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 319 | e->regsWritten[regWriteCnt++] = reg; 320 | } 321 | } 322 | 323 | e->setWritten = true; 324 | } 325 | 326 | if(!e->setRegOpCnt){ 327 | regOpCnt = 0; 328 | opCnt = INS_OperandCount(ins); 329 | for(i = 0; i < opCnt; i++){ 330 | if(INS_OperandIsReg(ins,i)) 331 | regOpCnt++; 332 | } 333 | /*if(regOpCnt >= MAX_NUM_OPER){ 334 | fprintf(stderr,"BOOM! -> MAX_NUM_OPER is exceeded!
(%u)\n", regOpCnt); 335 | exit(1); 336 | }*/ 337 | e->regOpCnt = regOpCnt; 338 | e->setRegOpCnt = true; 339 | } 340 | 341 | // buffer memory operations (and instruction register buffer) with one single InsertCall 342 | if(INS_IsMemoryRead(ins)){ 343 | 344 | stride_index_memread1 = stride_index_memRead1(INS_Address(ins)); 345 | 346 | if(INS_IsMemoryWrite(ins)){ 347 | 348 | stride_index_memwrite = stride_index_memWrite(INS_Address(ins)); 349 | 350 | if(INS_HasMemoryRead2(ins)){ 351 | 352 | stride_index_memread2 = stride_index_memRead2(INS_Address(ins)); 353 | 354 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction_2reads_write, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_UINT32, stride_index_memread1, IARG_UINT32, stride_index_memread2, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_UINT32, stride_index_memwrite, IARG_END); 355 | } 356 | else{ 357 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction_read_write, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_UINT32, stride_index_memread1, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_UINT32, stride_index_memwrite, IARG_END); 358 | 359 | } 360 | } 361 | else{ 362 | if(INS_HasMemoryRead2(ins)){ 363 | 364 | stride_index_memread2 = stride_index_memRead2(INS_Address(ins)); 365 | 366 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction_2reads, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_UINT32 , stride_index_memread1, IARG_UINT32, stride_index_memread2, IARG_END); 367 | } 368 | else{ 369 | 370 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction_read, IARG_PTR, (void*)e, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_UINT32, stride_index_memread1, IARG_END); 371 | } 372 | } 373 | } 374 | else{ 375 | if(INS_IsMemoryWrite(ins)){ 376 | 377 | stride_index_memwrite = stride_index_memWrite(INS_Address(ins)); 378 | 379 | INS_InsertIfCall(ins, 
IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction_write, IARG_PTR, (void*)e, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_UINT32, stride_index_memwrite, IARG_END); 380 | } 381 | else{ 382 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_buffer_instruction, IARG_PTR, (void*)e, IARG_END); 383 | } 384 | } 385 | 386 | /* InsertIfCall returns true if ILP buffer is full */ 387 | //INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)empty_ilp_buffer_all, IARG_END); 388 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_interval_for_ilp, IARG_END); // wrapper for empty_ilp_buffer_all 389 | 390 | /* +++ ITYPES +++ */ 391 | 392 | // go over all groups, increase group count if instruction matches that group 393 | // group counts are increased at most once per instruction executed, 394 | // even if the instruction matches multiple identifiers in that group 395 | for(i=0; i < number_of_groups; i++){ 396 | for(j=0; j < group_ids_cnt[i]; j++){ 397 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_CATEGORY){ 398 | if(strcmp(group_identifiers[i][j].str, cat) == 0){ 399 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); 400 | categorized = true; 401 | break; 402 | } 403 | } 404 | else{ 405 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_OPCODE){ 406 | if(strcmp(group_identifiers[i][j].str, opcode) == 0){ 407 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); 408 | categorized = true; 409 | break; 410 | } 411 | } 412 | else{ 413 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_SPECIAL){ 414 | if(strcmp(group_identifiers[i][j].str, "mem_read") == 0 && INS_IsMemoryRead(ins) ){ 415 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); 416 | categorized = true; 417 | break; 418 | } 419 | else{ 420 | if(strcmp(group_identifiers[i][j].str, "mem_write") == 0 && INS_IsMemoryWrite(ins) ){ 421 | INS_InsertCall(ins, IPOINT_BEFORE, 
(AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); 422 | categorized = true; 423 | break; 424 | } 425 | else{ 426 | } 427 | } 428 | } 429 | else{ 430 | cerr << "ERROR! Unknown identifier type specified (" << group_identifiers[i][j].type << ")" << endl; 431 | } 432 | } 433 | } 434 | } 435 | } 436 | 437 | // count instructions that don't fit in any of the specified categories in the last group 438 | if( !categorized ){ 439 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, (unsigned int)number_of_groups, IARG_END); 440 | 441 | // check whether this category is already known in the 'other' group 442 | for(i=0; i < other_ids_cnt; i++){ 443 | if(strcmp(other_group_identifiers[i].str, cat) == 0) 444 | break; 445 | } 446 | 447 | // if a new instruction category is found, add it to the set 448 | if(i == other_ids_cnt){ 449 | other_group_identifiers[other_ids_cnt].type = identifier_type::ID_TYPE_CATEGORY; 450 | other_group_identifiers[other_ids_cnt].str = checked_strdup(cat); 451 | other_ids_cnt++; 452 | } 453 | 454 | // prepare for (possible) next category 455 | if(other_ids_cnt == other_ids_max_cnt){ 456 | other_ids_max_cnt *= 2; 457 | other_group_identifiers = (identifier*)checked_realloc(other_group_identifiers, other_ids_max_cnt*sizeof(identifier)); 458 | } 459 | } 460 | 461 | /* +++ PPM +++ */ 462 | if(strcmp(cat,"COND_BR") == 0){ 463 | instrument_ppm_cond_br(ins); 464 | } 465 | /* inserting calls for counting instructions is done in mica.cpp */ 466 | if(interval_size != -1){ 467 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_intervals, IARG_PTR, (void*)e, IARG_ADDRINT, INS_Address(ins), IARG_ADDRINT, (ADDRINT)INS_Size(ins), IARG_END); 468 | /* only called if interval is 'full' */ 469 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_interval, IARG_END); 470 | } 471 | else{ 472 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)all_instr_full, IARG_PTR, (void*)e, IARG_ADDRINT, INS_Address(ins), IARG_ADDRINT, (ADDRINT)INS_Size(ins),
IARG_END); 473 | } 474 | 475 | } 476 | -------------------------------------------------------------------------------- /mica_all.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 8 | */ 9 | 10 | #include "mica.h" 11 | #include "mica_utils.h" 12 | 13 | VOID init_all(); 14 | ADDRINT returnArg(BOOL arg); 15 | VOID all_instr_full_count_always(); 16 | VOID all_instr_full_count_for_hpc_alignment_no_rep(); 17 | VOID all_instr_full_count_for_hpc_alignment_with_rep(UINT32 repCnt); 18 | VOID all_instr_intervals_count_always(); 19 | VOID all_instr_intervals_count_for_hpc_alignment_no_rep(); 20 | VOID all_instr_intervals_count_for_hpc_alignment_with_rep(UINT32 repCnt); 21 | VOID instrument_all(INS ins, VOID* v, ins_buffer_entry* e); 22 | -------------------------------------------------------------------------------- /mica_ilp.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 
8 | */ 9 | 10 | #include "pin.H" 11 | 12 | /* MICA includes */ 13 | #include "mica_utils.h" 14 | #include "mica_ilp.h" 15 | 16 | #include 17 | #include 18 | using namespace std; 19 | 20 | #define ILP_WIN_SIZE_CNT 4 21 | 22 | const UINT32 win_sizes[ILP_WIN_SIZE_CNT] = {32, 64, 128, 256}; 23 | 24 | extern UINT32 _ilp_win_size; 25 | UINT32 win_size; 26 | 27 | extern UINT32 _block_size; 28 | UINT32 ilp_block_size; 29 | 30 | /* buffer settings */ 31 | 32 | //#define ILP_BUFFER_SIZE 256 33 | #define ILP_BUFFER_SIZE 200 34 | 35 | /* buffer variables */ 36 | 37 | typedef struct ilp_buffer_entry_type{ 38 | 39 | ins_buffer_entry* e; 40 | 41 | ADDRINT mem_read1_addr; 42 | ADDRINT mem_read2_addr; 43 | ADDRINT mem_read_size; 44 | 45 | ADDRINT mem_write_addr; 46 | ADDRINT mem_write_size; 47 | 48 | } ilp_buffer_entry; 49 | 50 | ilp_buffer_entry* ilp_buffer[ILP_BUFFER_SIZE]; 51 | UINT32 ilp_buffer_index; 52 | 53 | void init_ilp_buffering(); 54 | VOID fini_ilp_buffering_all(); 55 | VOID fini_ilp_buffering_one(); 56 | 57 | /* Global variables */ 58 | 59 | extern INT64 interval_size; 60 | extern INT64 interval_ins_count; 61 | extern INT64 interval_ins_count_for_hpc_alignment; 62 | extern INT64 total_ins_count; 63 | extern INT64 total_ins_count_for_hpc_alignment; 64 | ofstream output_file_ilp_one; 65 | ofstream output_file_ilp_all; 66 | 67 | INT32 size_pow_all_times_all; 68 | INT64 index_all_times_all; 69 | UINT64* all_times_all[ILP_WIN_SIZE_CNT]; 70 | 71 | INT32 size_pow_times; 72 | INT64 index_all_times; 73 | UINT64* all_times; 74 | 75 | INT64 cpuClock_interval_all[ILP_WIN_SIZE_CNT]; 76 | UINT64 timeAvailable_all[ILP_WIN_SIZE_CNT][MAX_NUM_REGS]; 77 | nlist* memAddressesTable_all[MAX_MEM_TABLE_ENTRIES]; 78 | UINT32 windowHead_all[ILP_WIN_SIZE_CNT]; 79 | UINT32 windowTail_all[ILP_WIN_SIZE_CNT]; 80 | UINT64 cpuClock_all[ILP_WIN_SIZE_CNT]; 81 | UINT64* executionProfile_all[ILP_WIN_SIZE_CNT]; 82 | UINT64 issueTime_all[ILP_WIN_SIZE_CNT]; 83 | 84 | INT64 cpuClock_interval; 85 | UINT64 
timeAvailable[MAX_NUM_REGS]; 86 | nlist* memAddressesTable[MAX_MEM_TABLE_ENTRIES]; 87 | UINT32 windowHead; 88 | UINT32 windowTail; 89 | UINT64 cpuClock; 90 | UINT64* executionProfile; 91 | UINT64 issueTime; 92 | 93 | /************************* 94 | ILP (COMMON) 95 | **************************/ 96 | 97 | /* initializing */ 98 | void init_ilp_common(){ 99 | /* initializing total instruction counts is done in mica.cpp */ 100 | } 101 | 102 | /************************************ 103 | ILP (one given window size) 104 | *************************************/ 105 | 106 | /* initializing */ 107 | void init_ilp_one(){ 108 | 109 | UINT32 i; 110 | 111 | init_ilp_common(); 112 | init_ilp_buffering(); 113 | 114 | win_size = _ilp_win_size; 115 | ilp_block_size = _block_size; 116 | 117 | size_pow_times = 10; 118 | all_times = (UINT64*)checked_malloc((1 << size_pow_times) * sizeof(UINT64)); 119 | index_all_times = 1; // don't use first element of all_times 120 | 121 | windowHead = 0; 122 | windowTail = 0; 123 | cpuClock = 0; 124 | cpuClock_interval = 0; 125 | for(i = 0; i < MAX_NUM_REGS; i++){ 126 | timeAvailable[i] = 0; 127 | } 128 | 129 | executionProfile = (UINT64*)checked_malloc(win_size*sizeof(UINT64)); 130 | 131 | for(i = 0; i < win_size; i++){ 132 | executionProfile[i] = 0; 133 | } 134 | issueTime = 0; 135 | 136 | if(interval_size != -1){ 137 | if(interval_size % ILP_BUFFER_SIZE != 0){ 138 | cerr << "ERROR! Interval size is not a multiple of ILP buffer size. 
(" << interval_size << " vs " << ILP_BUFFER_SIZE << ")" << endl; 139 | exit(-1); 140 | } 141 | char filename[100]; 142 | sprintf(filename, "ilp-win%d_phases_int", win_size); 143 | output_file_ilp_one.open(mkfilename(filename), ios::out|ios::trunc); 144 | output_file_ilp_one.close(); 145 | } 146 | } 147 | 148 | /* support */ 149 | void increase_size_all_times_one(){ 150 | UINT64* ptr; 151 | 152 | size_pow_times++; 153 | 154 | ptr = (UINT64*)realloc(all_times, (1 << size_pow_times)*sizeof(UINT64)); 155 | if(ptr == (UINT64*)NULL){ 156 | cerr << "Could not allocate memory (realloc)!" << endl; 157 | exit(1); 158 | } 159 | all_times = ptr; 160 | } 161 | 162 | /* per-instruction stuff */ 163 | VOID ilp_instr_one(){ 164 | 165 | const UINT32 win_size_const = win_size; 166 | UINT32 reordered; 167 | 168 | /* set issue time for tail of instruction window */ 169 | executionProfile[windowTail] = issueTime; 170 | windowTail = (windowTail + 1) % win_size_const; 171 | 172 | /* if instruction window (issue buffer) full */ 173 | if(windowHead == windowTail){ 174 | cpuClock++; 175 | cpuClock_interval++; 176 | reordered = 0; 177 | /* remove all instructions which are done from beginning of window, 178 | * until an instruction comes along which is not ready yet: 179 | * -> check executionProfile to see which instructions are done 180 | * -> commit maximum win_size instructions (i.e. 
stop when issue buffer is empty) 181 | */ 182 | while((executionProfile[windowHead] < cpuClock) && (reordered < win_size_const)) { 183 | windowHead = (windowHead + 1) % win_size_const; 184 | reordered++; 185 | } 186 | //assert(reordered != 0); 187 | } 188 | 189 | /* reset issue times */ 190 | issueTime = 0; 191 | } 192 | 193 | VOID ilp_instr_full_one(){ 194 | 195 | /* counting instructions is done in all_instr_full() */ 196 | 197 | ilp_instr_one(); 198 | } 199 | 200 | VOID ilp_instr_intervals_one(){ 201 | 202 | int i; 203 | 204 | /* counting instructions is done in all_instr_intervals() */ 205 | 206 | ilp_instr_one(); 207 | 208 | if(interval_ins_count_for_hpc_alignment == interval_size){ 209 | 210 | char filename[100]; 211 | sprintf(filename, "ilp-win%d_phases_int", win_size); 212 | 213 | output_file_ilp_one.open(mkfilename(filename), ios::out|ios::app); 214 | 215 | output_file_ilp_one << interval_size << " " << cpuClock_interval << endl; 216 | 217 | /* reset */ 218 | interval_ins_count = 0; 219 | interval_ins_count_for_hpc_alignment = 0; 220 | 221 | cpuClock_interval = 0; 222 | 223 | /* clean up memory used, to avoid memory problems for long (CPU2006) benchmarks */ 224 | size_pow_times = 10; 225 | 226 | free(all_times); 227 | all_times = (UINT64*)checked_malloc((1 << size_pow_times) * sizeof(UINT64)); 228 | index_all_times = 1; 229 | 230 | nlist* np; 231 | nlist* np_rm; 232 | for(i=0; i < MAX_MEM_TABLE_ENTRIES; i++){ 233 | np = memAddressesTable[i]; 234 | while(np != (nlist*)NULL){ 235 | np_rm = np; 236 | np = np->next; 237 | free(np_rm->mem); 238 | free(np_rm); 239 | } 240 | memAddressesTable[i] = (nlist*) NULL; 241 | } 242 | 243 | output_file_ilp_one.close(); 244 | } 245 | } 246 | 247 | VOID checkIssueTime_one(){ 248 | 249 | if(cpuClock > issueTime) 250 | issueTime = cpuClock; 251 | } 252 | 253 | /* register stuff */ 254 | VOID readRegOp_ilp_one(UINT32 regId){ 255 | 256 | if(timeAvailable[regId] > issueTime) 257 | issueTime = timeAvailable[regId]; 258 | } 259 | 
260 | VOID readRegOp_ilp_one_fast(VOID* _e){ 261 | 262 | ins_buffer_entry* e = (ins_buffer_entry*)_e; 263 | 264 | INT32 i; 265 | 266 | UINT32 regId; 267 | 268 | for(i=0; i < e->regReadCnt; i++){ 269 | regId = (UINT32)e->regsRead[i]; 270 | if(timeAvailable[regId] > issueTime) 271 | issueTime = timeAvailable[regId]; 272 | } 273 | } 274 | 275 | VOID writeRegOp_ilp_one(UINT32 regId){ 276 | 277 | timeAvailable[regId] = issueTime + 1; 278 | } 279 | 280 | VOID writeRegOp_ilp_one_fast(VOID* _e){ 281 | 282 | ins_buffer_entry* e = (ins_buffer_entry*)_e; 283 | 284 | INT32 i; 285 | 286 | for(i=0; i < e->regWriteCnt; i++) 287 | timeAvailable[(UINT32)e->regsWritten[i]] = issueTime + 1; 288 | } 289 | 290 | /* memory access stuff */ 291 | VOID readMem_ilp_one(ADDRINT effAddr, ADDRINT size){ 292 | 293 | 294 | ADDRINT a; 295 | ADDRINT upperMemAddr, indexInChunk; 296 | memNode* chunk = (memNode*)NULL; 297 | ADDRINT shiftedAddr = effAddr >> ilp_block_size; 298 | ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size; 299 | 300 | if(size > 0){ 301 | for(a = shiftedAddr; a <= shiftedEndAddr; a++){ 302 | upperMemAddr = a >> LOG_MAX_MEM_ENTRIES; 303 | indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); 304 | 305 | chunk = lookup(memAddressesTable, upperMemAddr); 306 | if(chunk == (memNode*)NULL) 307 | chunk = install(memAddressesTable, upperMemAddr); 308 | 309 | //assert(indexInChunk < MAX_MEM_ENTRIES); 310 | //assert(chunk->timeAvailable[indexInChunk] < (1 << size_pow_times)); 311 | if(all_times[chunk->timeAvailable[indexInChunk]] > issueTime) 312 | issueTime = all_times[chunk->timeAvailable[indexInChunk]]; 313 | } 314 | } 315 | } 316 | 317 | VOID writeMem_ilp_one(ADDRINT effAddr, ADDRINT size){ 318 | 319 | ADDRINT a; 320 | ADDRINT upperMemAddr, indexInChunk; 321 | memNode* chunk = (memNode*)NULL; 322 | ADDRINT shiftedAddr = effAddr >> ilp_block_size; 323 | ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size; 324 | 325 | if(size > 0){ 326 | for(a = shiftedAddr; 
a <= shiftedEndAddr; a++){ 327 | upperMemAddr = a >> LOG_MAX_MEM_ENTRIES; 328 | indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); 329 | 330 | chunk = lookup(memAddressesTable,upperMemAddr); 331 | if(chunk == (memNode*)NULL) 332 | chunk = install(memAddressesTable,upperMemAddr); 333 | 334 | //assert(indexInChunk < MAX_MEM_ENTRIES); 335 | if(chunk->timeAvailable[indexInChunk] == 0){ 336 | index_all_times++; 337 | if(index_all_times >= (1 << size_pow_times)) 338 | increase_size_all_times_one(); 339 | chunk->timeAvailable[indexInChunk] = index_all_times; 340 | } 341 | //assert(chunk->timeAvailable[indexInChunk] < (1 << size_pow_times)); 342 | all_times[chunk->timeAvailable[indexInChunk]] = issueTime + 1; 343 | } 344 | } 345 | } 346 | 347 | /* instrumenting (instruction level) */ 348 | /*VOID instrument_ilp_one(INS ins, VOID* v){ 349 | 350 | UINT32 i; 351 | UINT32 maxNumRegsProd, maxNumRegsCons; 352 | REG reg; 353 | 354 | // register reads and memory reads determine the issue time 355 | maxNumRegsCons = INS_MaxNumRRegs(ins); 356 | 357 | for(i=0; i < maxNumRegsCons; i++){ 358 | 359 | reg = INS_RegR(ins, i); 360 | 361 | assert((UINT32)reg < MAX_NUM_REGS); 362 | // only consider valid general-purpose registers (any bit-width) and floating-point registers, 363 | // i.e. 
exlude branch, segment and pin registers, among others 364 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 365 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readRegOp_ilp_one, IARG_UINT32, reg, IARG_END); 366 | } 367 | } 368 | 369 | if(INS_IsMemoryRead(ins)){ 370 | 371 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_ilp_one, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END); 372 | 373 | if(INS_HasMemoryRead2(ins)){ 374 | 375 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_ilp_one, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END); 376 | } 377 | } 378 | 379 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)checkIssueTime_one, IARG_END); 380 | 381 | // register writes and memory writes determine the time when these locations are available 382 | 383 | maxNumRegsProd = INS_MaxNumWRegs(ins); 384 | for(i=0; i < maxNumRegsProd; i++){ 385 | 386 | reg = INS_RegW(ins, i); 387 | 388 | assert((UINT32)reg < MAX_NUM_REGS); 389 | // only consider valid general-purpose registers (any bit-width) and floating-point registers, 390 | // i.e. exlude branch, segment and pin registers, among others 391 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 392 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)writeRegOp_ilp_one, IARG_UINT32, reg, IARG_END); 393 | } 394 | } 395 | 396 | if(INS_IsMemoryWrite(ins)){ 397 | 398 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)writeMem_ilp_one, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END); 399 | } 400 | 401 | // count instructions 402 | if(interval_size == -1) 403 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_instr_full_one, IARG_END); 404 | else 405 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_instr_intervals_one, IARG_END); 406 | 407 | }*/ 408 | 409 | /* finishing... 
*/ 410 | VOID fini_ilp_one(INT32 code, VOID* v){ 411 | 412 | char filename[100]; 413 | 414 | fini_ilp_buffering_one(); 415 | 416 | if(interval_size == -1){ 417 | sprintf(filename, "ilp-win%d_full_int", win_size); 418 | 419 | output_file_ilp_one.open(mkfilename(filename), ios::out|ios::trunc); 420 | //output_file_ilp_one << total_ins_count; 421 | } 422 | else{ 423 | sprintf(filename, "ilp-win%d_phases_int", win_size); 424 | output_file_ilp_one.open(mkfilename(filename), ios::out|ios::app); 425 | output_file_ilp_one << interval_ins_count; 426 | } 427 | output_file_ilp_one << " " << cpuClock_interval << endl; 428 | 429 | //output_file_ilp_one << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; 430 | output_file_ilp_one.close(); 431 | } 432 | 433 | /*************************************** 434 | ILP (all 4 hardcoded window sizes) 435 | ****************************************/ 436 | 437 | /* initializing */ 438 | void init_ilp_all(){ 439 | 440 | int i,j; 441 | 442 | init_ilp_common(); 443 | init_ilp_buffering(); 444 | 445 | size_pow_all_times_all = 10; 446 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){ 447 | all_times_all[i] = (UINT64*)checked_malloc((1 << size_pow_all_times_all) * sizeof(UINT64)); 448 | } 449 | index_all_times_all = 1; // don't use first element of all_times_all 450 | 451 | ilp_block_size = _block_size; 452 | 453 | for(j=0; j < ILP_WIN_SIZE_CNT; j++){ 454 | windowHead_all[j] = 0; 455 | windowTail_all[j] = 0; 456 | cpuClock_all[j] = 0; 457 | cpuClock_interval_all[j] = 0; 458 | for(i = 0; i < MAX_NUM_REGS; i++){ 459 | timeAvailable_all[j][i] = 0; 460 | } 461 | 462 | executionProfile_all[j] = (UINT64*)checked_malloc(win_sizes[j]*sizeof(UINT64)); 463 | 464 | for(i = 0; i < (int)win_sizes[j]; i++){ 465 | executionProfile_all[j][i] = 0; 466 | } 467 | issueTime_all[j] = 0; 468 | } 469 | 470 | if(interval_size != -1){ 471 | if(interval_size % ILP_BUFFER_SIZE != 0){ 472 | cerr << "ERROR! Interval size is not a multiple of ILP buffer size. 
(" << interval_size << " vs " << ILP_BUFFER_SIZE << ")" << endl; 473 | exit(-1); 474 | } 475 | output_file_ilp_all.open(mkfilename("ilp_phases_int"), ios::out|ios::trunc); 476 | output_file_ilp_all.close(); 477 | } 478 | } 479 | 480 | /* support */ 481 | void increase_size_all_times_all(){ 482 | int i; 483 | UINT64* ptr; 484 | size_pow_all_times_all++; 485 | 486 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){ 487 | ptr = (UINT64*)realloc(all_times_all[i],(1 << size_pow_all_times_all)*sizeof(UINT64)); 488 | if(ptr == (UINT64*)NULL){ 489 | cerr << "Could not allocate memory (realloc)!" << endl; 490 | exit(1); 491 | } 492 | all_times_all[i] = ptr; 493 | } 494 | } 495 | 496 | /* per-instruction stuff */ 497 | VOID ilp_instr_all(){ 498 | 499 | int i; 500 | UINT32 reordered; 501 | 502 | 503 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){ 504 | 505 | /* set issue time for tail of instruction window */ 506 | executionProfile_all[i][windowTail_all[i]] = issueTime_all[i]; 507 | windowTail_all[i] = (windowTail_all[i] + 1) % win_sizes[i]; 508 | 509 | /* if instruction window (issue buffer) full */ 510 | if(windowHead_all[i] == windowTail_all[i]){ 511 | cpuClock_all[i]++; 512 | cpuClock_interval_all[i]++; 513 | reordered = 0; 514 | /* remove all instructions which are done from beginning of window, 515 | * until an instruction comes along which is not ready yet: 516 | * -> check executionProfile_all to see which instructions are done 517 | * -> commit maximum win_size instructions (i.e. 
stop when issue buffer is empty) 518 | */ 519 | while((executionProfile_all[i][windowHead_all[i]] < cpuClock_all[i]) && (reordered < win_sizes[i])) { 520 | windowHead_all[i] = (windowHead_all[i] + 1) % win_sizes[i]; 521 | reordered++; 522 | } 523 | //assert(reordered != 0); 524 | } 525 | 526 | /* reset issue times */ 527 | issueTime_all[i] = 0; 528 | 529 | } 530 | 531 | } 532 | 533 | VOID ilp_instr_full_all(){ 534 | 535 | /* counting instructions is done in all_instr_full() */ 536 | 537 | ilp_instr_all(); 538 | } 539 | 540 | VOID ilp_instr_intervals_all(){ 541 | 542 | int i; 543 | 544 | /* counting instructions is done in all_instr_intervals() */ 545 | 546 | if(interval_ins_count_for_hpc_alignment == interval_size){ 547 | 548 | output_file_ilp_all.open(mkfilename("ilp_phases_int"), ios::out|ios::app); 549 | 550 | output_file_ilp_all << interval_ins_count; 551 | for(i = 0; i < ILP_WIN_SIZE_CNT; i++) 552 | output_file_ilp_all << " " << cpuClock_interval_all[i]; 553 | output_file_ilp_all << endl; 554 | 555 | /* reset */ 556 | interval_ins_count = 0; 557 | interval_ins_count_for_hpc_alignment = 0; 558 | 559 | for(i = 0; i < ILP_WIN_SIZE_CNT; i++) 560 | cpuClock_interval_all[i] = 0; 561 | 562 | /* clean up memory used, to avoid memory problems for long (CPU2006) benchmarks */ 563 | size_pow_all_times_all = 10; 564 | for(i = 0; i < ILP_WIN_SIZE_CNT; i++){ 565 | free(all_times_all[i]); 566 | all_times_all[i] = (UINT64*)checked_malloc((1 << size_pow_all_times_all) * sizeof(UINT64)); 567 | } 568 | index_all_times_all = 1; 569 | 570 | nlist* np; 571 | nlist* np_rm; 572 | for(i=0; i < MAX_MEM_TABLE_ENTRIES; i++){ 573 | np = memAddressesTable_all[i]; 574 | while(np != (nlist*)NULL){ 575 | np_rm = np; 576 | np = np->next; 577 | free(np_rm->mem); 578 | free(np_rm); 579 | } 580 | memAddressesTable_all[i] = (nlist*) NULL; 581 | } 582 | 583 | output_file_ilp_all.close(); 584 | } 585 | 586 | ilp_instr_all(); 587 | } 588 | 589 | VOID checkIssueTime_all(){ 590 | int i; 591 | 592 | 
for(i=0; i < ILP_WIN_SIZE_CNT; i++){ 593 | if(cpuClock_all[i] > issueTime_all[i]) 594 | issueTime_all[i] = cpuClock_all[i]; 595 | } 596 | } 597 | 598 | /* register stuff */ 599 | VOID readRegOp_ilp_all(UINT32 regId){ 600 | int i; 601 | 602 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){ 603 | 604 | if(timeAvailable_all[i][regId] > issueTime_all[i]) 605 | issueTime_all[i] = timeAvailable_all[i][regId]; 606 | } 607 | } 608 | 609 | VOID writeRegOp_ilp_all(UINT32 regId){ 610 | int i; 611 | 612 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){ 613 | timeAvailable_all[i][regId] = issueTime_all[i] + 1; 614 | } 615 | } 616 | 617 | /* memory access stuff */ 618 | VOID readMem_ilp_all(ADDRINT effAddr, ADDRINT size){ 619 | 620 | int i; 621 | 622 | ADDRINT a; 623 | ADDRINT upperMemAddr, indexInChunk; 624 | memNode* chunk = (memNode*)NULL; 625 | ADDRINT shiftedAddr = effAddr >> ilp_block_size; 626 | ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size; 627 | 628 | if(size > 0){ 629 | for(a = shiftedAddr; a <= shiftedEndAddr; a++){ 630 | upperMemAddr = a >> LOG_MAX_MEM_ENTRIES; 631 | indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); 632 | 633 | chunk = lookup(memAddressesTable_all,upperMemAddr); 634 | if(chunk == (memNode*)NULL) 635 | chunk = install(memAddressesTable_all,upperMemAddr); 636 | 637 | //assert(indexInChunk < MAX_MEM_ENTRIES); 638 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){ 639 | 640 | if(all_times_all[i][chunk->timeAvailable[indexInChunk]] > issueTime_all[i]) 641 | issueTime_all[i] = all_times_all[i][chunk->timeAvailable[indexInChunk]]; 642 | } 643 | } 644 | } 645 | } 646 | 647 | VOID writeMem_ilp_all(ADDRINT effAddr, ADDRINT size){ 648 | int i; 649 | 650 | ADDRINT a; 651 | ADDRINT upperMemAddr, indexInChunk; 652 | memNode* chunk = (memNode*)NULL; 653 | ADDRINT shiftedAddr = effAddr >> ilp_block_size; 654 | ADDRINT shiftedEndAddr = (effAddr + size - 1) >> ilp_block_size; 655 | 656 | if(size > 0){ 657 | for(a = shiftedAddr; a <= shiftedEndAddr; a++){ 658 | upperMemAddr = 
a >> LOG_MAX_MEM_ENTRIES; 659 | indexInChunk = a ^ (upperMemAddr << LOG_MAX_MEM_ENTRIES); 660 | 661 | chunk = lookup(memAddressesTable_all,upperMemAddr); 662 | if(chunk == (memNode*)NULL) 663 | chunk = install(memAddressesTable_all,upperMemAddr); 664 | 665 | //assert(indexInChunk < MAX_MEM_ENTRIES); 666 | if(chunk->timeAvailable[indexInChunk] == 0){ 667 | index_all_times_all++; 668 | if(index_all_times_all >= (1 << size_pow_all_times_all)) 669 | increase_size_all_times_all(); 670 | chunk->timeAvailable[indexInChunk] = index_all_times_all; 671 | } 672 | for(i=0; i < ILP_WIN_SIZE_CNT; i++){ 673 | all_times_all[i][chunk->timeAvailable[indexInChunk]] = issueTime_all[i] + 1; 674 | } 675 | } 676 | } 677 | } 678 | 679 | /* instrumenting (instruction level) */ 680 | /*VOID instrument_ilp_all(INS ins, VOID* v){ 681 | 682 | UINT32 i; 683 | UINT32 maxNumRegsProd, maxNumRegsCons; 684 | REG reg; 685 | 686 | 687 | // register reads and memory reads determine the issue time 688 | maxNumRegsCons = INS_MaxNumRRegs(ins); 689 | 690 | for(i=0; i < maxNumRegsCons; i++){ 691 | 692 | reg = INS_RegR(ins, i); 693 | 694 | // only consider valid general-purpose registers (any bit-width) and floating-point registers, 695 | // i.e. 
exlude branch, segment and pin registers, among others 696 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 697 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readRegOp_ilp_all, IARG_UINT32, reg, IARG_END); 698 | } 699 | } 700 | 701 | if(INS_IsMemoryRead(ins)){ 702 | 703 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_ilp_all, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END); 704 | 705 | if(INS_HasMemoryRead2(ins)){ 706 | 707 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_ilp_all, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END); 708 | } 709 | } 710 | 711 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)checkIssueTime_all, IARG_END); 712 | 713 | // register writes and memory writes determine the time when these locations are available 714 | 715 | maxNumRegsProd = INS_MaxNumWRegs(ins); 716 | for(i=0; i < maxNumRegsProd; i++){ 717 | 718 | reg = INS_RegW(ins, i); 719 | 720 | // only consider valid general-purpose registers (any bit-width) and floating-point registers, 721 | // i.e. exlude branch, segment and pin registers, among others 722 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 723 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)writeRegOp_ilp_all, IARG_UINT32, reg, IARG_END); 724 | } 725 | } 726 | 727 | if(INS_IsMemoryWrite(ins)){ 728 | 729 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)writeMem_ilp_all, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END); 730 | } 731 | 732 | // count instructions 733 | if(interval_size == -1) 734 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_instr_full_all,IARG_END); 735 | else 736 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_instr_intervals_all, IARG_END); 737 | }*/ 738 | 739 | /* finishing... 
*/ 740 | VOID fini_ilp_all(INT32 code, VOID* v){ 741 | 742 | int i; 743 | 744 | fini_ilp_buffering_all(); 745 | 746 | if(interval_size == -1){ 747 | output_file_ilp_all.open(mkfilename("ilp_full_int"), ios::out|ios::trunc); 748 | output_file_ilp_all << total_ins_count; 749 | } 750 | else{ 751 | output_file_ilp_all.open(mkfilename("ilp_phases_int"), ios::out|ios::app); 752 | output_file_ilp_all << interval_ins_count; 753 | } 754 | for(i = 0; i < ILP_WIN_SIZE_CNT; i++) 755 | output_file_ilp_all << " " << cpuClock_interval_all[i]; 756 | output_file_ilp_all << " "; 757 | 758 | output_file_ilp_all << endl; 759 | //output_file_ilp_all << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; 760 | output_file_ilp_all.close(); 761 | } 762 | 763 | /************************** 764 | ILP (BUFFERING) 765 | ***************************/ 766 | 767 | /* 768 | * notes 769 | * 770 | * using PIN_FAST_ANALYSIS_CALL for buffering functions was tested 771 | * during the preparation of MICA v0.3, but showed to slightly slowdown 772 | * things instead of speeding them up, so it was dropped in the end 773 | */ 774 | 775 | /* initializing */ 776 | void init_ilp_buffering(){ 777 | 778 | int i; 779 | 780 | ilp_buffer_index = 0; 781 | for(i=0; i < ILP_BUFFER_SIZE; i++){ 782 | ilp_buffer[i] = (ilp_buffer_entry*)checked_malloc(sizeof(ilp_buffer_entry)); 783 | ilp_buffer[i]->e = (ins_buffer_entry*)NULL; 784 | ilp_buffer[i]->mem_read1_addr = 0; 785 | ilp_buffer[i]->mem_read2_addr = 0; 786 | ilp_buffer[i]->mem_read_size = 0; 787 | ilp_buffer[i]->mem_write_addr = 0; 788 | ilp_buffer[i]->mem_write_size = 0; 789 | } 790 | } 791 | 792 | VOID ilp_buffer_instruction_only(void* _e){ 793 | ilp_buffer[ilp_buffer_index]->e = (ins_buffer_entry*)_e; 794 | } 795 | 796 | VOID ilp_buffer_instruction_read(ADDRINT read1_addr, ADDRINT read_size){ 797 | ilp_buffer[ilp_buffer_index]->mem_read1_addr = read1_addr; 798 | ilp_buffer[ilp_buffer_index]->mem_read_size = read_size; 799 | } 800 | 801 | VOID 
ilp_buffer_instruction_read2(ADDRINT read2_addr){ 802 | ilp_buffer[ilp_buffer_index]->mem_read2_addr = read2_addr; 803 | } 804 | 805 | VOID ilp_buffer_instruction_write(ADDRINT write_addr, ADDRINT write_size){ 806 | ilp_buffer[ilp_buffer_index]->mem_write_addr = write_addr; 807 | ilp_buffer[ilp_buffer_index]->mem_write_size = write_size; 808 | } 809 | 810 | ADDRINT ilp_buffer_instruction_next(){ 811 | ilp_buffer_index++; 812 | return (ADDRINT)(ilp_buffer_index == ILP_BUFFER_SIZE || interval_ins_count_for_hpc_alignment == interval_size); 813 | } 814 | 815 | /* empty buffer for one given window size */ 816 | VOID empty_buffer_one(){ 817 | UINT32 i,j; 818 | 819 | for(i=0; i < ilp_buffer_index; i++){ 820 | 821 | // register reads 822 | for(j=0; j < (UINT32)ilp_buffer[i]->e->regReadCnt; j++){ 823 | readRegOp_ilp_one((UINT32)ilp_buffer[i]->e->regsRead[j]); 824 | } 825 | 826 | // memory reads 827 | if(ilp_buffer[i]->mem_read1_addr != 0){ 828 | readMem_ilp_one(ilp_buffer[i]->mem_read1_addr, ilp_buffer[i]->mem_read_size); 829 | ilp_buffer[i]->mem_read1_addr = 0; 830 | 831 | if(ilp_buffer[i]->mem_read2_addr != 0){ 832 | readMem_ilp_one(ilp_buffer[i]->mem_read2_addr, ilp_buffer[i]->mem_read_size); 833 | ilp_buffer[i]->mem_read2_addr = 0; 834 | } 835 | 836 | ilp_buffer[i]->mem_read_size = 0; 837 | } 838 | 839 | checkIssueTime_one(); 840 | 841 | // register writes 842 | for(j=0; j < (UINT32)ilp_buffer[i]->e->regWriteCnt; j++){ 843 | writeRegOp_ilp_one((UINT32)ilp_buffer[i]->e->regsWritten[j]); 844 | } 845 | 846 | // memory writes 847 | if(ilp_buffer[i]->mem_write_addr != 0){ 848 | writeMem_ilp_one(ilp_buffer[i]->mem_write_addr, ilp_buffer[i]->mem_write_size); 849 | ilp_buffer[i]->mem_write_addr = 0; 850 | ilp_buffer[i]->mem_write_size = 0; 851 | } 852 | 853 | ilp_buffer[i]->e = (ins_buffer_entry*)NULL; 854 | 855 | if(interval_size == -1) 856 | ilp_instr_full_one(); 857 | else 858 | ilp_instr_intervals_one(); 859 | } 860 | 861 | ilp_buffer_index = 0; 862 | } 863 | 864 | /* empty 
buffer for all 4 (hardcoded) window sizes */ 865 | VOID empty_ilp_buffer_all(){ 866 | UINT32 i,j; 867 | 868 | for(i=0; i < ilp_buffer_index; i++){ 869 | 870 | // register reads 871 | for(j=0; j < (UINT32)ilp_buffer[i]->e->regReadCnt; j++){ 872 | readRegOp_ilp_all((UINT32)ilp_buffer[i]->e->regsRead[j]); 873 | } 874 | 875 | // memory reads 876 | if(ilp_buffer[i]->mem_read1_addr != 0){ 877 | readMem_ilp_all(ilp_buffer[i]->mem_read1_addr, ilp_buffer[i]->mem_read_size); 878 | ilp_buffer[i]->mem_read1_addr = 0; 879 | 880 | if(ilp_buffer[i]->mem_read2_addr != 0){ 881 | readMem_ilp_all(ilp_buffer[i]->mem_read2_addr, ilp_buffer[i]->mem_read_size); 882 | ilp_buffer[i]->mem_read2_addr = 0; 883 | } 884 | 885 | ilp_buffer[i]->mem_read_size = 0; 886 | } 887 | 888 | checkIssueTime_all(); 889 | 890 | // register writes 891 | for(j=0; j < (UINT32)ilp_buffer[i]->e->regWriteCnt; j++){ 892 | writeRegOp_ilp_all((UINT32)ilp_buffer[i]->e->regsWritten[j]); 893 | } 894 | 895 | // memory writes 896 | if(ilp_buffer[i]->mem_write_addr != 0){ 897 | writeMem_ilp_all(ilp_buffer[i]->mem_write_addr, ilp_buffer[i]->mem_write_size); 898 | ilp_buffer[i]->mem_write_addr = 0; 899 | ilp_buffer[i]->mem_write_size = 0; 900 | } 901 | 902 | ilp_buffer[i]->e = (ins_buffer_entry*)NULL; 903 | 904 | if(interval_size == -1) 905 | ilp_instr_full_all(); 906 | else 907 | ilp_instr_intervals_all(); 908 | } 909 | 910 | ilp_buffer_index = 0; 911 | } 912 | 913 | /* instrumenting (instruction level) */ 914 | VOID instrument_ilp_buffering_common(INS ins, ins_buffer_entry* e){ 915 | 916 | UINT32 i, maxNumRegsProd, maxNumRegsCons, regReadCnt, regWriteCnt; 917 | REG reg; 918 | 919 | // buffer register reads per static instruction 920 | if(!e->setRead){ 921 | 922 | 923 | // register reads and memory reads determine the issue time 924 | maxNumRegsCons = INS_MaxNumRRegs(ins); 925 | 926 | regReadCnt = 0; 927 | for(i=0; i < maxNumRegsCons; i++){ 928 | reg = INS_RegR(ins, i); 929 | //assert((UINT32)reg < MAX_NUM_REGS); 930 | // 
only consider valid general-purpose registers (any bit-width) and floating-point registers, 931 | // i.e. exlude branch, segment and pin registers, among others 932 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 933 | regReadCnt++; 934 | } 935 | } 936 | 937 | e->regReadCnt = regReadCnt; 938 | e->regsRead = (REG*)checked_malloc(regReadCnt*sizeof(REG)); 939 | 940 | regReadCnt = 0; 941 | for(i=0; i < maxNumRegsCons; i++){ 942 | 943 | reg = INS_RegR(ins, i); 944 | 945 | //assert((UINT32)reg < MAX_NUM_REGS); 946 | // only consider valid general-purpose registers (any bit-width) and floating-point registers, 947 | // i.e. exlude branch, segment and pin registers, among others 948 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 949 | e->regsRead[regReadCnt++] = reg; 950 | } 951 | } 952 | 953 | e->setRead = true; 954 | 955 | } 956 | 957 | // buffer register writes per static instruction 958 | if(!e->setWritten){ 959 | maxNumRegsProd = INS_MaxNumWRegs(ins); 960 | 961 | regWriteCnt = 0; 962 | for(i=0; i < maxNumRegsProd; i++){ 963 | 964 | reg = INS_RegW(ins, i); 965 | 966 | //assert((UINT32)reg < MAX_NUM_REGS); 967 | // only consider valid general-purpose registers (any bit-width) and floating-point registers, 968 | // i.e. 
exlude branch, segment and pin registers, among others */ 969 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 970 | regWriteCnt++; 971 | } 972 | } 973 | 974 | e->regWriteCnt = regWriteCnt; 975 | e->regsWritten = (REG*)checked_malloc(regWriteCnt*sizeof(REG)); 976 | 977 | regWriteCnt = 0; 978 | for(i=0; i < maxNumRegsProd; i++){ 979 | 980 | reg = INS_RegW(ins, i); 981 | 982 | //assert((UINT32)reg < MAX_NUM_REGS); 983 | // only consider valid general-purpose registers (any bit-width) and floating-point registers, 984 | // i.e. exlude branch, segment and pin registers, among others 985 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 986 | e->regsWritten[regWriteCnt++] = reg; 987 | } 988 | } 989 | 990 | e->setWritten = true; 991 | } 992 | 993 | // buffer memory operations (and instruction register buffer) with one single InsertCall 994 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_only, IARG_PTR, (void*)e, IARG_END); 995 | 996 | if(INS_IsMemoryRead(ins)){ 997 | 998 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_read, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END); 999 | 1000 | if(INS_HasMemoryRead2(ins)){ 1001 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_read2, IARG_MEMORYREAD2_EA, IARG_END); 1002 | } 1003 | } 1004 | 1005 | if(INS_IsMemoryWrite(ins)){ 1006 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_write, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END); 1007 | } 1008 | 1009 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ilp_buffer_instruction_next, IARG_END); 1010 | 1011 | } 1012 | 1013 | VOID instrument_ilp_one(INS ins, ins_buffer_entry* e){ 1014 | 1015 | instrument_ilp_buffering_common(ins, e); 1016 | // only 
called if buffer is full 1017 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)empty_buffer_one, IARG_END); 1018 | } 1019 | 1020 | VOID instrument_ilp_all(INS ins, ins_buffer_entry* e){ 1021 | 1022 | instrument_ilp_buffering_common(ins, e); 1023 | // only called if buffer is full 1024 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)empty_ilp_buffer_all, IARG_END); 1025 | } 1026 | 1027 | VOID fini_ilp_buffering_all(){ 1028 | 1029 | if(ilp_buffer_index != 0) 1030 | empty_ilp_buffer_all(); 1031 | } 1032 | 1033 | VOID fini_ilp_buffering_one(){ 1034 | 1035 | if(ilp_buffer_index != 0) 1036 | empty_buffer_one(); 1037 | } 1038 | 1039 | -------------------------------------------------------------------------------- /mica_ilp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 8 | */ 9 | 10 | #include "mica.h" 11 | #include "mica_utils.h" 12 | 13 | void init_ilp_all(); 14 | void init_ilp_one(); 15 | 16 | VOID instrument_ilp_all(INS ins, ins_buffer_entry* e); 17 | VOID instrument_ilp_one(INS ins, ins_buffer_entry* e); 18 | 19 | VOID fini_ilp_all(INT32 code, VOID* v); 20 | VOID fini_ilp_one(INT32 code, VOID* v); 21 | 22 | /* support for fast instrumentation of all characteristics in a single run (avoid multiple InsertCalls!) 
*/
/*
 * NOTE(review): the four prototypes below carry PIN_FAST_ANALYSIS_CALL, but
 * the definitions in mica_ilp.cpp are written without it, the INS_InsertCall
 * sites in instrument_ilp_buffering_common() do not pass
 * IARG_FAST_ANALYSIS_CALL, and the notes in mica_ilp.cpp say the fast-call
 * linkage was dropped. Presumably harmless where the macro expands to
 * nothing; confirm for targets where it changes the calling convention, and
 * check whether another translation unit relies on these declarations.
 */
//void ilp_buffer_instruction_only(void* _e);
VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_only(void* _e);
//void ilp_buffer_instruction_read(ADDRINT read1_addr, ADDRINT read_size);
VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_read(ADDRINT read1_addr, ADDRINT read_size);
//void ilp_buffer_instruction_read2(ADDRINT read2_addr);
VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_read2(ADDRINT read2_addr);
//void ilp_buffer_instruction_write(ADDRINT write_addr, ADDRINT write_size);
VOID PIN_FAST_ANALYSIS_CALL ilp_buffer_instruction_write(ADDRINT write_addr, ADDRINT write_size);
/* returns non-zero when the ILP buffer has to be emptied */
ADDRINT ilp_buffer_instruction_next();
/*ADDRINT ilp_buffer_instruction_2reads_write(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size, ADDRINT write_addr, ADDRINT write_size);
ADDRINT ilp_buffer_instruction_read_write(void* _e, ADDRINT read1_addr, ADDRINT read_size, ADDRINT write_addr, ADDRINT write_size);
ADDRINT ilp_buffer_instruction_2reads(void* _e, ADDRINT read1_addr, ADDRINT read2_addr, ADDRINT read_size);
ADDRINT ilp_buffer_instruction_read(void* _e, ADDRINT read1_addr, ADDRINT read_size);
ADDRINT ilp_buffer_instruction_write(void* _e, ADDRINT write_addr, ADDRINT write_size);
ADDRINT ilp_buffer_instruction(void* _e);*/
/* replays and clears the ILP buffer for all window sizes */
VOID empty_ilp_buffer_all();

--------------------------------------------------------------------------------
/mica_init.cpp:
--------------------------------------------------------------------------------
/*
 * This file is part of MICA, a Pin tool to collect
 * microarchitecture-independent program characteristics using the Pin
 * instrumentation framework.
 *
 * Please see the README.txt file distributed with the MICA release for more
 * information.
 */

//#include
//#include
//#include

/* MICA includes */
#include "mica_init.h"

/*
 * Setup MICA log file.
19 | */ 20 | void setup_mica_log(ofstream *log){ 21 | 22 | (*log).open("mica.log", ios::out|ios::trunc); 23 | if(!(*log).is_open()){ 24 | ERROR_MSG("Could not create log file, aborting."); 25 | exit(1); 26 | } 27 | } 28 | 29 | /* 30 | * Read mica.conf config file for MICA. 31 | * 32 | * analysis_type: 'all' | 'ilp' | 'ilp_one' | 'itypes' | 'ppm' | 'reg' | 'stride' | 'memfootprint' | 'memstackdist' | 'custom' 33 | * interval_size: 'full' | 34 | * ilp_size: 35 | * itypes_spec_file: 36 | */ 37 | enum CONFIG_PARAM {UNKNOWN_CONFIG_PARAM = -1, ANALYSIS_TYPE = 0, INTERVAL_SIZE, ILP_SIZE, _BLOCK_SIZE, _PAGE_SIZE, ITYPES_SPEC_FILE, APPEND_PID, CONF_PAR_CNT}; 38 | const char* config_params_str[CONF_PAR_CNT] = {"analysis_type", "interval_size", "ilp_size", "block_size", "page_size", "itypes_spec_file"}; 39 | enum ANALYSIS_TYPE {UNKNOWN_ANALYSIS_TYPE = -1, ALL=0, ILP, ILP_ONE, ITYPES, PPM, MICA_REG, STRIDE, MEMFOOTPRINT, MEMSTACKDIST, CUSTOM, ANA_TYPE_CNT}; 40 | const char* analysis_types_str[ANA_TYPE_CNT] = { "all", "ilp", "ilp_one", "itypes", "ppm", "reg", "stride", "memfootprint", "memstackdist", "custom"}; 41 | 42 | enum CONFIG_PARAM findConfigParam(char* s){ 43 | 44 | if(strcmp(s, "analysis_type") == 0){ return ANALYSIS_TYPE; } 45 | if(strcmp(s, "interval_size") == 0){ return INTERVAL_SIZE; } 46 | if(strcmp(s, "ilp_size") == 0){ return ILP_SIZE; } 47 | if(strcmp(s, "block_size") == 0){ return _BLOCK_SIZE; } 48 | if(strcmp(s, "page_size") == 0){ return _PAGE_SIZE; } 49 | if(strcmp(s, "itypes_spec_file") == 0){ return ITYPES_SPEC_FILE; } 50 | if(strcmp(s, "append_pid") == 0){ return APPEND_PID; } 51 | 52 | return UNKNOWN_CONFIG_PARAM; 53 | } 54 | 55 | enum ANALYSIS_TYPE findAnalysisType(char* s){ 56 | 57 | if(strcmp(s, "all") == 0){ return ALL; } 58 | if(strcmp(s, "ilp") == 0){ return ILP; } 59 | if(strcmp(s, "ilp_one") == 0){ return ILP_ONE; } 60 | if(strcmp(s, "itypes") == 0){ return ITYPES; } 61 | if(strcmp(s, "ppm") == 0){ return PPM; } 62 | if(strcmp(s, "reg") == 0){ 
return MICA_REG; } 63 | if(strcmp(s, "stride") == 0){ return STRIDE; } 64 | if(strcmp(s, "memfootprint") == 0){ return MEMFOOTPRINT; } 65 | if(strcmp(s, "memstackdist") == 0){ return MEMSTACKDIST; } 66 | if(strcmp(s, "custom") == 0){ return CUSTOM; } 67 | 68 | return UNKNOWN_ANALYSIS_TYPE; 69 | } 70 | 71 | void read_config(ofstream* log, INT64* intervalSize, MODE* mode, UINT32* _ilp_win_size, UINT32* _block_size, UINT32* _page_size, char** _itypes_spec_file, int* append_pid){ 72 | 73 | int i; 74 | char* param; 75 | char* val; 76 | FILE* config_file = fopen("mica.conf","r"); 77 | 78 | /* a config file named 'mica.conf' is required */ 79 | if(config_file == (FILE*)NULL){ 80 | cerr << "ERROR: No config file 'mica.conf' found, please create one!" << endl; 81 | (*log) << "ERROR: No config file 'mica.conf' found, please create one!" << endl; 82 | exit(1); 83 | } 84 | 85 | (*log) << "Reading config file ..." << endl; 86 | 87 | param = (char*)checked_malloc(1000*sizeof(char)); 88 | val = (char*)checked_malloc(1000*sizeof(char)); 89 | 90 | // default values 91 | *mode = UNKNOWN_MODE; 92 | *_ilp_win_size = 0; 93 | *_block_size = 6; // default block size = 64 bytes (2^6) 94 | *_page_size = 12; // default page size = 4KB (2^12) 95 | 96 | while(!feof(config_file)){ 97 | 98 | if (fscanf(config_file, "%[^:]: %s\n", param, val) != 2) 99 | { 100 | cerr << "ERROR: invalid config entry found" << endl; 101 | (*log) << "ERROR: invalid config entry found" << endl; 102 | exit(1); 103 | } 104 | 105 | switch(findConfigParam(param)){ 106 | 107 | case ANALYSIS_TYPE: 108 | // figure out mode we are running in 109 | cerr << "Analysis type: " << val << endl; 110 | 111 | switch(findAnalysisType(val)){ 112 | 113 | case ALL: 114 | *mode = MODE_ALL; 115 | cerr << "Measuring ALL characteristics..." << endl; 116 | (*log) << "Measuring ALL characteristics..." << endl; 117 | break; 118 | 119 | case ILP: 120 | *mode = MODE_ILP; 121 | cerr << "Measuring ILP characteristics..." 
<< endl; 122 | (*log) << "Measuring ILP characteristics..." << endl; 123 | break; 124 | 125 | case ILP_ONE: 126 | *mode = MODE_ILP_ONE; 127 | cerr << "Measuring ILP characteristics for a given window size..." << endl; 128 | (*log) << "Measuring ILP characteristics for a given window size..." << endl; 129 | break; 130 | 131 | case ITYPES: 132 | *mode = MODE_ITYPES; 133 | cerr << "Measuring ITYPES characteristics..." << endl; 134 | (*log) << "Measuring ITYPES characteristics..." << endl; 135 | break; 136 | 137 | case PPM: 138 | *mode = MODE_PPM; 139 | cerr << "Measuring PPM characteristics..." << endl; 140 | (*log) << "Measuring PPM characteristics..." << endl; 141 | break; 142 | 143 | case MICA_REG: 144 | *mode = MODE_REG; 145 | cerr << "Measuring REG characteristics..." << endl; 146 | (*log) << "Measuring REG characteristics..." << endl; 147 | break; 148 | 149 | case STRIDE: 150 | *mode = MODE_STRIDE; 151 | cerr << "Measuring STRIDE characteristics..." << endl; 152 | (*log) << "Measuring STRIDE characteristics..." << endl; 153 | break; 154 | 155 | case MEMFOOTPRINT: 156 | *mode = MODE_MEMFOOTPRINT; 157 | cerr << "Measuring MEMFOOTPRINT characteristics..." << endl; 158 | (*log) << "Measuring MEMFOOTPRINT characteristics..." << endl; 159 | break; 160 | 161 | case MEMSTACKDIST: 162 | *mode = MODE_MEMSTACKDIST; 163 | cerr << "Measuring MEMSTACKDIST characteristics..." << endl; 164 | (*log) << "Measuring MEMSTACKDIST characteristics..." << endl; 165 | break; 166 | 167 | case CUSTOM: 168 | *mode = MODE_CUSTOM; 169 | (*log) << "Measuring CUSTOM characteristics..." << endl; 170 | break; 171 | 172 | default: 173 | (*log) << endl << "ERROR: Unknown analysis type chosen!" 
<< endl; 174 | cerr << "Known analysis types:" << endl; 175 | for(i=0; i < ANA_TYPE_CNT; i++){ 176 | cerr << "\t" << analysis_types_str[i] << endl; 177 | } 178 | break; 179 | } 180 | break; 181 | 182 | case INTERVAL_SIZE: 183 | cerr << "interval size: " << val << endl; 184 | (*log) << "interval size: " << val << endl; 185 | 186 | if(strcmp(val, "full") == 0){ 187 | *intervalSize = -1; 188 | cerr << "Returning data for full execution..." << endl; 189 | (*log) << "Returning data for full execution..." << endl; 190 | } 191 | else{ 192 | *intervalSize = (INT64) atoll(val); 193 | cerr << "Returning data for each interval of " << *intervalSize << " instructions..." << endl; 194 | (*log) << "Returning data for each interval of " << *intervalSize << " instructions..." << endl; 195 | } 196 | break; 197 | 198 | case ILP_SIZE: 199 | 200 | *_ilp_win_size = (UINT32)atoi(val); 201 | cerr << "ILP window size: " << *_ilp_win_size << endl; 202 | (*log) << "ILP window size: " << *_ilp_win_size << endl; 203 | break; 204 | 205 | case _BLOCK_SIZE: 206 | *_block_size = (UINT32)atoi(val); 207 | cerr << "block size: 2^" << *_block_size << endl; 208 | (*log) << "block size: 2^" << *_block_size << endl; 209 | break; 210 | 211 | case _PAGE_SIZE: 212 | *_page_size = (UINT32)atoi(val); 213 | cerr << "page size: 2^" << *_page_size << endl; 214 | (*log) << "page size: 2^" << *_page_size << endl; 215 | break; 216 | 217 | case ITYPES_SPEC_FILE: 218 | *_itypes_spec_file = (char*)checked_malloc((strlen(val)+1)*sizeof(char)); 219 | strcpy(*_itypes_spec_file, val); 220 | cerr << "ITYPES spec file: " << *_itypes_spec_file << endl; 221 | (*log) << "ITYPES spec file: " << *_itypes_spec_file << endl; 222 | break; 223 | 224 | case APPEND_PID: 225 | if (strcmp(val, "yes")==0) 226 | { 227 | *append_pid = 1; 228 | cerr << "append pid: yes" << endl; 229 | (*log) << "append pid: yes" << endl; 230 | } 231 | else if (strcmp(val, "no")==0) 232 | { 233 | *append_pid = 0; 234 | cerr << "append pid: no" << endl; 235 
| (*log) << "append pid: no" << endl; 236 | } 237 | else 238 | { 239 | cerr << "ERROR! append_pid can be either yes or no" << endl; 240 | (*log) << "ERROR! append_pid can be either yes or no" << endl; 241 | exit(1); 242 | } 243 | break; 244 | default: 245 | cerr << "ERROR: Unknown config parameter specified: " << param << " (" << val << ")" << endl; 246 | cerr << "Known config parameters:" << endl; 247 | (*log) << "ERROR: Unknown config parameter specified: " << param << " (" << val << ")" << endl; 248 | (*log) << "Known config parameters:" << endl; 249 | for(i=0; i < CONF_PAR_CNT; i++){ 250 | cerr << "\t" << config_params_str[i] << endl; 251 | (*log) << "\t" << config_params_str[i] << endl; 252 | } 253 | exit(1); 254 | break; 255 | } 256 | } 257 | cerr << "All done reading config" << endl; 258 | (*log) << "All done reading config" << endl; 259 | 260 | if(*mode == UNKNOWN_MODE){ 261 | cerr << "ERROR! No mode specified, the mica.conf file should specify the \"analysis_type\" config parameter." << endl; 262 | (*log) << "ERROR! No mode specified, the mica.conf file should specify the \"analysis_type\" config parameter." << endl; 263 | exit(1); 264 | } 265 | 266 | if(*mode == MODE_ILP_ONE && *_ilp_win_size == 0){ 267 | cerr << "ERROR! \"ilp_one\" mode was specified, but no window size (ilp_size) was found along with it!" << endl; 268 | (*log) << "ERROR! ERROR! \"ilp_one\" mode was specified, but no window size (ilp_size) was found along with it!" << endl; 269 | exit(1); 270 | } 271 | 272 | (*log).close(); 273 | 274 | free(param); 275 | free(val); 276 | 277 | } 278 | -------------------------------------------------------------------------------- /mica_init.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 
5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 8 | */ 9 | 10 | #include "mica.h" 11 | #include "mica_ilp.h" 12 | #include "mica_itypes.h" 13 | #include "mica_ppm.h" 14 | #include "mica_reg.h" 15 | #include "mica_stride.h" 16 | #include "mica_memfootprint.h" 17 | #include "mica_memstackdist.h" 18 | 19 | enum MODE { UNKNOWN_MODE, MODE_ALL, MODE_ILP, MODE_ILP_ONE, MODE_ITYPES, MODE_PPM, MODE_REG, MODE_STRIDE, MODE_MEMFOOTPRINT, MODE_MEMSTACKDIST, MODE_CUSTOM }; 20 | 21 | void setup_mica_log(ofstream *log); 22 | 23 | void read_config(ofstream *log, INT64* interval_size, MODE* mode, UINT32* _ilp_win_size, UINT32* _block_size, UINT32* _page_size, char** _itypes_spec_file, int* append_pid); 24 | -------------------------------------------------------------------------------- /mica_itypes.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 
8 | */ 9 | 10 | #include "pin.H" 11 | 12 | /* MICA includes */ 13 | #include "mica_utils.h" 14 | #include "mica_itypes.h" 15 | 16 | /* Global variables */ 17 | 18 | extern INT64 interval_size; 19 | extern INT64 interval_ins_count; 20 | extern INT64 interval_ins_count_for_hpc_alignment; 21 | extern INT64 total_ins_count; 22 | extern INT64 total_ins_count_for_hpc_alignment; 23 | extern char* _itypes_spec_file; 24 | 25 | ofstream output_file_itypes; 26 | 27 | identifier** group_identifiers; 28 | INT64* group_ids_cnt; 29 | INT64* group_counts; 30 | INT64 number_of_groups; 31 | 32 | INT64 other_ids_cnt; 33 | INT64 other_ids_max_cnt; 34 | identifier* other_group_identifiers; 35 | 36 | /* counter functions */ 37 | ADDRINT itypes_instr_intervals(){ 38 | return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size); 39 | }; 40 | 41 | VOID itypes_instr_interval_output(){ 42 | int i; 43 | output_file_itypes.open(mkfilename("itypes_phases_int"), ios::out|ios::app); 44 | output_file_itypes << interval_size; 45 | for(i=0; i < number_of_groups+1; i++){ 46 | output_file_itypes << " " << group_counts[i]; 47 | } 48 | output_file_itypes << endl; 49 | output_file_itypes.close(); 50 | } 51 | 52 | VOID itypes_instr_interval_reset(){ 53 | int i; 54 | for(i=0; i < number_of_groups+1; i++){ 55 | group_counts[i] = 0; 56 | } 57 | } 58 | 59 | VOID itypes_instr_interval(){ 60 | 61 | itypes_instr_interval_output(); 62 | itypes_instr_interval_reset(); 63 | interval_ins_count = 0; 64 | interval_ins_count_for_hpc_alignment = 0; 65 | } 66 | 67 | VOID itypes_count(UINT32 gid){ 68 | group_counts[gid]++; 69 | }; 70 | 71 | // initialize default groups 72 | VOID init_itypes_default_groups(){ 73 | 74 | number_of_groups = 12; 75 | 76 | group_identifiers = (identifier**)checked_malloc((number_of_groups+1)*sizeof(identifier*)); 77 | group_ids_cnt = (INT64*)checked_malloc((number_of_groups+1)*sizeof(INT64)); 78 | group_counts = (INT64*)checked_malloc((number_of_groups+1)*sizeof(INT64)); 79 | 
for(int i=0; i < number_of_groups+1; i++){ 80 | group_counts[i] = 0; 81 | } 82 | 83 | // memory reads 84 | group_ids_cnt[0] = 1; 85 | group_identifiers[0] = (identifier*)checked_malloc(group_ids_cnt[0]*sizeof(identifier)); 86 | group_identifiers[0][0].type = identifier_type::ID_TYPE_SPECIAL; 87 | group_identifiers[0][0].str = checked_strdup("mem_read"); 88 | 89 | // memory writes 90 | group_ids_cnt[1] = 1; 91 | group_identifiers[1] = (identifier*)checked_malloc(group_ids_cnt[1]*sizeof(identifier)); 92 | group_identifiers[1][0].type = identifier_type::ID_TYPE_SPECIAL; 93 | group_identifiers[1][0].str = checked_strdup("mem_write"); 94 | 95 | // control flow instructions 96 | group_ids_cnt[2] = 5; 97 | group_identifiers[2] = (identifier*)checked_malloc(group_ids_cnt[2]*sizeof(identifier)); 98 | group_identifiers[2][0].type = identifier_type::ID_TYPE_CATEGORY; 99 | group_identifiers[2][0].str = checked_strdup("COND_BR"); 100 | group_identifiers[2][1].type = identifier_type::ID_TYPE_CATEGORY; 101 | group_identifiers[2][1].str = checked_strdup("UNCOND_BR"); 102 | group_identifiers[2][2].type = identifier_type::ID_TYPE_OPCODE; 103 | group_identifiers[2][2].str = checked_strdup("LEAVE"); 104 | group_identifiers[2][3].type = identifier_type::ID_TYPE_OPCODE; 105 | group_identifiers[2][3].str = checked_strdup("RET_NEAR"); 106 | group_identifiers[2][4].type = identifier_type::ID_TYPE_OPCODE; 107 | group_identifiers[2][4].str = checked_strdup("CALL_NEAR"); 108 | 109 | // arithmetic instructions (integer) 110 | group_ids_cnt[3] = 5; 111 | group_identifiers[3] = (identifier*)checked_malloc(group_ids_cnt[3]*sizeof(identifier)); 112 | group_identifiers[3][0].type = identifier_type::ID_TYPE_CATEGORY; 113 | group_identifiers[3][0].str = checked_strdup("LOGICAL"); 114 | group_identifiers[3][1].type = identifier_type::ID_TYPE_CATEGORY; 115 | group_identifiers[3][1].str = checked_strdup("DATAXFER"); 116 | group_identifiers[3][2].type = identifier_type::ID_TYPE_CATEGORY; 117 | 
group_identifiers[3][2].str = checked_strdup("BINARY"); 118 | group_identifiers[3][3].type = identifier_type::ID_TYPE_CATEGORY; 119 | group_identifiers[3][3].str = checked_strdup("FLAGOP"); 120 | group_identifiers[3][4].type = identifier_type::ID_TYPE_CATEGORY; 121 | group_identifiers[3][4].str = checked_strdup("BITBYTE"); 122 | 123 | // floating point instructions 124 | group_ids_cnt[4] = 2; 125 | group_identifiers[4] = (identifier*)checked_malloc(group_ids_cnt[4]*sizeof(identifier)); 126 | group_identifiers[4][0].type = identifier_type::ID_TYPE_CATEGORY; 127 | group_identifiers[4][0].str = checked_strdup("X87_ALU"); 128 | group_identifiers[4][1].type = identifier_type::ID_TYPE_CATEGORY; 129 | group_identifiers[4][1].str = checked_strdup("FCMOV"); 130 | 131 | // pop/push instructions (stack usage) 132 | group_ids_cnt[5] = 2; 133 | group_identifiers[5] = (identifier*)checked_malloc(group_ids_cnt[5]*sizeof(identifier)); 134 | group_identifiers[5][0].type = identifier_type::ID_TYPE_CATEGORY; 135 | group_identifiers[5][0].str = checked_strdup("POP"); 136 | group_identifiers[5][1].type = identifier_type::ID_TYPE_CATEGORY; 137 | group_identifiers[5][1].str = checked_strdup("PUSH"); 138 | 139 | // [!] shift instructions (bitwise) 140 | group_ids_cnt[6] = 1; 141 | group_identifiers[6] = (identifier*)checked_malloc(group_ids_cnt[6]*sizeof(identifier)); 142 | group_identifiers[6][0].type = identifier_type::ID_TYPE_CATEGORY; 143 | group_identifiers[6][0].str = checked_strdup("SHIFT"); 144 | 145 | // [!] string instructions 146 | group_ids_cnt[7] = 1; 147 | group_identifiers[7] = (identifier*)checked_malloc(group_ids_cnt[7]*sizeof(identifier)); 148 | group_identifiers[7][0].type = identifier_type::ID_TYPE_CATEGORY; 149 | group_identifiers[7][0].str = checked_strdup("STRINGOP"); 150 | 151 | // [!] 
MMX/SSE instructions 152 | group_ids_cnt[8] = 2; 153 | group_identifiers[8] = (identifier*)checked_malloc(group_ids_cnt[8]*sizeof(identifier)); 154 | group_identifiers[8][0].type = identifier_type::ID_TYPE_CATEGORY; 155 | group_identifiers[8][0].str = checked_strdup("MMX"); 156 | group_identifiers[8][1].type = identifier_type::ID_TYPE_CATEGORY; 157 | group_identifiers[8][1].str = checked_strdup("SSE"); 158 | 159 | // other (interrupts, rotate instructions, semaphore, conditional move, system) 160 | group_ids_cnt[9] = 8; 161 | group_identifiers[9] = (identifier*)checked_malloc(group_ids_cnt[9]*sizeof(identifier)); 162 | group_identifiers[9][0].type = identifier_type::ID_TYPE_CATEGORY; 163 | group_identifiers[9][0].str = checked_strdup("INTERRUPT"); 164 | group_identifiers[9][1].type = identifier_type::ID_TYPE_CATEGORY; 165 | group_identifiers[9][1].str = checked_strdup("ROTATE"); 166 | group_identifiers[9][2].type = identifier_type::ID_TYPE_CATEGORY; 167 | group_identifiers[9][2].str = checked_strdup("SEMAPHORE"); 168 | group_identifiers[9][3].type = identifier_type::ID_TYPE_CATEGORY; 169 | group_identifiers[9][3].str = checked_strdup("CMOV"); 170 | group_identifiers[9][4].type = identifier_type::ID_TYPE_CATEGORY; 171 | group_identifiers[9][4].str = checked_strdup("SYSTEM"); 172 | group_identifiers[9][5].type = identifier_type::ID_TYPE_CATEGORY; 173 | group_identifiers[9][5].str = checked_strdup("MISC"); 174 | group_identifiers[9][6].type = identifier_type::ID_TYPE_CATEGORY; 175 | group_identifiers[9][6].str = checked_strdup("PREFETCH"); 176 | group_identifiers[9][7].type = identifier_type::ID_TYPE_CATEGORY; 177 | group_identifiers[9][7].str = checked_strdup("SYSCALL"); 178 | 179 | // [!] 
NOP instructions 180 | group_ids_cnt[10] = 2; 181 | group_identifiers[10] = (identifier*)checked_malloc(group_ids_cnt[10]*sizeof(identifier)); 182 | group_identifiers[10][0].type = identifier_type::ID_TYPE_CATEGORY; 183 | group_identifiers[10][0].str = checked_strdup("WIDENOP"); 184 | group_identifiers[10][1].type = identifier_type::ID_TYPE_CATEGORY; 185 | group_identifiers[10][1].str = checked_strdup("NOP"); 186 | 187 | // register transfer instructions (move from a register to another register) 188 | group_ids_cnt[11] = 1; 189 | group_identifiers[11] = (identifier*)checked_malloc(group_ids_cnt[11]*sizeof(identifier)); 190 | group_identifiers[11][0].type = identifier_type::ID_TYPE_SPECIAL; 191 | group_identifiers[11][0].str = checked_strdup("reg_transfer"); 192 | } 193 | 194 | /* initializing */ 195 | VOID init_itypes(){ 196 | 197 | int i, j; 198 | int gid, sgid; 199 | char type[100]; 200 | char str[100]; 201 | string line; 202 | 203 | /* try and open instruction groups specification file */ 204 | if(_itypes_spec_file != NULL){ 205 | ifstream f(_itypes_spec_file); 206 | if(f){ 207 | // count number of groups 208 | number_of_groups = 0; 209 | while( getline(f,line)){ 210 | sscanf(line.c_str(), "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); 211 | if(gid > number_of_groups) 212 | number_of_groups++; 213 | } 214 | f.close(); 215 | number_of_groups++; 216 | cerr << "==> found " << number_of_groups << " groups" << endl; 217 | 218 | group_identifiers = (identifier**)checked_malloc((number_of_groups+1)*sizeof(identifier*)); 219 | group_ids_cnt = (INT64*)checked_malloc((number_of_groups+1)*sizeof(INT64)); 220 | group_counts = (INT64*)checked_malloc((number_of_groups+1)*sizeof(INT64)); 221 | for(i=0; i < number_of_groups+1; i++){ 222 | group_counts[i] = 0; 223 | } 224 | 225 | // count number of subgroups per group 226 | f.open(_itypes_spec_file); 227 | i=0; 228 | while( getline(f,line)){ 229 | sscanf(line.c_str(), "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); 230 
| if(gid == i){ 231 | group_ids_cnt[i]++; 232 | } 233 | else{ 234 | group_identifiers[i] = (identifier*)checked_malloc(group_ids_cnt[i]*sizeof(identifier)); 235 | i++; 236 | group_ids_cnt[i]++; 237 | } 238 | } 239 | group_identifiers[i] = (identifier*)checked_malloc(group_ids_cnt[i]*sizeof(identifier)); 240 | f.close(); 241 | 242 | // save subgroup types and identifiers 243 | f.open(_itypes_spec_file); 244 | i=0; 245 | while( getline(f,line)){ 246 | sscanf(line.c_str(), "%d, %d, %[^,], %[^\n]\n", &gid, &sgid, type, str); 247 | if(strcmp(type, "CATEGORY") == 0){ 248 | group_identifiers[gid][sgid].type = identifier_type::ID_TYPE_CATEGORY; 249 | } 250 | else{ 251 | if(strcmp(type, "OPCODE") == 0){ 252 | group_identifiers[gid][sgid].type = identifier_type::ID_TYPE_OPCODE; 253 | } 254 | else{ 255 | if(strcmp(type, "SPECIAL") == 0){ 256 | group_identifiers[gid][sgid].type = identifier_type::ID_TYPE_SPECIAL; 257 | } 258 | else{ 259 | cerr << "ERROR! Unknown subgroup type found (\"" << type << "\")." << endl; 260 | cerr << " Known subgroup types: {CATEGORY, OPCODE, SPECIAL}." << endl; 261 | exit(-1); 262 | } 263 | } 264 | } 265 | group_identifiers[gid][sgid].str = checked_strdup(str); 266 | } 267 | f.close(); 268 | 269 | // print out groups read 270 | for(i=0; i < number_of_groups; i++){ 271 | cerr << " group " << i << " (#: " << group_ids_cnt[i] << "): "; 272 | for(j=0; j < group_ids_cnt[i]; j++){ 273 | cerr << group_identifiers[i][j].str << " "; 274 | switch(group_identifiers[i][j].type){ 275 | case identifier_type::ID_TYPE_CATEGORY: 276 | cerr << "[CAT]; "; 277 | break; 278 | case identifier_type::ID_TYPE_OPCODE: 279 | cerr << "[OPCODE]; "; 280 | break; 281 | case identifier_type::ID_TYPE_SPECIAL: 282 | cerr << "[SPECIAL]; "; 283 | break; 284 | default: 285 | cerr << "ERROR! Unknown subgroup type found for [" << i << "][" << j << "] (\"" << group_identifiers[i][j].type << "\")." << endl; 286 | cerr << " Known subgroup types: {CATEGORY, OPCODE, SPECIAL}." 
<< endl; 287 | exit(-1); 288 | break; 289 | } 290 | } 291 | cerr << endl; 292 | } 293 | } 294 | else{ 295 | cerr << "ERROR! Failed to open file \"" << _itypes_spec_file << "\" containing instruction groups specification." << endl; 296 | exit(-1); 297 | } 298 | } 299 | else{ 300 | // if no specification file was found, just use defaults (compatible with MICA v0.23 and older) 301 | init_itypes_default_groups(); 302 | } 303 | 304 | // allocate space for identifiers of 'other' group 305 | other_ids_cnt = 0; 306 | other_ids_max_cnt = 2; 307 | other_group_identifiers = (identifier*)checked_malloc(other_ids_max_cnt*sizeof(identifier)); 308 | 309 | // (initializing total instruction counts is done in mica.cpp) 310 | 311 | if(interval_size != -1){ 312 | output_file_itypes.open(mkfilename("itypes_phases_int"), ios::out|ios::trunc); 313 | output_file_itypes.close(); 314 | } 315 | } 316 | 317 | /* instrumenting (instruction level) */ 318 | VOID instrument_itypes(INS ins, VOID* v){ 319 | 320 | int i,j; 321 | char cat[50]; 322 | char opcode[50]; 323 | strcpy(cat,CATEGORY_StringShort(INS_Category(ins)).c_str()); 324 | strcpy(opcode,INS_Mnemonic(ins).c_str()); 325 | BOOL categorized = false; 326 | 327 | // go over all groups, increase group count if instruction matches that group 328 | // group counts are increased at most once per instruction executed, 329 | // even if the instruction matches multiple identifiers in that group 330 | for(i=0; i < number_of_groups; i++){ 331 | for(j=0; j < group_ids_cnt[i]; j++){ 332 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_CATEGORY){ 333 | if(strcmp(group_identifiers[i][j].str, cat) == 0){ 334 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); 335 | categorized = true; 336 | break; 337 | } 338 | } 339 | else{ 340 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_OPCODE){ 341 | if(strcmp(group_identifiers[i][j].str, opcode) == 0){ 342 | INS_InsertCall(ins, IPOINT_BEFORE, 
(AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); 343 | categorized = true; 344 | break; 345 | } 346 | } 347 | else{ 348 | if(group_identifiers[i][j].type == identifier_type::ID_TYPE_SPECIAL){ 349 | if(strcmp(group_identifiers[i][j].str, "mem_read") == 0 && INS_IsMemoryRead(ins) ){ 350 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); 351 | categorized = true; 352 | break; 353 | } 354 | else{ 355 | if(strcmp(group_identifiers[i][j].str, "mem_write") == 0 && INS_IsMemoryWrite(ins) ){ 356 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_count, IARG_UINT32, i, IARG_END); 357 | categorized = true; 358 | break; 359 | } 360 | else if(strcmp(group_identifiers[i][j].str, "reg_transfer") == 0 && INS_IsMov(ins) ){ 361 | UINT32 flag=0,n; 362 | n=INS_OperandCount(ins); 363 | for(UINT32 i=0;i= other_ids_max_cnt){ 403 | other_ids_max_cnt *= 2; 404 | other_group_identifiers = (identifier*)checked_realloc(other_group_identifiers, other_ids_max_cnt*sizeof(identifier)); 405 | } 406 | } 407 | 408 | /* inserting calls for counting instructions is done in mica.cpp */ 409 | if(interval_size != -1){ 410 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_instr_intervals,IARG_END); 411 | /* only called if interval is 'full' */ 412 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)itypes_instr_interval,IARG_END); 413 | } 414 | } 415 | 416 | /* finishing... 
*/ 417 | VOID fini_itypes(INT32 code, VOID* v){ 418 | int i; 419 | 420 | if(interval_size == -1){ 421 | output_file_itypes.open(mkfilename("itypes_full_int"), ios::out|ios::trunc); 422 | output_file_itypes << total_ins_count_for_hpc_alignment << " " << total_ins_count; 423 | for(i=0; i < number_of_groups; i++){ 424 | output_file_itypes << " " << group_counts[i]; 425 | } 426 | output_file_itypes << endl; 427 | } 428 | else{ 429 | output_file_itypes.open(mkfilename("itypes_phases_int"), ios::out|ios::app); 430 | output_file_itypes << interval_ins_count; 431 | for(i=0; i < number_of_groups+1; i++){ 432 | output_file_itypes << " " << group_counts[i]; 433 | } 434 | output_file_itypes << endl; 435 | } 436 | //output_file_itypes << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; 437 | output_file_itypes << " "; 438 | output_file_itypes.close(); 439 | 440 | // print instruction categories in 'other' group of instructions 441 | ofstream output_file_other_group_categories; 442 | output_file_other_group_categories.open("itypes_other_group_categories.txt", ios::out|ios::trunc); 443 | for(i=0; i < other_ids_cnt; i++){ 444 | output_file_other_group_categories << other_group_identifiers[i].str << endl; 445 | } 446 | } 447 | -------------------------------------------------------------------------------- /mica_itypes.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 
8 | */

#include "mica.h"

#ifndef MICA_ITYPES_H
#define MICA_ITYPES_H

/*
 * One matching rule of an instruction group: an instruction belongs to a
 * group if it matches at least one of the group's identifiers.
 */
typedef struct identifier_type{
	// type of identifier
	// SPECIAL includes stuff like memory reads/writes
	enum {ID_TYPE_CATEGORY = 1, ID_TYPE_OPCODE, ID_TYPE_SPECIAL} type;
	// string identifier for category/opcode
	// (for SPECIAL: a keyword such as "mem_read", "mem_write" or "reg_transfer")
	char* str;
} identifier;

/* set up the instruction groups: from the spec file if one was configured, else the defaults */
VOID init_itypes();
/* install the 12 built-in instruction groups */
VOID init_itypes_default_groups();

/* Pin instrumentation callback (instruction level) */
VOID instrument_itypes(INS ins, VOID* v);
VOID instrument_itypes_bbl(TRACE trace, VOID* v);
/* write the final counters when the run ends */
VOID fini_itypes(INT32 code, VOID* v);


/* analysis routine: count one executed instruction for group gid */
VOID itypes_count(UINT32 gid);

/* append the counters of the current interval to the "itypes_phases_int" output */
VOID itypes_instr_interval_output();
/* zero all group counters */
VOID itypes_instr_interval_reset();

#endif
37 | -------------------------------------------------------------------------------- /mica_memfootprint.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information.
8 | */ 9 | 10 | #include "pin.H" 11 | 12 | /* MICA includes */ 13 | #include "mica_utils.h" 14 | #include "mica_memfootprint.h" 15 | 16 | /* Global variables */ 17 | 18 | extern INT64 interval_size; 19 | extern INT64 interval_ins_count; 20 | extern INT64 interval_ins_count_for_hpc_alignment; 21 | extern INT64 total_ins_count; 22 | extern INT64 total_ins_count_for_hpc_alignment; 23 | 24 | extern UINT32 _block_size; 25 | extern UINT32 _page_size; 26 | 27 | static UINT32 memfootprint_block_size; 28 | static UINT32 page_size; 29 | 30 | static ofstream output_file_memfootprint; 31 | 32 | static nlist* DmemCacheWorkingSetTable[MAX_MEM_TABLE_ENTRIES]; 33 | static nlist* DmemPageWorkingSetTable[MAX_MEM_TABLE_ENTRIES]; 34 | static nlist* ImemCacheWorkingSetTable[MAX_MEM_TABLE_ENTRIES]; 35 | static nlist* ImemPageWorkingSetTable[MAX_MEM_TABLE_ENTRIES]; 36 | 37 | 38 | static long long DmemCacheWSS() { 39 | long long DmemCacheWorkingSetSize = 0L; 40 | for (int i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) { 41 | for (nlist *np = DmemCacheWorkingSetTable [i]; np != (nlist*) NULL; np = np->next) { 42 | for (ADDRINT j = 0; j < MAX_MEM_BLOCK; j++) { 43 | if ((np->mem)->numReferenced [j]) { 44 | DmemCacheWorkingSetSize++; 45 | } 46 | } 47 | } 48 | } 49 | return DmemCacheWorkingSetSize; 50 | } 51 | 52 | static long long ImemCacheWSS() { 53 | long long ImemCacheWorkingSetSize = 0L; 54 | for (int i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) { 55 | for (nlist *np = ImemCacheWorkingSetTable [i]; np != (nlist*) NULL; np = np->next) { 56 | for (ADDRINT j = 0; j < MAX_MEM_BLOCK; j++) { 57 | if ((np->mem)->numReferenced [j]) { 58 | ImemCacheWorkingSetSize++; 59 | } 60 | } 61 | } 62 | } 63 | return ImemCacheWorkingSetSize; 64 | } 65 | 66 | static long long DmemPageWSS() { 67 | long long DmemPageWorkingSetSize = 0L; 68 | for (int i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) { 69 | for (nlist *np = DmemPageWorkingSetTable [i]; np != (nlist*) NULL; np = np->next) { 70 | for (ADDRINT j = 0; j < MAX_MEM_BLOCK; j++) 
{ 71 | if ((np->mem)->numReferenced [j]) { 72 | DmemPageWorkingSetSize++; 73 | } 74 | } 75 | } 76 | } 77 | return DmemPageWorkingSetSize; 78 | } 79 | 80 | static long long ImemPageWSS() { 81 | long long ImemPageWorkingSetSize = 0L; 82 | for (int i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) { 83 | for (nlist *np = ImemPageWorkingSetTable [i]; np != (nlist*) NULL; np = np->next) { 84 | for (ADDRINT j = 0; j < MAX_MEM_BLOCK; j++) { 85 | if ((np->mem)->numReferenced [j]) { 86 | ImemPageWorkingSetSize++; 87 | } 88 | } 89 | } 90 | } 91 | return ImemPageWorkingSetSize; 92 | } 93 | 94 | /* initializing */ 95 | void init_memfootprint(){ 96 | int i; 97 | 98 | for (i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) { 99 | DmemCacheWorkingSetTable[i] = (nlist*) NULL; 100 | DmemPageWorkingSetTable[i] = (nlist*) NULL; 101 | ImemCacheWorkingSetTable[i] = (nlist*) NULL; 102 | ImemPageWorkingSetTable[i] = (nlist*) NULL; 103 | } 104 | 105 | memfootprint_block_size = _block_size; 106 | page_size = _page_size; 107 | 108 | if(interval_size != -1){ 109 | output_file_memfootprint.open(mkfilename("memfootprint_phases_int"), ios::out|ios::trunc); 110 | output_file_memfootprint.close(); 111 | } 112 | } 113 | 114 | VOID memOp(ADDRINT effMemAddr, ADDRINT size){ 115 | if(size > 0){ 116 | ADDRINT a; 117 | ADDRINT addr, endAddr, upperAddr, indexInChunk; 118 | memNode* chunk; 119 | 120 | /* D-stream (64-byte) cache block memory footprint */ 121 | 122 | addr = effMemAddr >> memfootprint_block_size; 123 | endAddr = (effMemAddr + size - 1) >> memfootprint_block_size; 124 | 125 | for(a = addr; a <= endAddr; a++){ 126 | 127 | upperAddr = a >> LOG_MAX_MEM_BLOCK; 128 | indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); 129 | 130 | chunk = lookup(DmemCacheWorkingSetTable, upperAddr); 131 | if(chunk == (memNode*)NULL) 132 | chunk = install(DmemCacheWorkingSetTable, upperAddr); 133 | 134 | //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK); 135 | chunk->numReferenced[indexInChunk] = true; 136 | 137 | } 138 | 139 
| /* D-stream (4KB) page block memory footprint */ 140 | 141 | addr = effMemAddr >> page_size; 142 | endAddr = (effMemAddr + size - 1) >> page_size; 143 | 144 | for(a = addr; a <= endAddr; a++){ 145 | 146 | upperAddr = a >> LOG_MAX_MEM_BLOCK; 147 | indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); 148 | 149 | chunk = lookup(DmemPageWorkingSetTable, upperAddr); 150 | if(chunk == (memNode*)NULL) 151 | chunk = install(DmemPageWorkingSetTable, upperAddr); 152 | 153 | //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK); 154 | chunk->numReferenced[indexInChunk] = true; 155 | 156 | } 157 | } 158 | } 159 | 160 | VOID instrMem(ADDRINT instrAddr, ADDRINT size){ 161 | 162 | if(size > 0){ 163 | ADDRINT a; 164 | ADDRINT addr, endAddr, upperAddr, indexInChunk; 165 | memNode* chunk; 166 | 167 | 168 | /* I-stream (64-byte) cache block memory footprint */ 169 | 170 | addr = instrAddr >> memfootprint_block_size; 171 | endAddr = (instrAddr + size - 1) >> memfootprint_block_size; 172 | 173 | for(a = addr; a <= endAddr; a++){ 174 | 175 | upperAddr = a >> LOG_MAX_MEM_BLOCK; 176 | indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); 177 | 178 | chunk = lookup(ImemCacheWorkingSetTable, upperAddr); 179 | if(chunk == (memNode*)NULL) 180 | chunk = install(ImemCacheWorkingSetTable, upperAddr); 181 | 182 | //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK); 183 | chunk->numReferenced[indexInChunk] = true; 184 | 185 | } 186 | 187 | /* I-stream (4KB) page block memory footprint */ 188 | 189 | addr = instrAddr >> page_size; 190 | endAddr = (instrAddr + size - 1) >> page_size; 191 | 192 | for(a = addr; a <= endAddr; a++){ 193 | 194 | upperAddr = a >> LOG_MAX_MEM_BLOCK; 195 | indexInChunk = a ^ (upperAddr << LOG_MAX_MEM_BLOCK); 196 | 197 | chunk = lookup(ImemPageWorkingSetTable, upperAddr); 198 | if(chunk == (memNode*)NULL) 199 | chunk = install(ImemPageWorkingSetTable, upperAddr); 200 | 201 | //assert(indexInChunk >= 0 && indexInChunk < MAX_MEM_BLOCK); 202 | 
chunk->numReferenced[indexInChunk] = true; 203 | } 204 | } 205 | } 206 | 207 | static VOID memfootprint_instr_full(ADDRINT instrAddr, ADDRINT size){ 208 | 209 | /* counting instructions is done in all_instr_full() */ 210 | 211 | instrMem(instrAddr, size); 212 | } 213 | 214 | static ADDRINT memfootprint_instr_intervals(ADDRINT instrAddr, ADDRINT size){ 215 | 216 | /* counting instructions is done in all_instr_intervals() */ 217 | 218 | instrMem(instrAddr, size); 219 | return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size); 220 | } 221 | 222 | VOID memfootprint_instr_interval_output(){ 223 | 224 | output_file_memfootprint.open(mkfilename("memfootprint_phases_int"), ios::out|ios::app); 225 | 226 | long long DmemCacheWorkingSetSize = DmemCacheWSS(); 227 | long long DmemPageWorkingSetSize = DmemPageWSS(); 228 | long long ImemCacheWorkingSetSize = ImemCacheWSS(); 229 | long long ImemPageWorkingSetSize = ImemPageWSS(); 230 | 231 | output_file_memfootprint << DmemCacheWorkingSetSize << " " << DmemPageWorkingSetSize << " " << ImemCacheWorkingSetSize << " " << ImemPageWorkingSetSize << endl; 232 | output_file_memfootprint.close(); 233 | } 234 | 235 | VOID memfootprint_instr_interval_reset(){ 236 | /* clean used memory, to avoid memory shortage for long (CPU2006) benchmarks */ 237 | for(ADDRINT i=0; i < MAX_MEM_TABLE_ENTRIES; i++){ 238 | free_nlist(DmemCacheWorkingSetTable[i]); 239 | free_nlist(DmemPageWorkingSetTable[i]); 240 | free_nlist(ImemCacheWorkingSetTable[i]); 241 | free_nlist(ImemPageWorkingSetTable[i]); 242 | } 243 | } 244 | 245 | static VOID memfootprint_instr_interval(){ 246 | 247 | memfootprint_instr_interval_output(); 248 | memfootprint_instr_interval_reset(); 249 | interval_ins_count = 0; 250 | interval_ins_count_for_hpc_alignment = 0; 251 | } 252 | 253 | /* instrumenting (instruction level) */ 254 | VOID instrument_memfootprint(INS ins, VOID* v){ 255 | 256 | if(INS_IsMemoryRead(ins)){ 257 | 258 | INS_InsertCall(ins, IPOINT_BEFORE, 
(AFUNPTR)memOp, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END); 259 | 260 | if(INS_HasMemoryRead2(ins)){ 261 | 262 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memOp, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END); 263 | } 264 | } 265 | if(INS_IsMemoryWrite(ins)){ 266 | 267 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memOp, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END); 268 | } 269 | 270 | if(interval_size == -1) 271 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memfootprint_instr_full, IARG_ADDRINT, INS_Address(ins), IARG_ADDRINT, (ADDRINT)INS_Size(ins), IARG_END); 272 | else{ 273 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)memfootprint_instr_intervals, IARG_ADDRINT, INS_Address(ins), IARG_ADDRINT, (ADDRINT)INS_Size(ins), IARG_END); 274 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)memfootprint_instr_interval, IARG_END); 275 | } 276 | } 277 | 278 | 279 | /* finishing... */ 280 | VOID fini_memfootprint(INT32 code, VOID* v){ 281 | 282 | long long DmemCacheWorkingSetSize = DmemCacheWSS(); 283 | long long DmemPageWorkingSetSize = DmemPageWSS(); 284 | long long ImemCacheWorkingSetSize = ImemCacheWSS(); 285 | long long ImemPageWorkingSetSize = ImemPageWSS(); 286 | 287 | if(interval_size == -1){ 288 | output_file_memfootprint.open(mkfilename("memfootprint_full_int"), ios::out|ios::trunc); 289 | } 290 | else{ 291 | output_file_memfootprint.open(mkfilename("memfootprint_phases_int"), ios::out|ios::app); 292 | } 293 | 294 | output_file_memfootprint << DmemCacheWorkingSetSize << " " << DmemPageWorkingSetSize << " " << ImemCacheWorkingSetSize << " " << ImemPageWorkingSetSize << endl; 295 | //output_file_memfootprint << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; 296 | output_file_memfootprint << " "; 297 | output_file_memfootprint.close(); 298 | } 299 | -------------------------------------------------------------------------------- /mica_memfootprint.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 8 | */ 9 | 10 | #include "mica.h" 11 | 12 | void init_memfootprint(); 13 | VOID instrument_memfootprint(INS ins, VOID* v); 14 | VOID fini_memfootprint(INT32 code, VOID* v); 15 | 16 | VOID memOp(ADDRINT effMemAddr, ADDRINT size); 17 | VOID instrMem(ADDRINT instrAddr, ADDRINT size); 18 | 19 | VOID memfootprint_instr_interval_output(); 20 | VOID memfootprint_instr_interval_reset(); 21 | -------------------------------------------------------------------------------- /mica_memstackdist.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 8 | */ 9 | 10 | #include "pin.H" 11 | 12 | /* MICA includes */ 13 | #include "mica_utils.h" 14 | #include "mica_memstackdist.h" 15 | 16 | /* Global variables */ 17 | 18 | extern INT64 interval_size; 19 | extern INT64 interval_ins_count; 20 | extern INT64 interval_ins_count_for_hpc_alignment; 21 | extern INT64 total_ins_count; 22 | extern INT64 total_ins_count_for_hpc_alignment; 23 | 24 | extern UINT32 _block_size; 25 | 26 | static UINT32 memstackdist_block_size; 27 | 28 | static ofstream output_file_memstackdist; 29 | 30 | /* A single entry of the cache line reference stack. 
 * below points to the entry below us in the stack
 * above points to the entry above us in the stack
 * block_addr is the cache line index of this entry
 * bucket is the number of the stack depth bucket where this entry belongs
 */
typedef struct stack_entry_type {
    struct stack_entry_type* below;   /* neighbour further from the stack top (NULL at the bottom) */
    struct stack_entry_type* above;   /* neighbour closer to the stack top (NULL for the top entry) */
    ADDRINT block_addr;
    INT32 bucket;
} stack_entry;

/* A single entry of the hash table, contains an array of stack entries referenced by part of cache line index. */
typedef struct block_type_fast {
    ADDRINT id;                                   /* hash key: upper part of the cache line index */
    stack_entry* stack_entries[MAX_MEM_ENTRIES];  /* indexed by the lower part of the cache line index */
    struct block_type_fast* next;                 /* next block in the same hash chain */
} block_fast;

/* Top of the LRU stack and the number of entries on it (including the dummy bottom entry). */
static stack_entry* stack_top;
static UINT64 stack_size;

/* Hash table mapping cache line indices to their stack entries. */
static block_fast* hashTableCacheBlocks_fast[MAX_MEM_TABLE_ENTRIES];
/* Number of cache line references seen, and how many of them were first-time (cold) references. */
static INT64 mem_ref_cnt;
static INT64 cold_refs;

/* Counters of accesses into each bucket. */
static INT64 buckets[BUCKET_CNT];
/* References to stack entries that are the oldest entries belonging to the particular bucket.
 * This is used to update bucket attributes of stack entries efficiently. Since the last
 * bucket is the overflow bucket, the last borderline entry should never be set.
*/ 62 | static stack_entry* borderline_stack_entries[BUCKET_CNT]; 63 | 64 | /* initializing */ 65 | void init_memstackdist(){ 66 | 67 | int i; 68 | 69 | /* initialize */ 70 | cold_refs = 0; 71 | for(i=0; i < BUCKET_CNT; i++){ 72 | buckets[i] = 0; 73 | borderline_stack_entries[i] = NULL; 74 | } 75 | mem_ref_cnt = 0; 76 | /* hash table */ 77 | for (i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) { 78 | hashTableCacheBlocks_fast[i] = NULL; 79 | } 80 | /* access stack */ 81 | /* a dummy entry is inserted on the stack top to save some checks later */ 82 | /* since the dummy entry is not in the hash table, it should never be used */ 83 | stack_top = (stack_entry*) checked_malloc(sizeof(stack_entry)); 84 | stack_top->block_addr = 0; 85 | stack_top->above = NULL; 86 | stack_top->below = NULL; 87 | stack_top->bucket = 0; 88 | stack_size = 1; 89 | 90 | memstackdist_block_size = _block_size; 91 | 92 | if(interval_size != -1){ 93 | output_file_memstackdist.open(mkfilename("memstackdist_phases_int"), ios::out|ios::trunc); 94 | output_file_memstackdist.close(); 95 | } 96 | } 97 | 98 | /*VOID memstackdist_instr_full(){ 99 | // counting instructions is done in all_instr_full() 100 | 101 | }*/ 102 | 103 | static ADDRINT memstackdist_instr_intervals(){ 104 | 105 | /* counting instructions is done in all_instr_intervals() */ 106 | 107 | return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size); 108 | } 109 | 110 | VOID memstackdist_instr_interval_output(){ 111 | int i; 112 | output_file_memstackdist.open(mkfilename("memstackdist_phases_int"), ios::out|ios::app); 113 | output_file_memstackdist << mem_ref_cnt << " " << cold_refs; 114 | for(i=0; i < BUCKET_CNT; i++){ 115 | output_file_memstackdist << " " << buckets[i]; 116 | } 117 | output_file_memstackdist << endl; 118 | output_file_memstackdist.close(); 119 | } 120 | 121 | VOID memstackdist_instr_interval_reset(){ 122 | int i; 123 | mem_ref_cnt = 0; 124 | cold_refs = 0; 125 | for(i=0; i < BUCKET_CNT; i++){ 126 | buckets[i] = 0; 
127 | } 128 | } 129 | 130 | static VOID memstackdist_instr_interval(){ 131 | 132 | memstackdist_instr_interval_output(); 133 | memstackdist_instr_interval_reset(); 134 | interval_ins_count = 0; 135 | interval_ins_count_for_hpc_alignment = 0; 136 | } 137 | 138 | /* hash table support */ 139 | 140 | /** entry_lookup 141 | * 142 | * Finds an arrray of stack entry references for a given address key (upper part of address) in a hash table. 143 | */ 144 | stack_entry** entry_lookup(block_fast** table, ADDRINT key){ 145 | 146 | block_fast* b; 147 | 148 | for (b = table[key % MAX_MEM_TABLE_ENTRIES]; b != NULL; b = b->next){ 149 | if(b->id == key) 150 | return b->stack_entries; 151 | } 152 | 153 | return NULL; 154 | } 155 | 156 | /** entry_install 157 | * 158 | * Installs a new array of stack entry references for a given address key (upper part of address) in a hash table. 159 | */ 160 | static stack_entry** entry_install(block_fast** table, ADDRINT key){ 161 | 162 | block_fast* b; 163 | 164 | ADDRINT index = key % MAX_MEM_TABLE_ENTRIES; 165 | 166 | b = table[index]; 167 | 168 | if(b == NULL) { 169 | b = (block_fast*)checked_malloc(sizeof(block_fast)); 170 | table[index] = b; 171 | } 172 | else{ 173 | while(b->next != NULL){ 174 | b = b->next; 175 | } 176 | b->next = (block_fast*)checked_malloc(sizeof(block_fast)); 177 | b = b->next; 178 | } 179 | b->next = NULL; 180 | b->id = key; 181 | for(ADDRINT i = 0; i < MAX_MEM_ENTRIES; i++){ 182 | b->stack_entries[i] = NULL; 183 | } 184 | return b->stack_entries; 185 | } 186 | 187 | 188 | /* stack support */ 189 | 190 | #if 0 191 | /** stack_sanity_check 192 | * 193 | * Checks whether the stack structure is internally consistent. 
194 | */ 195 | static VOID stack_sanity_check(){ 196 | 197 | UINT64 position = 0; 198 | INT32 bucket = 0; 199 | 200 | stack_entry *e = stack_top; 201 | 202 | if (e->above != NULL){ 203 | ERROR_MSG("Item above top of stack."); 204 | exit(1); 205 | } 206 | 207 | while (e != NULL){ 208 | 209 | // Check whether the stack entry has a correct bucket. 210 | if (e->bucket != bucket){ 211 | ERROR_MSG("Stack entry with invalid bucket."); 212 | exit(1); 213 | } 214 | 215 | // Check whether the stack entry is linked correctly. 216 | if (e->above && (e->above->below != e)){ 217 | ERROR_MSG("Incorrectly linked stack."); 218 | exit(1); 219 | } 220 | if (e->below && (e->below->above != e)){ 221 | ERROR_MSG("Incorrectly linked stack."); 222 | exit(1); 223 | } 224 | 225 | // Calculate which bucket we should be in next. 226 | // Never spill over the overflow bucket though. 227 | if (bucket < BUCKET_CNT - 1) 228 | { 229 | UINT64 borderline = ((UINT64) 1) << bucket; 230 | if (position == borderline){ 231 | if (borderline_stack_entries [bucket] != e){ 232 | ERROR_MSG("Incorrect bucket borderline."); 233 | exit(1); 234 | } 235 | bucket ++; 236 | } 237 | } 238 | 239 | // Go on through the entire stack. 240 | e = e->below; 241 | position++; 242 | } 243 | } 244 | #endif 245 | 246 | 247 | /** move_to_top_fast 248 | * 249 | * Moves the stack entry e corresponding to the address a to the top of stack. 250 | * The stack entry can be NULL, in which case a new stack entry is created. 
*/
static VOID move_to_top_fast(stack_entry *e, ADDRINT a){

    INT32 bucket;

    /* check if entry was accessed before */
    if(e != NULL){

        /* check to see if we already are at top of stack */
        if(e->above != NULL){

            // disconnect the entry from its current position on the stack
            // (e keeps its own above/below pointers until it is re-linked below)
            if (e->below != NULL) e->below->above = e->above;
            e->above->below = e->below;

            // adjust all borderline entries above the entry touched (note that we can be sure those entries exist)
            // a borderline entry is an entry whose bucket will change when an item is inserted above it on the stack
            for(bucket=0; bucket < BUCKET_CNT && bucket < e->bucket; bucket++){
                borderline_stack_entries[bucket]->bucket++;
                borderline_stack_entries[bucket] = borderline_stack_entries[bucket]->above;
            }
            // if the entry touched was a borderline entry, new borderline entry is the one above the touched one
            if(e == borderline_stack_entries[e->bucket]){
                borderline_stack_entries[e->bucket] = borderline_stack_entries[e->bucket]->above;
            }

            // place the touched entry on top of LRU stack
            e->below = stack_top;
            e->above = NULL;
            stack_top->above = e;
            stack_top = e;
            e->bucket = 0;
        }
        /* else: if top of stack was referenced again, nothing to do! */

    }
    else{
        // allocate memory for new stack entry
        // (this local e shadows the NULL parameter e inside this branch)
        stack_entry* e = (stack_entry*) checked_malloc(sizeof(stack_entry));

        // initialize with address and link it above the current top of stack
        e->block_addr = a;
        e->above = NULL;
        e->below = stack_top;
        e->bucket = 0;

        // adjust top of stack
        stack_top->above = e;
        stack_top = e;

        stack_size++;

        // adjust all borderline entries that exist up until the overflow bucket
        // (which really has no borderline entry since there is no next bucket)
        // we retain the number of the first free bucket for next code
        for(bucket=0; bucket < BUCKET_CNT - 1; bucket++){
            if (borderline_stack_entries[bucket] == NULL) break;
            borderline_stack_entries[bucket]->bucket++;
            borderline_stack_entries[bucket] = borderline_stack_entries[bucket]->above;
        }

        // if the stack size has reached a boundary of a bucket, set the boundary entry for this bucket
        // the variable types are chosen deliberately large for overflow safety
        // at least they should not overflow sooner than stack_size anyway
        // overflow bucket boundary is never set
        if (bucket < BUCKET_CNT - 1)
        {
            UINT64 borderline_distance = ((UINT64) 2) << bucket;
            if(stack_size == borderline_distance){
                // find the bottom of the stack by traversing from somewhere close to it
                stack_entry *stack_bottom;
                if (bucket) stack_bottom = borderline_stack_entries [bucket-1];
                else stack_bottom = stack_top;
                while (stack_bottom->below) stack_bottom = stack_bottom->below;
                // the new borderline is the bottom of the stack
                borderline_stack_entries [bucket] = stack_bottom;
            }
        }
    }

    // stack_sanity_check();
}

/* determine reuse distance (= number of unique cache blocks referenced since last time this cache was referenced)
 * reuse distance is tracked in
move_to_top_fast (by climbing up the LRU stack entry-by-entry until top of stack is reached), 336 | * this function only returns the reuse distance calculated by move_to_top_fast */ 337 | 338 | static INT64 det_reuse_dist_bucket(stack_entry* e){ 339 | 340 | if(e != NULL) 341 | return e->bucket; 342 | else 343 | return -1; 344 | } 345 | 346 | /* register memory access (either read of write) determine which cache lines are touched */ 347 | VOID memstackdist_memRead(ADDRINT effMemAddr, ADDRINT size){ 348 | 349 | ADDRINT a, endAddr, addr, upperAddr, indexInChunk; 350 | stack_entry** chunk; 351 | stack_entry* entry_for_addr; 352 | 353 | /* Calculate index in cache addresses. The calculation does not 354 | * handle address overflows but those are unlikely to happen. */ 355 | addr = effMemAddr >> memstackdist_block_size; 356 | endAddr = (effMemAddr + size - 1) >> memstackdist_block_size; 357 | 358 | /* The hit is counted for all cache lines involved. */ 359 | for(a = addr; a <= endAddr; a++){ 360 | 361 | /* split the cache line address into hash key of chunk and index in chunk */ 362 | upperAddr = a >> LOG_MAX_MEM_ENTRIES; 363 | indexInChunk = a & MASK_MAX_MEM_ENTRIES; 364 | 365 | chunk = entry_lookup(hashTableCacheBlocks_fast, upperAddr); 366 | if(chunk == NULL) chunk = entry_install(hashTableCacheBlocks_fast, upperAddr); 367 | 368 | entry_for_addr = chunk[indexInChunk]; 369 | 370 | /* determine reuse distance for this access (if it has been accessed before) */ 371 | INT64 b = det_reuse_dist_bucket(entry_for_addr); 372 | 373 | if(b < 0) 374 | cold_refs++; 375 | else 376 | buckets[b]++; 377 | 378 | /* adjust LRU stack */ 379 | /* as a side effect, can allocate new entry, which could have been NULL so far */ 380 | move_to_top_fast(entry_for_addr, a); 381 | 382 | /* update hash table for new cache blocks */ 383 | if(chunk[indexInChunk] == NULL) chunk[indexInChunk] = stack_top; 384 | 385 | mem_ref_cnt++; 386 | } 387 | } 388 | 389 | VOID instrument_memstackdist(INS ins, VOID 
*v){ 390 | 391 | if( INS_IsMemoryRead(ins) ){ 392 | 393 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_memRead, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END); 394 | 395 | if( INS_HasMemoryRead2(ins) ) 396 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_memRead, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END); 397 | } 398 | 399 | if(interval_size != -1){ 400 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_instr_intervals,IARG_END); 401 | /* only called if interval is 'full' */ 402 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_instr_interval,IARG_END); 403 | } 404 | } 405 | 406 | /* finishing... */ 407 | VOID fini_memstackdist(INT32 code, VOID* v){ 408 | 409 | int i; 410 | 411 | if(interval_size == -1){ 412 | output_file_memstackdist.open(mkfilename("memstackdist_full_int"), ios::out|ios::trunc); 413 | } 414 | else{ 415 | output_file_memstackdist.open(mkfilename("memstackdist_phases_int"), ios::out|ios::app); 416 | } 417 | output_file_memstackdist << mem_ref_cnt << " " << cold_refs; 418 | for(i=0; i < BUCKET_CNT; i++){ 419 | output_file_memstackdist << " " << buckets[i]; 420 | } 421 | //output_file_memstackdist << endl << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; 422 | output_file_memstackdist << " "; 423 | output_file_memstackdist.close(); 424 | } 425 | -------------------------------------------------------------------------------- /mica_memstackdist.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 
8 | */ 9 | 10 | #include "mica.h" 11 | 12 | void init_memstackdist(); 13 | VOID instrument_memstackdist(INS ins, VOID* v); 14 | VOID fini_memstackdist(INT32 code, VOID* v); 15 | 16 | VOID memstackdist_memRead(ADDRINT effMemAddr, ADDRINT size); 17 | VOID memstackdist_instr_interval_output(); 18 | VOID memstackdist_instr_interval_reset(); 19 | -------------------------------------------------------------------------------- /mica_ppm.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 8 | */ 9 | 10 | #include "pin.H" 11 | 12 | /* MICA includes */ 13 | #include "mica_ppm.h" 14 | #include "mica_utils.h" 15 | 16 | /* Global variables */ 17 | 18 | extern INT64 interval_size; 19 | extern INT64 interval_ins_count; 20 | extern INT64 interval_ins_count_for_hpc_alignment; 21 | extern INT64 total_ins_count; 22 | extern INT64 total_ins_count_for_hpc_alignment; 23 | 24 | ofstream output_file_ppm; 25 | 26 | BOOL lastInstBr; // was the last instruction a cond. branch instruction? 27 | ADDRINT nextAddr; // address of the instruction after the last cond.branch 28 | UINT32 numStatCondBranchInst; // number of static cond. branch instructions up until now (-> unique id for the cond. branch) 29 | //UINT32 lastBrId; // index of last cond. 
branch instruction 30 | INT64* transition_counts; 31 | char* local_taken; 32 | INT64* local_taken_counts; 33 | INT64* local_brCounts; 34 | ADDRINT* indices_condBr; 35 | UINT32 indices_condBr_size; 36 | /* incorrect predictions counters */ 37 | INT64 GAg_incorrect_pred[NUM_HIST_LENGTHS]; 38 | INT64 GAs_incorrect_pred[NUM_HIST_LENGTHS]; 39 | INT64 PAg_incorrect_pred[NUM_HIST_LENGTHS]; 40 | INT64 PAs_incorrect_pred[NUM_HIST_LENGTHS]; 41 | /* prediction for each of the 4 predictors */ 42 | INT32 GAg_pred_taken[NUM_HIST_LENGTHS]; 43 | INT32 GAs_pred_taken[NUM_HIST_LENGTHS]; 44 | INT32 PAg_pred_taken[NUM_HIST_LENGTHS]; 45 | INT32 PAs_pred_taken[NUM_HIST_LENGTHS]; 46 | /* size of local pattern history */ 47 | INT64 brHist_size; 48 | /* global/local history */ 49 | INT32 bhr; 50 | INT32* local_bhr; 51 | /* global/local pattern history tables */ 52 | char*** GAg_pht; 53 | char*** PAg_pht; 54 | char**** GAs_pht; 55 | char**** PAs_pht; 56 | /* check if page entries were touched (memory efficiency) */ 57 | char* GAs_touched; 58 | char* PAs_touched; 59 | /* prediction history */ 60 | int GAg_pred_hist[NUM_HIST_LENGTHS]; 61 | int PAg_pred_hist[NUM_HIST_LENGTHS]; 62 | int GAs_pred_hist[NUM_HIST_LENGTHS]; 63 | int PAs_pred_hist[NUM_HIST_LENGTHS]; 64 | 65 | /* initializing */ 66 | void init_ppm(){ 67 | 68 | UINT32 i,j; 69 | int k; 70 | 71 | /* initializing total instruction counts is done in mica.cpp */ 72 | 73 | brHist_size = 512; 74 | 75 | numStatCondBranchInst = 1; 76 | 77 | /* translation of instruction address to indices */ 78 | indices_condBr_size = 1024; 79 | indices_condBr = (ADDRINT*) checked_malloc(indices_condBr_size*sizeof(ADDRINT)); 80 | 81 | lastInstBr = false; 82 | 83 | /* global/local history */ 84 | bhr = 0; 85 | local_bhr = (int*) checked_malloc(brHist_size * sizeof(int)); 86 | 87 | /* GAg PPM predictor */ 88 | GAg_pht = (char***) checked_malloc(NUM_HIST_LENGTHS * sizeof(char**)); 89 | for(j = 0; j < NUM_HIST_LENGTHS; j++) { 90 | GAg_pht[j] = (char**) 
checked_malloc((history_lengths[j]+1)*sizeof(char*)); 91 | for(i = 0; i <= history_lengths[j]; i++){ 92 | GAg_pht[j][i] = (char*) checked_malloc((1 << i)*sizeof(char)); 93 | for(k = 0; k < (1 << i); k++) 94 | GAg_pht[j][i][k] = 0; 95 | } 96 | } 97 | 98 | /* PAg PPM predictor */ 99 | PAg_pht = (char***) checked_malloc(NUM_HIST_LENGTHS * sizeof(char**)); 100 | for(j = 0; j < NUM_HIST_LENGTHS; j++) { 101 | PAg_pht[j] = (char**) checked_malloc((history_lengths[j]+1)*sizeof(char*)); 102 | for(i = 0; i <= history_lengths[j]; i++){ 103 | PAg_pht[j][i] = (char*) checked_malloc((1 << i)*sizeof(char)); 104 | for(k = 0; k < (1 << i); k++) 105 | PAg_pht[j][i][k] = 0; 106 | } 107 | } 108 | 109 | /* GAs PPM predictor */ 110 | GAs_touched = (char*) checked_malloc(brHist_size * sizeof(char)); 111 | GAs_pht = (char****) checked_malloc(brHist_size * sizeof(char***)); 112 | 113 | /* PAs PPM predictor */ 114 | PAs_touched = (char*) checked_malloc(brHist_size * sizeof(char)); 115 | PAs_pht = (char****) checked_malloc(brHist_size * sizeof(char***)); 116 | 117 | transition_counts = (INT64*) checked_malloc(brHist_size * sizeof(INT64)); 118 | local_taken = (char*) checked_malloc(brHist_size * sizeof(char)); 119 | local_brCounts = (INT64*) checked_malloc(brHist_size * sizeof(INT64)); 120 | local_taken_counts = (INT64*) checked_malloc(brHist_size * sizeof(INT64)); 121 | 122 | for(i = 0; i < brHist_size; i++){ 123 | transition_counts[i] = 0; 124 | local_taken[i] = -1; 125 | local_brCounts[i] = 0; 126 | local_taken_counts[i] = 0; 127 | GAs_touched[i] = 0; 128 | PAs_touched[i] = 0; 129 | } 130 | 131 | for(j=0; j < NUM_HIST_LENGTHS; j++){ 132 | GAg_incorrect_pred[j] = 0; 133 | GAs_incorrect_pred[j] = 0; 134 | PAg_incorrect_pred[j] = 0; 135 | PAs_incorrect_pred[j] = 0; 136 | } 137 | 138 | if(interval_size != -1){ 139 | output_file_ppm.open(mkfilename("ppm_phases_int"), ios::out|ios::trunc); 140 | output_file_ppm.close(); 141 | } 142 | 143 | } 144 | 145 | /*VOID ppm_instr_full(){ 146 | }*/ 147 | 
148 | ADDRINT ppm_instr_intervals(){ 149 | 150 | return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size); 151 | } 152 | 153 | VOID ppm_instr_interval_output(){ 154 | int i; 155 | INT64 total_transition_count = 0; 156 | INT64 total_taken_count = 0; 157 | INT64 total_brCount = 0; 158 | 159 | output_file_ppm.open(mkfilename("ppm_phases_int"), ios::out|ios::app); 160 | 161 | output_file_ppm << interval_size; 162 | for(i = 0; i < NUM_HIST_LENGTHS; i++) 163 | output_file_ppm << " " << GAg_incorrect_pred[i] << " " << PAg_incorrect_pred[i] << " " << GAs_incorrect_pred[i] << " " << PAs_incorrect_pred[i]; 164 | 165 | for(i=0; i < brHist_size; i++){ 166 | if(local_brCounts[i] > 0){ 167 | if( transition_counts[i] > local_brCounts[i]/2) 168 | total_transition_count += local_brCounts[i]-transition_counts[i]; 169 | else 170 | total_transition_count += transition_counts[i]; 171 | 172 | if( local_taken_counts[i] > local_brCounts[i]/2) 173 | total_taken_count += local_brCounts[i] - local_taken_counts[i]; 174 | else 175 | total_taken_count += local_taken_counts[i]; 176 | total_brCount += local_brCounts[i]; 177 | } 178 | } 179 | output_file_ppm << " " << total_brCount << " " << total_transition_count << " " << total_taken_count << endl; 180 | output_file_ppm.close(); 181 | } 182 | 183 | VOID ppm_instr_interval_reset(){ 184 | 185 | int i; 186 | 187 | for(i = 0; i < NUM_HIST_LENGTHS; i++){ 188 | GAg_incorrect_pred[i] = 0; 189 | GAs_incorrect_pred[i] = 0; 190 | PAg_incorrect_pred[i] = 0; 191 | PAs_incorrect_pred[i] = 0; 192 | } 193 | for(i=0; i < brHist_size; i++){ 194 | local_brCounts[i] = 0; 195 | local_taken_counts[i] = 0; 196 | transition_counts[i] = 0; 197 | } 198 | } 199 | 200 | VOID ppm_instr_interval(){ 201 | 202 | 203 | ppm_instr_interval_output(); 204 | ppm_instr_interval_reset(); 205 | 206 | interval_ins_count = 0; 207 | interval_ins_count_for_hpc_alignment = 0; 208 | } 209 | 210 | /* double memory space for branch history size when needed */ 211 | VOID 
reallocate_brHist(){ 212 | 213 | INT32* int_ptr; 214 | char* char_ptr; 215 | char**** char4_ptr; 216 | INT64* int64_ptr; 217 | 218 | brHist_size = brHist_size*2; 219 | 220 | int_ptr = (INT32*) checked_realloc(local_bhr,brHist_size * sizeof(INT32)); 221 | /*if(int_ptr == (INT32*) NULL) { 222 | cerr << "Could not allocate memory" << endl; 223 | exit(1); 224 | }*/ 225 | local_bhr = int_ptr; 226 | 227 | char_ptr = (char*) checked_realloc(GAs_touched, brHist_size * sizeof(char)); 228 | /*if(char_ptr == (char*) NULL){ 229 | cerr << "Could not allocate memory" << endl; 230 | exit(1); 231 | }*/ 232 | GAs_touched = char_ptr; 233 | 234 | char4_ptr = (char****) checked_realloc(GAs_pht,brHist_size * sizeof(char***)); 235 | /*if(char4_ptr == (char****) NULL) { 236 | cerr << "Could not allocate memory" << endl; 237 | exit(1); 238 | }*/ 239 | GAs_pht = char4_ptr; 240 | 241 | char_ptr = (char*) checked_realloc(PAs_touched,brHist_size * sizeof(char)); 242 | /*if(char_ptr == (char*) NULL) { 243 | cerr << "Could not allocate memory" << endl; 244 | exit(1); 245 | }*/ 246 | PAs_touched = char_ptr; 247 | 248 | char4_ptr = (char****) checked_realloc(PAs_pht,brHist_size * sizeof(char***)); 249 | /*if(char4_ptr == (char****) NULL) { 250 | cerr << "Could not allocate memory" << endl; 251 | exit(1); 252 | }*/ 253 | PAs_pht = char4_ptr; 254 | 255 | char_ptr = (char*) checked_realloc(local_taken,brHist_size * sizeof(char)); 256 | /*if(char_ptr == (char*) NULL) { 257 | cerr << "Could not allocate memory" << endl; 258 | exit(1); 259 | }*/ 260 | local_taken = char_ptr; 261 | 262 | int64_ptr = (INT64*) realloc(transition_counts, brHist_size * sizeof(INT64)); 263 | /*if(int64_ptr == (INT64*)NULL) { 264 | cerr,"Could not allocate memory" << endl; 265 | exit(1); 266 | }*/ 267 | transition_counts = int64_ptr; 268 | 269 | int64_ptr = (INT64*) realloc(local_brCounts, brHist_size * sizeof(INT64)); 270 | /*if(int64_ptr == (INT64*)NULL) { 271 | cerr << "Could not allocate memory" << endl; 272 | exit(1); 
273 | }*/ 274 | local_brCounts = int64_ptr; 275 | 276 | int64_ptr = (INT64*) realloc(local_taken_counts, brHist_size * sizeof(INT64)); 277 | /*if(int64_ptr == (INT64*)NULL) { 278 | cerr << "Could not allocate memory" << endl; 279 | exit(1); 280 | }*/ 281 | local_taken_counts = int64_ptr; 282 | } 283 | 284 | 285 | VOID condBr(UINT32 id, BOOL _t){ 286 | 287 | int i,j,k; 288 | int hist; 289 | BOOL taken = (_t != 0) ? 1 : 0; 290 | 291 | /* predict direction */ 292 | 293 | /* GAs PPM predictor lookup */ 294 | if(!GAs_touched[id]){ 295 | /* allocate PPM predictor */ 296 | 297 | GAs_touched[id] = 1; 298 | 299 | GAs_pht[id] = (char***) checked_malloc(NUM_HIST_LENGTHS * sizeof(char**)); 300 | for(j = 0; j < NUM_HIST_LENGTHS; j++){ 301 | GAs_pht[id][j] = (char**) checked_malloc((history_lengths[j]+1) * sizeof(char*)); 302 | for(i = 0; i <= (int)history_lengths[j]; i++){ 303 | GAs_pht[id][j][i] = (char*) checked_malloc((1 << i) * sizeof(char)); 304 | for(k = 0; k < (1<= 0; i--){ 332 | 333 | hist = bhr & (((int) 1 << i) -1); 334 | if(GAg_pht[j][i][hist] != 0){ 335 | GAg_pred_hist[j] = i; // used to only update predictor doing the prediction and higher order predictors (update exclusion) 336 | if(GAg_pht[j][i][hist] > 0) 337 | GAg_pred_taken[j] = 1; 338 | else 339 | GAg_pred_taken[j] = 0; 340 | break; 341 | } 342 | } 343 | 344 | /* PAg PPM predictor lookup */ 345 | for(i = (int)history_lengths[j]; i >= 0; i--){ 346 | hist = local_bhr[id] & (((int) 1 << i) -1); 347 | if(PAg_pht[j][i][hist] != 0){ 348 | PAg_pred_hist[j] = i; 349 | if(PAg_pht[j][i][hist] > 0) 350 | PAg_pred_taken[j] = 1; 351 | else 352 | PAg_pred_taken[j] = 0; 353 | break; 354 | } 355 | } 356 | 357 | /* GAs PPM predictor lookup */ 358 | for(i = (int)history_lengths[j]; i >= 0; i--){ 359 | hist = bhr & (((int) 1 << i) -1); 360 | if(GAs_pht[id][j][i][hist] != 0){ 361 | GAs_pred_hist[j] = i; 362 | if(GAs_pht[id][j][i][hist] > 0) 363 | GAs_pred_taken[j] = 1; 364 | else 365 | GAs_pred_taken[j] = 0; 366 | break; 367 | } 
368 | } 369 | 370 | /* PAs PPM predictor lookup */ 371 | for(i = (int)history_lengths[j]; i >= 0; i--){ 372 | hist = local_bhr[id] & (((int) 1 << i) -1); 373 | if(PAs_pht[id][j][i][hist] != 0){ 374 | PAs_pred_hist[j] = i; 375 | if(PAs_pht[id][j][i][hist] > 0) 376 | PAs_pred_taken[j] = 1; 377 | else 378 | PAs_pred_taken[j] = 0; 379 | break; 380 | } 381 | } 382 | } 383 | 384 | /* transition/taken rate */ 385 | if(local_taken[id] > -1){ 386 | if(taken != local_taken[id]) 387 | transition_counts[id]++; 388 | } 389 | local_taken[id] = taken; 390 | local_brCounts[id]++; 391 | if(taken) 392 | local_taken_counts[id]++; 393 | 394 | for(j=0; j < NUM_HIST_LENGTHS; j++){ 395 | /* update statistics according to predictions */ 396 | if(taken != GAg_pred_taken[j]) 397 | GAg_incorrect_pred[j]++; 398 | if(taken != GAs_pred_taken[j]) 399 | GAs_incorrect_pred[j]++; 400 | if(taken != PAg_pred_taken[j]) 401 | PAg_incorrect_pred[j]++; 402 | if(taken != PAs_pred_taken[j]) 403 | PAs_incorrect_pred[j]++; 404 | 405 | /* using update exclusion: only update predictor doing the prediction and higher order predictors */ 406 | 407 | /* update GAg PPM pattern history tables */ 408 | for(i = (int)GAg_pred_hist[j]; i <= (int)history_lengths[j]; i++){ 409 | hist = bhr & ((1 << i) - 1); 410 | if(taken){ 411 | if(GAg_pht[j][i][hist] < 127) 412 | GAg_pht[j][i][hist]++; 413 | } 414 | else{ 415 | if(GAg_pht[j][i][hist] > -127) 416 | GAg_pht[j][i][hist]--; 417 | } 418 | /* avoid == 0 because that means 'not set' */ 419 | if(GAg_pht[j][i][hist] == 0){ 420 | if(taken){ 421 | GAg_pht[j][i][hist]++; 422 | } 423 | else{ 424 | GAg_pht[j][i][hist]--; 425 | } 426 | } 427 | } 428 | /* update PAg PPM pattern history tables */ 429 | for(i = (int)PAg_pred_hist[j]; i <= (int)history_lengths[j]; i++){ 430 | hist = local_bhr[id] & ((1 << i) - 1); 431 | if(taken){ 432 | if(PAg_pht[j][i][hist] < 127) 433 | PAg_pht[j][i][hist]++; 434 | } 435 | else{ 436 | if(PAg_pht[j][i][hist] > -127) 437 | PAg_pht[j][i][hist]--; 438 | } 
439 | /* avoid == 0 because that means 'not set' */ 440 | if(PAg_pht[j][i][hist] == 0){ 441 | if(taken){ 442 | PAg_pht[j][i][hist]++; 443 | } 444 | else{ 445 | PAg_pht[j][i][hist]--; 446 | } 447 | } 448 | } 449 | /* update GAs PPM pattern history tables */ 450 | for(i = (int)GAs_pred_hist[j]; i <= (int)history_lengths[j]; i++){ 451 | hist = bhr & ((1 << i) - 1); 452 | if(taken){ 453 | if(GAs_pht[id][j][i][hist] < 127) 454 | GAs_pht[id][j][i][hist]++; 455 | } 456 | else{ 457 | if(GAs_pht[id][j][i][hist] > -127) 458 | GAs_pht[id][j][i][hist]--; 459 | } 460 | /* avoid == 0 because that means 'not set' */ 461 | if(GAs_pht[id][j][i][hist] == 0){ 462 | if(taken){ 463 | GAs_pht[id][j][i][hist]++; 464 | } 465 | else{ 466 | GAs_pht[id][j][i][hist]--; 467 | } 468 | } 469 | } 470 | /* update PAs PPM pattern history tables */ 471 | for(i = (int)PAs_pred_hist[j]; i <= (int)history_lengths[j]; i++){ 472 | hist = local_bhr[id] & ((1 << i) - 1); 473 | if(taken){ 474 | if(PAs_pht[id][j][i][hist] < 127) 475 | PAs_pht[id][j][i][hist]++; 476 | } 477 | else{ 478 | if(PAs_pht[id][j][i][hist] > -127) 479 | PAs_pht[id][j][i][hist]--; 480 | } 481 | /* avoid == 0 because that means 'not set' */ 482 | if(PAs_pht[id][j][i][hist] == 0){ 483 | if(taken){ 484 | PAs_pht[id][j][i][hist]++; 485 | } 486 | else{ 487 | PAs_pht[id][j][i][hist]--; 488 | } 489 | } 490 | } 491 | } 492 | 493 | /* update global history register */ 494 | bhr = bhr << 1; 495 | bhr |= taken; 496 | 497 | /* update local history */ 498 | local_bhr[id] = local_bhr[id] << 1; 499 | local_bhr[id] |= taken; 500 | } 501 | 502 | /* index for static conditional branch */ 503 | UINT32 index_condBr(ADDRINT ins_addr){ 504 | 505 | UINT64 i; 506 | for(i=0; i <= numStatCondBranchInst; i++){ 507 | if(indices_condBr[i] == ins_addr) 508 | return i; /* found */ 509 | } 510 | return 0; /* not found */ 511 | } 512 | 513 | /* register static conditional branch with some index */ 514 | void register_condBr(ADDRINT ins_addr){ 515 | 516 | ADDRINT* ptr; 
517 | 518 | /* reallocation needed */ 519 | if(numStatCondBranchInst >= indices_condBr_size){ 520 | 521 | indices_condBr_size *= 2; 522 | ptr = (ADDRINT*) realloc(indices_condBr, indices_condBr_size*sizeof(ADDRINT)); 523 | /*if(ptr == (ADDRINT*)NULL){ 524 | cerr << "Could not allocate memory (realloc in register_condBr)!" << endl; 525 | exit(1); 526 | }*/ 527 | indices_condBr = ptr; 528 | 529 | } 530 | 531 | /* register instruction to index */ 532 | indices_condBr[numStatCondBranchInst++] = ins_addr; 533 | } 534 | 535 | // static int _count = 0; 536 | VOID instrument_ppm_cond_br(INS ins){ 537 | UINT32 index = index_condBr(INS_Address(ins)); 538 | if(index < 1){ 539 | 540 | /* We don't know the number of static conditional branch instructions up front, 541 | * so we double the size of the branch history tables as needed by calling this function */ 542 | if(numStatCondBranchInst >= brHist_size) 543 | reallocate_brHist(); 544 | 545 | index = numStatCondBranchInst; 546 | 547 | register_condBr(INS_Address(ins)); 548 | register_condBr(INS_Address(ins)); 549 | } 550 | 551 | const char* str = INS_Disassemble(ins).c_str(); 552 | const char* substr = "xbegin"; 553 | if (strncmp(str, substr, strlen(substr)) == 0){ 554 | printf("as of pin 3.4 -- I don't think we can parse xbegin so skipping...\n"); 555 | return; 556 | } 557 | substr = "xend"; 558 | if (strncmp(str, substr, strlen(substr)) == 0){ 559 | printf("as of pin 3.4 -- I don't think we can parse xend so skipping...\n"); 560 | return; 561 | } 562 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)condBr,IARG_UINT32, index, IARG_BRANCH_TAKEN, IARG_END); 563 | } 564 | 565 | /* instrumenting (instruction level) */ 566 | VOID instrument_ppm(INS ins, VOID* v){ 567 | 568 | char cat[50]; 569 | strcpy(cat,CATEGORY_StringShort(INS_Category(ins)).c_str()); 570 | 571 | if(strcmp(cat,"COND_BR") == 0){ 572 | instrument_ppm_cond_br(ins); 573 | } 574 | 575 | /* inserting calls for counting instructions (full) is done in mica.cpp */ 576 | 
577 | if(interval_size != -1){ 578 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ppm_instr_intervals,IARG_END); 579 | /* only called if interval is 'full' */ 580 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)ppm_instr_interval,IARG_END); 581 | } 582 | } 583 | 584 | 585 | /* finishing... */ 586 | VOID fini_ppm(INT32 code, VOID* v){ 587 | 588 | int i; 589 | 590 | if(interval_size == -1){ 591 | output_file_ppm.open(mkfilename("ppm_full_int"), ios::out|ios::trunc); 592 | //output_file_ppm << total_ins_count; 593 | } 594 | else{ 595 | output_file_ppm.open(mkfilename("ppm_phases_int"), ios::out|ios::app); 596 | //output_file_ppm << interval_ins_count; 597 | } 598 | for(i=0; i < NUM_HIST_LENGTHS; i++) 599 | output_file_ppm << GAg_incorrect_pred[i] << " " << PAg_incorrect_pred[i] << " " << GAs_incorrect_pred[i] << " " << PAs_incorrect_pred[i] << " "; 600 | 601 | INT64 total_transition_count = 0; 602 | INT64 total_taken_count = 0; 603 | INT64 total_brCount = 0; 604 | for(i=0; i < brHist_size; i++){ 605 | if(local_brCounts[i] > 0){ 606 | if( transition_counts[i] > local_brCounts[i]/2) 607 | total_transition_count += local_brCounts[i]-transition_counts[i]; 608 | else 609 | total_transition_count += transition_counts[i]; 610 | 611 | if( local_taken_counts[i] > local_brCounts[i]/2) 612 | total_taken_count += local_brCounts[i] - local_taken_counts[i]; 613 | else 614 | total_taken_count += local_taken_counts[i]; 615 | total_brCount += local_brCounts[i]; 616 | } 617 | } 618 | output_file_ppm << total_brCount << " " << total_transition_count << " " << total_taken_count << endl; 619 | //output_file_ppm << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; 620 | output_file_ppm << " "; 621 | output_file_ppm.close(); 622 | } 623 | -------------------------------------------------------------------------------- /mica_ppm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to 
/* Global variables */

/* Instruction-count state defined in another translation unit. */
extern INT64 interval_size;	// -1 selects whole-run output; any other value enables per-interval output
extern INT64 interval_ins_count;	// zeroed by reg_instr_interval() after each interval record
extern INT64 interval_ins_count_for_hpc_alignment;	// compared against interval_size to detect a full interval
extern INT64 total_ins_count;	// used below as the timestamp of register writes and reads
extern INT64 total_ins_count_for_hpc_alignment;	// only referenced from commented-out output code in this file

ofstream output_file_reg;	// output stream, reopened for every record that is written

UINT64* opCounts; // array which keeps track of number-of-operands-per-instruction stats (indexed by register operand count)
BOOL* regRef; // register references (set once a register has been read or written)
INT64* PCTable; // production addresses of registers (holds total_ins_count at the last write)
INT64* regUseCnt; // usage counters for each register (reads since the last write)
INT64* regUseDistr; // distribution of register usage (indexed by clamped use count)
INT64* regAgeDistr; // distribution of register ages (indexed by clamped dependency distance)
sizeof(BOOL)); 42 | PCTable = (INT64*) checked_malloc(MAX_NUM_REGS * sizeof(INT64)); 43 | regUseCnt = (INT64*) checked_malloc(MAX_NUM_REGS * sizeof(INT64)); 44 | regUseDistr = (INT64*) checked_malloc(MAX_REG_USE * sizeof(INT64)); 45 | regAgeDistr = (INT64*) checked_malloc(MAX_COMM_DIST * sizeof(INT64)); 46 | 47 | /* initialize */ 48 | for(i = 0; i < MAX_NUM_OPER; i++){ 49 | opCounts[i] = 0; 50 | } 51 | for(i = 0; i < MAX_NUM_REGS; i++){ 52 | regRef[i] = false; 53 | PCTable[i] = 0; 54 | regUseCnt[i] = 0; 55 | } 56 | for(i = 0; i < MAX_REG_USE; i++){ 57 | regUseDistr[i] = 0; 58 | } 59 | for(i = 0; i < MAX_COMM_DIST; i++){ 60 | regAgeDistr[i] = 0; 61 | } 62 | 63 | if(interval_size != -1){ 64 | output_file_reg.open(mkfilename("reg_phases_int"), ios::out|ios::trunc); 65 | output_file_reg.close(); 66 | } 67 | } 68 | 69 | /* read register operand */ 70 | VOID readRegOp_reg(UINT32 regId){ 71 | 72 | /* *** REG *** */ 73 | 74 | 75 | /* register age */ 76 | INT64 age = total_ins_count - PCTable[regId]; // dependency distance 77 | if(age >= MAX_COMM_DIST){ 78 | age = MAX_COMM_DIST - 1; // trim if needed 79 | } 80 | //assert(age >= 0); 81 | regAgeDistr[age]++; 82 | 83 | /* register usage */ 84 | regUseCnt[regId]++; 85 | regRef[regId] = 1; // (operand) register was referenced 86 | } 87 | 88 | VOID writeRegOp_reg(UINT32 regId){ 89 | 90 | /* *** REG *** */ 91 | UINT32 num; 92 | 93 | /* if register was referenced before, adjust use distribution */ 94 | if(regRef[regId]){ 95 | num = regUseCnt[regId]; 96 | if(num >= MAX_REG_USE) // trim if needed 97 | num = MAX_REG_USE - 1; 98 | //assert(num >= 0); 99 | regUseDistr[num]++; 100 | } 101 | 102 | /* reset register stuff because of new value produced */ 103 | 104 | PCTable[regId] = total_ins_count; // last production = now 105 | regUseCnt[regId] = 0; // new value is never used (yet) 106 | regRef[regId] = true; // (destination) register was referenced (for tracking use distribution) 107 | } 108 | 109 | VOID reg_instr_full(VOID* _e){ 110 | 
111 | /* counting instructions is done in all_instr_full() */ 112 | 113 | ins_buffer_entry* e = (ins_buffer_entry*)_e; 114 | 115 | INT32 i; 116 | 117 | for(i=0; i < e->regReadCnt; i++){ 118 | readRegOp_reg((UINT32)e->regsRead[i]); 119 | } 120 | for(i=0; i < e->regWriteCnt; i++){ 121 | writeRegOp_reg((UINT32)e->regsWritten[i]); 122 | } 123 | 124 | opCounts[e->regOpCnt]++; 125 | } 126 | 127 | ADDRINT reg_instr_intervals(VOID* _e) { 128 | 129 | /* counting instructions is done in all_instr_intervals() */ 130 | 131 | ins_buffer_entry* e = (ins_buffer_entry*)_e; 132 | 133 | INT32 i; 134 | 135 | for(i=0; i < e->regReadCnt; i++){ 136 | readRegOp_reg((UINT32)e->regsRead[i]); 137 | } 138 | for(i=0; i < e->regWriteCnt; i++){ 139 | writeRegOp_reg((UINT32)e->regsWritten[i]); 140 | } 141 | 142 | opCounts[e->regOpCnt]++; 143 | 144 | return (ADDRINT) (interval_ins_count_for_hpc_alignment == interval_size); 145 | } 146 | 147 | VOID reg_instr_interval_output(){ 148 | int i; 149 | 150 | output_file_reg.open(mkfilename("reg_phases_int"), ios::out|ios::app); 151 | 152 | UINT64 totNumOps = 0; 153 | UINT64 num; 154 | 155 | /* total number of operands */ 156 | for(i = 1; i < MAX_NUM_OPER; i++){ 157 | totNumOps += opCounts[i]*i; 158 | } 159 | output_file_reg << interval_size << " " << totNumOps; 160 | 161 | /* average degree of use */ 162 | num = 0; 163 | for(i = 0; i < MAX_REG_USE; i++){ 164 | num += regUseDistr[i]; 165 | } 166 | output_file_reg << " " << num; 167 | num = 0; 168 | for(i = 0; i < MAX_REG_USE; i++){ 169 | num += i * regUseDistr[i]; 170 | } 171 | output_file_reg << " " << num; 172 | 173 | /* register dependency distributions */ 174 | num = 0; 175 | for(i = 0; i < MAX_COMM_DIST; i++){ 176 | num += regAgeDistr[i]; 177 | } 178 | output_file_reg << " " << num; 179 | num = 0; 180 | for(i = 0; i < MAX_COMM_DIST; i++){ 181 | num += regAgeDistr[i]; 182 | if( (i == 1) || (i == 2) || (i == 4) || (i == 8) || (i == 16) || (i == 32) || (i == 64)){ 183 | output_file_reg << " " << num; 
184 | } 185 | } 186 | output_file_reg << endl; 187 | 188 | output_file_reg.close(); 189 | } 190 | 191 | VOID reg_instr_interval_reset(){ 192 | 193 | int i; 194 | 195 | for(i = 0; i < MAX_NUM_OPER; i++){ 196 | opCounts[i] = 0; 197 | } 198 | /* do NOT reset register use counts or register definition addresses 199 | * that should only be done when the register is written to */ 200 | /* for(i = 0; i < MAX_NUM_REGS; i++){ 201 | regRef[i] = false; 202 | PCTable[i] = 0; 203 | regUseCnt[i] = 0; 204 | } */ 205 | for(i = 0; i < MAX_REG_USE; i++){ 206 | regUseDistr[i] = 0; 207 | } 208 | for(i = 0; i < MAX_COMM_DIST; i++){ 209 | regAgeDistr[i] = 0; 210 | } 211 | } 212 | 213 | VOID reg_instr_interval() { 214 | 215 | reg_instr_interval_output(); 216 | reg_instr_interval_reset(); 217 | interval_ins_count = 0; 218 | interval_ins_count_for_hpc_alignment = 0; 219 | 220 | } 221 | 222 | VOID instrument_reg(INS ins, ins_buffer_entry* e){ 223 | 224 | 225 | UINT32 i, maxNumRegsProd, maxNumRegsCons, regReadCnt, regWriteCnt, opCnt, regOpCnt; 226 | REG reg; 227 | 228 | if(!e->setRead){ 229 | 230 | maxNumRegsCons = INS_MaxNumRRegs(ins); // maximum number of register consumations (reads) 231 | 232 | regReadCnt = 0; 233 | for(i = 0; i < maxNumRegsCons; i++){ // finding all register operands which are read 234 | reg = INS_RegR(ins,i); 235 | //assert(((UINT32)reg) < MAX_NUM_REGS); 236 | /* only consider valid general-purpose registers (any bit-width) and floating-point registers, 237 | * i.e. 
exlude branch, segment and pin registers, among others */ 238 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 239 | regReadCnt++; 240 | } 241 | } 242 | 243 | e->regReadCnt = regReadCnt; 244 | e->regsRead = (REG*) checked_malloc(regReadCnt*sizeof(REG)); 245 | 246 | regReadCnt = 0; 247 | for(i = 0; i < maxNumRegsCons; i++){ // finding all register operands which are read 248 | reg = INS_RegR(ins,i); 249 | //assert(((UINT32)reg) < MAX_NUM_REGS); 250 | /* only consider valid general-purpose registers (any bit-width) and floating-point registers, 251 | * i.e. exlude branch, segment and pin registers, among others */ 252 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 253 | e->regsRead[regReadCnt++] = reg; 254 | } 255 | } 256 | e->setRead = true; 257 | } 258 | if(!e->setWritten){ 259 | 260 | maxNumRegsProd = INS_MaxNumWRegs(ins); 261 | 262 | regWriteCnt = 0; 263 | for(i=0; i < maxNumRegsProd; i++){ 264 | 265 | reg = INS_RegW(ins, i); 266 | //assert(((UINT32)reg) < MAX_NUM_REGS); 267 | /* only consider valid general-purpose registers (any bit-width) and floating-point registers, 268 | * i.e. 
exlude branch, segment and pin registers, among others */ 269 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 270 | regWriteCnt++; 271 | } 272 | } 273 | 274 | e->regWriteCnt = regWriteCnt; 275 | e->regsWritten = (REG*)checked_malloc(regWriteCnt*sizeof(REG)); 276 | 277 | regWriteCnt = 0; 278 | for(i=0; i < maxNumRegsProd; i++){ 279 | 280 | reg = INS_RegW(ins, i); 281 | //assert(((UINT32)reg) < MAX_NUM_REGS); 282 | /* only consider valid general-purpose registers (any bit-width) and floating-point registers, 283 | * i.e. exlude branch, segment and pin registers, among others */ 284 | if(REG_valid(reg) && (REG_is_fr(reg) || REG_is_mm(reg) || REG_is_xmm(reg) || REG_is_gr(reg) || REG_is_gr8(reg) || REG_is_gr16(reg) || REG_is_gr32(reg) || REG_is_gr64(reg))){ 285 | e->regsWritten[regWriteCnt++] = reg; 286 | } 287 | } 288 | 289 | 290 | e->setWritten = true; 291 | } 292 | 293 | if(!e->setRegOpCnt){ 294 | regOpCnt = 0; 295 | opCnt = INS_OperandCount(ins); 296 | for(i = 0; i < opCnt; i++){ 297 | if(INS_OperandIsReg(ins,i)) 298 | regOpCnt++; 299 | } 300 | /*if(regOpCnt >= MAX_NUM_OPER){ 301 | cerr << "BOOM! -> MAX_NUM_OPER is exceeded! (" << regOpCnt << ")" << endl; 302 | exit(1); 303 | }*/ 304 | e->regOpCnt = regOpCnt; 305 | e->setRegOpCnt = true; 306 | } 307 | 308 | if(interval_size == -1){ 309 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)reg_instr_full, IARG_PTR, (void*)e, IARG_END); 310 | } 311 | else{ 312 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)reg_instr_intervals, IARG_PTR, (void*)e, IARG_END); 313 | /* only called if interval is full */ 314 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)reg_instr_interval, IARG_END); 315 | } 316 | } 317 | 318 | /* finishing... 
*/ 319 | VOID fini_reg(INT32 code, VOID* v){ 320 | 321 | if(interval_size == -1){ 322 | output_file_reg.open(mkfilename("reg_full_int"), ios::out|ios::trunc); 323 | //output_file_reg << total_ins_count; 324 | } 325 | else{ 326 | output_file_reg.open(mkfilename("reg_phases_int"), ios::out|ios::app); 327 | //output_file_reg << interval_ins_count; 328 | } 329 | 330 | int i; 331 | UINT64 totNumOps = 0; 332 | UINT64 num; 333 | /* total number of operands */ 334 | for(i = 1; i < MAX_NUM_OPER; i++){ 335 | totNumOps += opCounts[i]*i; 336 | } 337 | output_file_reg << totNumOps; 338 | 339 | // ** average degree of use ** 340 | num = 0; 341 | for(i = 0; i < MAX_REG_USE; i++){ 342 | num += regUseDistr[i]; 343 | } 344 | output_file_reg << " " << num; 345 | num = 0; 346 | for(i = 0; i < MAX_REG_USE; i++){ 347 | num += i * regUseDistr[i]; 348 | } 349 | output_file_reg << " " << num; 350 | 351 | // ** register dependency distributions ** 352 | num = 0; 353 | for(i = 0; i < MAX_COMM_DIST; i++){ 354 | num += regAgeDistr[i]; 355 | } 356 | output_file_reg << " " << num; 357 | num = 0; 358 | for(i = 0; i < MAX_COMM_DIST; i++){ 359 | num += regAgeDistr[i]; 360 | if( (i == 1) || (i == 2) || (i == 4) || (i == 8) || (i == 16) || (i == 32) || (i == 64)){ 361 | output_file_reg << " " << num; 362 | } 363 | } 364 | output_file_reg << endl; 365 | //output_file_reg << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; 366 | output_file_reg << " "; 367 | output_file_reg.close(); 368 | } 369 | -------------------------------------------------------------------------------- /mica_reg.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 
8 | */ 9 | 10 | #include "mica.h" 11 | #include "mica_utils.h" 12 | 13 | void init_reg(); 14 | VOID instrument_reg(INS ins, ins_buffer_entry* e); 15 | VOID fini_reg(INT32 code, VOID* v); 16 | 17 | VOID reg_instr_full(VOID* _e); 18 | ADDRINT reg_instr_intervals(VOID* _e); 19 | VOID reg_instr_interval_output(); 20 | VOID reg_instr_interval_reset(); 21 | 22 | -------------------------------------------------------------------------------- /mica_stride.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 8 | */ 9 | 10 | #include "pin.H" 11 | 12 | /* MICA includes */ 13 | #include "mica_utils.h" 14 | #include "mica_stride.h" 15 | 16 | /* Global variables */ 17 | 18 | extern INT64 interval_size; 19 | extern INT64 interval_ins_count; 20 | extern INT64 interval_ins_count_for_hpc_alignment; 21 | extern INT64 total_ins_count; 22 | extern INT64 total_ins_count_for_hpc_alignment; 23 | 24 | ofstream output_file_stride; 25 | 26 | UINT64 numRead, numWrite; 27 | UINT32 readIndex; 28 | UINT32 writeIndex; 29 | ADDRINT* instrRead; 30 | ADDRINT* instrWrite; 31 | UINT64 numInstrsAnalyzed; 32 | UINT64 numReadInstrsAnalyzed; 33 | UINT64 numWriteInstrsAnalyzed; 34 | UINT64 localReadDistrib[MAX_DISTR]; 35 | UINT64 globalReadDistrib[MAX_DISTR]; 36 | UINT64 localWriteDistrib[MAX_DISTR]; 37 | UINT64 globalWriteDistrib[MAX_DISTR]; 38 | ADDRINT lastReadAddr; 39 | ADDRINT lastWriteAddr; 40 | ADDRINT* indices_memRead; 41 | UINT32 indices_memRead_size; 42 | ADDRINT* indices_memWrite; 43 | UINT32 indices_memWrite_size; 44 | 45 | 46 | /* initializing */ 47 | void init_stride(){ 48 | 49 | int i; 50 | 51 | /* initializing total instruction counts is done in mica.cpp */ 52 | 53 | /* initial sizes */ 
54 | numRead = 1024; 55 | numWrite = 1024; 56 | 57 | /* allocate memory */ 58 | instrRead = (ADDRINT*) checked_malloc(numRead * sizeof(ADDRINT)); 59 | instrWrite = (ADDRINT*) checked_malloc(numWrite * sizeof(ADDRINT)); 60 | 61 | /* initialize */ 62 | readIndex = 1; 63 | writeIndex = 1; 64 | for (i = 0; i < (int)numRead; i++) 65 | instrRead[i] = 0; 66 | for (i = 0; i < (int)numWrite; i++) 67 | instrWrite[i] = 0; 68 | lastReadAddr = 0; 69 | lastWriteAddr = 0; 70 | for (i = 0; i < MAX_DISTR; i++) { 71 | localReadDistrib[i] = 0; 72 | localWriteDistrib[i] = 0; 73 | globalReadDistrib[i] = 0; 74 | globalWriteDistrib[i] = 0; 75 | } 76 | numInstrsAnalyzed = 0; 77 | numReadInstrsAnalyzed = 0; 78 | numWriteInstrsAnalyzed = 0; 79 | 80 | indices_memRead_size = 1024; 81 | indices_memRead = (ADDRINT*) checked_malloc(indices_memRead_size*sizeof(ADDRINT)); 82 | for (i = 0; i < (int)indices_memRead_size; i++) 83 | indices_memRead[i] = 0; 84 | 85 | indices_memWrite_size = 1024; 86 | indices_memWrite = (ADDRINT*) checked_malloc(indices_memWrite_size*sizeof(ADDRINT)); 87 | for (i = 0; i < (int)indices_memWrite_size; i++) 88 | indices_memWrite[i] = 0; 89 | 90 | if(interval_size != -1){ 91 | output_file_stride.open(mkfilename("stride_phases_int"), ios::out|ios::trunc); 92 | output_file_stride.close(); 93 | } 94 | } 95 | 96 | /*VOID stride_instr_full(){ 97 | }*/ 98 | 99 | ADDRINT stride_instr_intervals(){ 100 | /* counting instructions is done in all_instr_intervals() */ 101 | 102 | return (ADDRINT) (interval_ins_count_for_hpc_alignment == interval_size); 103 | } 104 | 105 | VOID stride_instr_interval_output(){ 106 | int i; 107 | 108 | UINT64 cum; 109 | 110 | output_file_stride.open(mkfilename("stride_phases_int"), ios::out|ios::app); 111 | 112 | output_file_stride << numReadInstrsAnalyzed; 113 | /* local read distribution */ 114 | cum = 0; 115 | for(i = 0; i < MAX_DISTR; i++){ 116 | cum += localReadDistrib[i]; 117 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i 
== 32768) || (i == 262144) ){ 118 | output_file_stride << " " << cum; 119 | } 120 | if(i == 262144) 121 | break; 122 | } 123 | /* global read distribution */ 124 | cum = 0; 125 | for(i = 0; i < MAX_DISTR; i++){ 126 | cum += globalReadDistrib[i]; 127 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){ 128 | output_file_stride << " " << cum; 129 | } 130 | if(i == 262144) 131 | break; 132 | } 133 | output_file_stride << " " << numWriteInstrsAnalyzed; 134 | /* local write distribution */ 135 | cum = 0; 136 | for(i = 0; i < MAX_DISTR; i++){ 137 | cum += localWriteDistrib[i]; 138 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){ 139 | output_file_stride << " " << cum; 140 | } 141 | if(i == 262144) 142 | break; 143 | } 144 | /* global write distribution */ 145 | cum = 0; 146 | for(i = 0; i < MAX_DISTR; i++){ 147 | cum += globalWriteDistrib[i]; 148 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) ){ 149 | output_file_stride << " " << cum; 150 | } 151 | if(i == 262144){ 152 | output_file_stride << " " << cum << endl; 153 | break; 154 | } 155 | } 156 | output_file_stride.close(); 157 | } 158 | 159 | VOID stride_instr_interval_reset(){ 160 | int i; 161 | 162 | for (i = 0; i < MAX_DISTR; i++) { 163 | localReadDistrib [i] = 0; 164 | localWriteDistrib [i] = 0; 165 | globalReadDistrib [i] = 0; 166 | globalWriteDistrib [i] = 0; 167 | } 168 | numInstrsAnalyzed = 0; 169 | numReadInstrsAnalyzed = 0; 170 | numWriteInstrsAnalyzed = 0; 171 | interval_ins_count = 0; 172 | interval_ins_count_for_hpc_alignment = 0; 173 | } 174 | 175 | void stride_instr_interval(){ 176 | 177 | stride_instr_interval_output(); 178 | stride_instr_interval_reset(); 179 | } 180 | 181 | /* Finds indices for instruction at some address, given some list of index-instruction pairs 182 | * Note: the 'nth_occur' argument is needed because a single instruction can have two read 
memory operands (which both have a different index) */ 183 | UINT32 index_memRead_stride(int nth_occur, ADDRINT ins_addr){ 184 | 185 | UINT32 i; 186 | int j=0; 187 | for(i=1; i <= readIndex; i++){ 188 | if(indices_memRead[i] == ins_addr) 189 | j++; 190 | if(j==nth_occur) 191 | return i; /* found */ 192 | } 193 | return 0; /* not found */ 194 | } 195 | 196 | /* We don't know the static number of read/write operations until 197 | * the entire program has executed, hence we dynamically allocate the arrays */ 198 | VOID reallocate_readArray_stride(){ 199 | 200 | ADDRINT* ptr; 201 | 202 | numRead *= 2; 203 | 204 | ptr = (ADDRINT*) checked_realloc(instrRead, numRead * sizeof(ADDRINT)); 205 | /*if (ptr == (ADDRINT*) NULL) { 206 | cerr << "Not enough memory (in reallocate_readArray_stride)" << endl; 207 | exit(1); 208 | }*/ 209 | instrRead = ptr; 210 | } 211 | 212 | UINT32 index_memWrite_stride(ADDRINT ins_addr){ 213 | 214 | UINT32 i; 215 | for(i=1; i <= writeIndex; i++){ 216 | if(indices_memWrite[i] == ins_addr) 217 | return i; /* found */ 218 | } 219 | return 0; /* not found */ 220 | } 221 | 222 | 223 | VOID reallocate_writeArray_stride(){ 224 | 225 | ADDRINT* ptr; 226 | 227 | numWrite *= 2; 228 | 229 | ptr = (ADDRINT*) checked_realloc(instrWrite, numWrite * sizeof(ADDRINT)); 230 | /*if (ptr == (ADDRINT*) NULL) { 231 | cerr << "Not enough memory (in reallocate_writeArray_stride)" << endl; 232 | exit(1); 233 | }*/ 234 | instrWrite = ptr; 235 | } 236 | 237 | void register_memRead_stride(ADDRINT ins_addr){ 238 | 239 | ADDRINT* ptr; 240 | 241 | /* reallocation needed */ 242 | if(readIndex >= indices_memRead_size){ 243 | 244 | indices_memRead_size *= 2; 245 | ptr = (ADDRINT*) realloc(indices_memRead, indices_memRead_size*sizeof(ADDRINT)); 246 | /*if(ptr == (ADDRINT*)NULL){ 247 | cerr << "Could not allocate memory (realloc in register_readMem)!" 
<< endl; 248 | exit(1); 249 | }*/ 250 | indices_memRead = ptr; 251 | 252 | } 253 | 254 | /* register instruction to index */ 255 | indices_memRead[readIndex++] = ins_addr; 256 | } 257 | 258 | void register_memWrite_stride(ADDRINT ins_addr){ 259 | 260 | ADDRINT* ptr; 261 | 262 | /* reallocation needed */ 263 | if(writeIndex >= indices_memWrite_size){ 264 | 265 | indices_memWrite_size *= 2; 266 | ptr = (ADDRINT*) realloc(indices_memWrite, indices_memWrite_size*sizeof(ADDRINT)); 267 | /*if(ptr == (ADDRINT*)NULL){ 268 | cerr << "Could not allocate memory (realloc in register_writeMem)!" << endl; 269 | exit(1); 270 | }*/ 271 | indices_memWrite = ptr; 272 | 273 | } 274 | 275 | /* register instruction to index */ 276 | indices_memWrite[writeIndex++] = ins_addr; 277 | } 278 | 279 | VOID readMem_stride(UINT32 index, ADDRINT effAddr, ADDRINT size){ 280 | 281 | ADDRINT stride; 282 | 283 | numReadInstrsAnalyzed++; 284 | 285 | /* local stride */ 286 | /* avoid negative values, has to be done like this (not stride < 0 => stride = -stride (avoid problems with unsigned values)) */ 287 | if(effAddr > instrRead[index]) 288 | stride = effAddr - instrRead[index]; 289 | else 290 | stride = instrRead[index] - effAddr; 291 | if(stride >= MAX_DISTR){ 292 | stride = MAX_DISTR-1; // trim if needed 293 | } 294 | 295 | localReadDistrib[stride]++; 296 | instrRead[index] = effAddr + size - 1; 297 | 298 | /* global stride */ 299 | /* avoid negative values, has to be done like this (not stride < 0 => stride = -stride (avoid problems with unsigned values)) */ 300 | if(effAddr > lastReadAddr) 301 | stride = effAddr - lastReadAddr; 302 | else 303 | stride = lastReadAddr - effAddr; 304 | if(stride >= MAX_DISTR){ 305 | stride = MAX_DISTR-1; // trim if needed 306 | } 307 | 308 | globalReadDistrib[stride]++; 309 | lastReadAddr = effAddr + size - 1; 310 | } 311 | 312 | VOID writeMem_stride(UINT32 index, ADDRINT effAddr, ADDRINT size){ 313 | 314 | ADDRINT stride; 315 | 316 | numWriteInstrsAnalyzed++; 317 
| 318 | /* local stride */ 319 | /* avoid negative values, has to be doen like this (not stride < 0 => stride = -stride) */ 320 | if(effAddr > instrWrite[index]) 321 | stride = effAddr - instrWrite[index]; 322 | else 323 | stride = instrWrite[index] - effAddr; 324 | if(stride >= MAX_DISTR){ 325 | stride = MAX_DISTR-1; // trim if needed 326 | } 327 | 328 | localWriteDistrib[stride]++; 329 | instrWrite[index] = effAddr + size - 1; 330 | 331 | /* global stride */ 332 | /* avoid negative values, has to be doen like this (not stride < 0 => stride = -stride) */ 333 | if(effAddr > lastWriteAddr) 334 | stride = effAddr - lastWriteAddr; 335 | else 336 | stride = lastWriteAddr - effAddr; 337 | if(stride >= MAX_DISTR){ 338 | stride = MAX_DISTR-1; // trim if needed 339 | } 340 | 341 | globalWriteDistrib[stride]++; 342 | lastWriteAddr = effAddr + size - 1; 343 | } 344 | 345 | UINT32 stride_index_memRead1(ADDRINT a){ 346 | 347 | UINT32 index = index_memRead_stride(1, a); 348 | if(index < 1){ 349 | if(readIndex >= numRead){ 350 | reallocate_readArray_stride(); 351 | } 352 | index = readIndex; 353 | 354 | register_memRead_stride(a); 355 | } 356 | return index; 357 | } 358 | 359 | UINT32 stride_index_memRead2(ADDRINT a){ 360 | UINT32 index = index_memRead_stride(2, a); 361 | if(index < 1){ 362 | if(readIndex >= numRead){ 363 | reallocate_readArray_stride(); 364 | } 365 | index = readIndex; 366 | 367 | register_memRead_stride(a); 368 | } 369 | return index; 370 | } 371 | 372 | UINT32 stride_index_memWrite(ADDRINT a){ 373 | UINT32 index = index_memWrite_stride(a); 374 | if(index < 1){ 375 | if(writeIndex >= numWrite) 376 | reallocate_writeArray_stride(); 377 | index = writeIndex; 378 | register_memWrite_stride(a); 379 | } 380 | return index; 381 | } 382 | 383 | /* instrumenting (instruction level) */ 384 | VOID instrument_stride(INS ins, VOID* v){ 385 | 386 | UINT32 index; 387 | 388 | if( INS_IsMemoryRead(ins) ){ // instruction has memory read operand 389 | 390 | index = 
stride_index_memRead1(INS_Address(ins)); 391 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_stride, IARG_UINT32, index, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END); 392 | 393 | if( INS_HasMemoryRead2(ins) ){ // second memory read operand 394 | 395 | index = stride_index_memRead2(INS_Address(ins)); 396 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)readMem_stride, IARG_UINT32, index, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END); 397 | } 398 | } 399 | 400 | if( INS_IsMemoryWrite(ins) ){ // instruction has memory write operand 401 | index = stride_index_memWrite(INS_Address(ins)); 402 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)writeMem_stride, IARG_UINT32, index, IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_END); 403 | 404 | } 405 | 406 | /* inserting calls for counting instructions (full) is done in mica.cpp */ 407 | 408 | if(interval_size != -1){ 409 | INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)stride_instr_intervals, IARG_END); 410 | INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)stride_instr_interval, IARG_END); 411 | } 412 | } 413 | 414 | /* finishing... 
*/ 415 | VOID fini_stride(INT32 code, VOID* v){ 416 | 417 | int i; 418 | 419 | UINT64 cum; 420 | 421 | if(interval_size == -1){ 422 | output_file_stride.open(mkfilename("stride_full_int"), ios::out|ios::trunc); 423 | } 424 | else{ 425 | output_file_stride.open(mkfilename("stride_phases_int"), ios::out|ios::app); 426 | } 427 | output_file_stride << numReadInstrsAnalyzed; 428 | /* local read distribution */ 429 | cum = 0; 430 | for(i = 0; i < MAX_DISTR; i++){ 431 | cum += localReadDistrib[i]; 432 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){ 433 | output_file_stride << " " << cum; 434 | } 435 | if(i == 262144) 436 | break; 437 | } 438 | /* global read distribution */ 439 | cum = 0; 440 | for(i = 0; i < MAX_DISTR; i++){ 441 | cum += globalReadDistrib[i]; 442 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){ 443 | output_file_stride << " " << cum; 444 | } 445 | if(i == 262144) 446 | break; 447 | } 448 | output_file_stride << " " << numWriteInstrsAnalyzed; 449 | /* local write distribution */ 450 | cum = 0; 451 | for(i = 0; i < MAX_DISTR; i++){ 452 | cum += localWriteDistrib[i]; 453 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) || (i == 262144) ){ 454 | output_file_stride << " " << cum; 455 | } 456 | if(i == 262144) 457 | break; 458 | } 459 | /* global write distribution */ 460 | cum = 0; 461 | for(i = 0; i < MAX_DISTR; i++){ 462 | cum += globalWriteDistrib[i]; 463 | if( (i == 0) || (i == 8) || (i == 64) || (i == 512) || (i == 4096) || (i == 32768) ){ 464 | output_file_stride << " " << cum; 465 | } 466 | if(i == 262144){ 467 | output_file_stride << " " << cum << endl; 468 | break; 469 | } 470 | } 471 | //output_file_stride << "number of instructions: " << total_ins_count_for_hpc_alignment << endl; 472 | output_file_stride.close(); 473 | } 474 | 
-------------------------------------------------------------------------------- /mica_stride.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 8 | */ 9 | 10 | #include "mica.h" 11 | 12 | void init_stride(); 13 | VOID instrument_stride(INS ins, VOID* v); 14 | VOID fini_stride(INT32 code, VOID* v); 15 | 16 | UINT32 stride_index_memRead1(ADDRINT a); 17 | UINT32 stride_index_memRead2(ADDRINT a); 18 | UINT32 stride_index_memWrite(ADDRINT a); 19 | 20 | VOID readMem_stride(UINT32 index, ADDRINT effAddr, ADDRINT size); 21 | VOID writeMem_stride(UINT32 index, ADDRINT effAdrr, ADDRINT size); 22 | 23 | VOID stride_instr_interval_output(); 24 | VOID stride_instr_interval_reset(); 25 | -------------------------------------------------------------------------------- /mica_utils.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 
8 | */ 9 | 10 | /* MICA includes */ 11 | #include "mica_utils.h" 12 | 13 | /* lookup memNode for key in table 14 | * returns NULL is no such memNode is found 15 | */ 16 | memNode* lookup(nlist** table, ADDRINT key){ 17 | 18 | nlist* np; 19 | 20 | for (np = table[key % MAX_MEM_TABLE_ENTRIES]; np != (nlist*)NULL; np = np->next){ 21 | if(np-> id == key) 22 | return np->mem; 23 | } 24 | 25 | return (memNode*)NULL; 26 | } 27 | 28 | /* install new memNode in table */ 29 | memNode* install(nlist** table, ADDRINT key){ 30 | 31 | nlist* np; 32 | ADDRINT index; 33 | 34 | index = key % MAX_MEM_TABLE_ENTRIES; 35 | 36 | np = table[index]; 37 | 38 | if(np == (nlist*)NULL) { 39 | np = (nlist*)checked_malloc(sizeof(nlist)); 40 | table[index] = np; 41 | } 42 | else{ 43 | while(np->next != (nlist*)NULL){ 44 | np = np->next; 45 | } 46 | np->next = (nlist*)checked_malloc(sizeof(nlist)); 47 | np = np->next; 48 | } 49 | np->next = (nlist*)NULL; 50 | np->id = key; 51 | np->mem = (memNode*)checked_malloc(sizeof(memNode)); 52 | for(ADDRINT i = 0; i < MAX_MEM_ENTRIES; i++){ 53 | (np->mem)->timeAvailable[i] = 0; 54 | } 55 | for(ADDRINT i = 0; i < MAX_MEM_BLOCK; i++){ 56 | (np->mem)->numReferenced[i] = false; 57 | } 58 | return (np->mem); 59 | } 60 | 61 | /** 62 | * Free a nlist and set the pointer to NULL. 63 | */ 64 | void free_nlist(nlist*& np) { 65 | nlist* np_rm; 66 | while(np != (nlist*)NULL){ 67 | np_rm = np; 68 | np = np->next; 69 | free(np_rm->mem); 70 | free(np_rm); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /mica_utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of MICA, a Pin tool to collect 3 | * microarchitecture-independent program characteristics using the Pin 4 | * instrumentation framework. 5 | * 6 | * Please see the README.txt file distributed with the MICA release for more 7 | * information. 
8 | */ 9 | 10 | #include "mica.h" 11 | 12 | #ifndef MICA_UTILS 13 | 14 | #define MICA_UTILS 15 | 16 | 17 | /* *** utility functions *** */ 18 | 19 | #define WRAP(x) #x 20 | #define REWRAP(x) WRAP(x) 21 | #define LOCATION __BASE_FILE__ ":" __FILE__ ":" REWRAP(__LINE__) 22 | 23 | #define checked_malloc(size) ({ void *result = malloc (size); if (__builtin_expect (!result, false)) { ERROR_MSG ("Out of memory at " LOCATION "."); exit (1); }; result; }) 24 | #define checked_strdup(string) ({ char *result = strdup (string); if (__builtin_expect (!result, false)) { ERROR_MSG ("Out of memory at " LOCATION "."); exit (1); }; result; }) 25 | #define checked_realloc(ptr, size) ({ void *result = realloc (ptr, size); if (__builtin_expect (!result, false)) { ERROR_MSG ("Out of memory at " LOCATION "."); exit (1); }; result; }) 26 | 27 | 28 | /* *** struct definitions *** */ 29 | 30 | /* memory node struct */ 31 | typedef struct memNode_type{ 32 | /* ilp */ 33 | INT32 timeAvailable[MAX_MEM_ENTRIES]; 34 | /* memfootprint */ 35 | bool numReferenced [MAX_MEM_BLOCK]; 36 | } memNode; 37 | 38 | /* linked list struct */ 39 | typedef struct nlist_type { 40 | ADDRINT id; 41 | memNode* mem; 42 | struct nlist_type* next; 43 | } nlist; 44 | 45 | memNode* lookup(nlist** table, ADDRINT key); 46 | memNode* install(nlist** table, ADDRINT key); 47 | void free_nlist(nlist*& np); 48 | 49 | typedef struct ins_buffer_entry_type { 50 | ADDRINT insAddr; 51 | BOOL setRead; 52 | BOOL setWritten; 53 | BOOL setRegOpCnt; 54 | INT32 regOpCnt; 55 | INT32 regReadCnt; 56 | REG* regsRead; 57 | INT32 regWriteCnt; 58 | REG* regsWritten; 59 | ins_buffer_entry_type* next; 60 | } ins_buffer_entry; 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /tableGen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Amir H. 
# Ashouri - 2017
# (www.eecg.toronto.edu/~aashouri/)
# This script looks for all MICA output files corresponding to a pid and
# generates a MICA table. The first row is the header and is added as well.
# Tested with MICA v0.40
#
# Usage: run from the directory that contains one sub-directory per benchmark.
# Every sub-directory is scanned for ilp_full_int_<pid>_pin.out files; for each
# pid found, all files named *_<pid>_pin.out are concatenated (in glob order)
# into one row of micaTable.txt, which is created in the starting directory.

# Entries to scan; expanded unquoted in the loop below on purpose so that the
# glob is resolved against the current directory.
benchmarks=*

# Absolute path of the table, so rows never depend on the directory that is
# being scanned.
table="$PWD/micaTable.txt"

# Header row: one array element per column.
columns=(
    APPLICATION_NAME DATASET
    totInstruction ILP32 ILP64 ILP128 ILP256
    total_ins_count_for_hpc_alignment totInstruction
    mem-read mem-write control-flow arithmetic floating-point stack shift string sse other nop
    InstrFootprint64 InstrFootprint4k DataFootprint64 DataFootprint4k
    mem_access
    memReuseDist0-2 memReuseDist2-4 memReuseDist4-8 memReuseDist8-16
    memReuseDist16-32 memReuseDist32-64 memReuseDist64-128 memReuseDist128-256
    memReuseDist256-512 memReuseDist512-1k memReuseDist1k-2k memReuseDist2k-4k
    memReuseDist4k-8k memReuseDist8k-16k memReuseDist16k-32k memReuseDist32k-64k
    memReuseDist64k-128k memReuseDist128k-256k memReuseDist256k-512k memReuseDist512k-00
    GAg_mispred_cnt_4bits PAg_mispred_cnt_4bits GAs_mispred_cnt_4bits PAs_mispred_cnt_4bits
    GAg_mispred_cnt_8bits PAg_mispred_cnt_8bits GAs_mispred_cnt_8bits PAs_mispred_cnt_8bits
    GAg_mispred_cnt_12bits PAg_mispred_cnt_12bits GAs_mispred_cnt_12bits PAs_mispred_cnt_12bits
    total_brCount total_transactionCount total_takenCount
    total_num_ops instr_reg_cnt total_reg_use_cnt total_reg_age
    reg_age_cnt_1 reg_age_cnt_2 reg_age_cnt_4 reg_age_cnt_8 reg_age_cnt_16 reg_age_cnt_32 reg_age_cnt_64
    mem_read_cnt
    mem_read_local_stride_0 mem_read_local_stride_8 mem_read_local_stride_64 mem_read_local_stride_512
    mem_read_local_stride_4096 mem_read_local_stride_32768 mem_read_local_stride_262144
    mem_read_global_stride_0 mem_read_global_stride_8 mem_read_global_stride_64 mem_read_global_stride_512
    mem_read_global_stride_4096 mem_read_global_stride_32768 mem_read_global_stride_262144
    mem_write_cnt
    mem_write_local_stride_0 mem_write_local_stride_8 mem_write_local_stride_64 mem_write_local_stride_512
    mem_write_local_stride_4096 mem_write_local_stride_32768 mem_write_local_stride_262144
    mem_write_global_stride_0 mem_write_global_stride_8 mem_write_global_stride_64 mem_write_global_stride_512
    mem_write_global_stride_4096 mem_write_global_stride_32768 mem_write_global_stride_262144
)

# "${columns[*]}" joins the names with single spaces. No trailing newline is
# written: every data row below starts with its own "\n".
printf '%s' "${columns[*]}" > "$table"

for i in $benchmarks
do
    # only directories hold benchmark results
    [ -d "$i" ] || continue

    # *** process directory ***
    echo "**********************************************************"
    printf '%s\n' "$i"

    j_pid=1
    for ilp_file in "$i"/ilp_full_int_*_pin.out
    do
        # an unmatched glob is left as a literal pattern; skip it
        [ -e "$ilp_file" ] || continue

        # ilp_full_int_<pid>_pin.out -> <pid>
        i_pid=${ilp_file##*/ilp_full_int_}
        i_pid=${i_pid%_pin.out}

        printf '\n%s dataset%s ' "$i" "$j_pid" >> "$table"
        # Match the pid exactly: a bare *<pid>* would also pick up the files
        # of every pid that merely contains these digits.
        cat -- "$i"/*_"${i_pid}"_pin.out | tr -d "\n" >> "$table"
        j_pid=$((j_pid + 1))
    done
    echo ""
    echo ""
    # *************************
done


--------------------------------------------------------------------------------