├── .gitignore ├── calipers_logo.png ├── CODE_OF_CONDUCT.md ├── SUPPORT.md ├── demo ├── InO.cfg ├── OoO.cfg ├── README.md ├── sample2.trace └── sample1.trace ├── Makefile ├── LICENSE ├── src ├── common │ ├── calipers_util.h │ ├── calipers_util.cpp │ ├── calipers_defs.h │ ├── calipers_types.h │ └── main.cpp ├── branch_predictor │ ├── branch_predictor.h │ └── statistical_bp.h ├── memory │ ├── cache.h │ ├── ideal_cache.h │ ├── statistical_cache.h │ └── real_cache.h ├── trace │ ├── instruction_stream.cpp │ ├── instruction_stream.h │ └── riscv_stream.h └── graph │ ├── graph.h │ ├── scoreboard_simple.h │ ├── o3_core_graph.h │ ├── inorder_core_graph.h │ ├── o3_core_graph_advanced.h │ ├── scoreboard.h │ ├── graph_util.h │ ├── graph.cpp │ └── inorder_core_graph.cpp ├── SECURITY.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | -------------------------------------------------------------------------------- /calipers_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/calipers/HEAD/calipers_logo.png -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | 9 | For help and questions about using this project, please contact 10 | [hosseing@umich.edu](mailto:hosseing@umich.edu), [gagg@microsoft.com](mailto:gagg@microsoft.com). 11 | 12 | ## Microsoft Support Policy 13 | 14 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 
15 | -------------------------------------------------------------------------------- /demo/InO.cfg: -------------------------------------------------------------------------------- 1 | ISA RISC-V 2 | Core InO 3 | Branch_Predictor TraceB 4 | I_Cache TraceC 5 | D_Cache TraceC 6 | Fetch_Bandwidth 16 7 | Dispatch_Bandwidth 2 8 | Issue_Bandwidth 2 9 | Commit_Bandwidth 2 10 | Decode_Cycles 2 11 | Dispatch_Cycles 1 12 | Execute_To_Commit_Cycles 0 13 | Prediction_Cycles 2 14 | Misprediction_Penalty 0 15 | Mem_Issue_Bandwidth 1 16 | Mem_Commit_Bandwidth 1 17 | Max_Mem_Accesses 1 18 | Int_ALU_Count 2 19 | Int_Mul_Count 2 20 | Int_Div_Count 2 21 | FPU_Count 2 22 | LSU_Count 1 23 | Load_Dependent_Early_Issue 1 24 | Load_Early_Issue 0 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | FLAGS = -O2 2 | BUILD_BASE = build 3 | SRC_BASE = src 4 | SRC_DIRS = common trace graph memory branch_predictor 5 | 6 | #-------------------------------------------------------------------------------------------------# 7 | 8 | $(foreach src_dir, $(SRC_DIRS), $(eval SRCS += $(wildcard $(SRC_BASE)/$(src_dir)/*.cpp))) 9 | INCS = $(addprefix -I$(SRC_BASE)/, $(SRC_DIRS)) 10 | OBJ_DIRS = $(addprefix $(BUILD_BASE)/, $(SRC_DIRS)) 11 | OBJS = $(SRCS:$(SRC_BASE)/%.cpp=$(BUILD_BASE)/%.o) 12 | DEPS = $(OBJS:%.o=%.d) 13 | 14 | $(BUILD_BASE)/calipers: $(OBJS) 15 | $(CXX) $(FLAGS) -o $@ $^ 16 | 17 | $(BUILD_BASE)/%.o: $(SRC_BASE)/%.cpp 18 | $(CXX) $(FLAGS) $(INCS) -MMD -MP -c -o $@ $< 19 | 20 | $(OBJS): | $(OBJ_DIRS) 21 | 22 | $(OBJ_DIRS): | $(BUILD_BASE) 23 | mkdir -p $(OBJ_DIRS) 24 | 25 | $(BUILD_BASE): 26 | mkdir -p $(BUILD_BASE) 27 | 28 | .PHONY: clean 29 | 30 | clean: 31 | rm -rf $(BUILD_BASE) 32 | 33 | -include $(DEPS) 34 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /demo/OoO.cfg: -------------------------------------------------------------------------------- 1 | ISA RISC-V 2 | Core OoO 3 | Branch_Predictor StatisticalB 4 | Branch_Predictor_Config 90:1 # accuracy:prediction_cycles 5 | I_Cache StatisticalC 6 | I_Cache_Config 98:2:10:98:2:10 # load_hit_rate:load_hit_cycles:load_miss_cycles:store_hit_rate:store_hit_cycles:store_miss_cycles 7 | D_Cache RealC 8 | D_Cache_Config 32768:4:524288:16:1:13:100:1:10:50 # l1_size:l1_associativity:l2_size:l2_associativity:l1_load_hit_cycles:l2_load_hit_cycles:l2_load_miss_cycles:l1_store_hit_cycles:l2_store_hit_cycles:l2_store_miss_cycles 9 | Instr_Buffer_Size 192 10 | 
Instr_Queue_Size 64 11 | Fetch_Bandwidth 32 12 | Dispatch_Bandwidth 8 13 | Issue_Bandwidth 8 14 | Commit_Bandwidth 8 15 | Decode_Cycles 1 16 | Dispatch_Cycles 3 17 | Execute_To_Commit_Cycles 1 18 | Prediction_Cycles 0 19 | Misprediction_Penalty 2 20 | Mem_Issue_Bandwidth 8 21 | Mem_Commit_Bandwidth 8 22 | Int_ALU_Count 6 23 | Int_Mul_Div_Count 2 24 | FP_ALU_Count 4 25 | FP_Mul_Div_Count 2 26 | LSU_Count 4 27 | LQ_Size 32 28 | SQ_Size 32 29 | -------------------------------------------------------------------------------- /src/common/calipers_util.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #ifndef CALIPERS_UTIL_H 26 | #define CALIPERS_UTIL_H 27 | 28 | #include 29 | 30 | #include "calipers_types.h" 31 | 32 | using namespace std; 33 | 34 | vector split_string(string str, char c); 35 | uint64_t unsigned_diff(uint64_t a, uint64_t b); 36 | void print_instruction(Instruction& instr); 37 | 38 | #endif // CALIPERS_UTIL_H 39 | -------------------------------------------------------------------------------- /src/branch_predictor/branch_predictor.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
/**
 * The base branch predictor class
 *
 * Concrete predictors derive from this class and implement mispredicted().
 */
class BranchPredictor
{
  public:
    // Prediction latency in cycles. Derived classes are expected to set it
    // (e.g. from their configuration string); it defaults to 0 so that it is
    // never read uninitialized.
    uint32_t predictionCycles = 0;

    // Predictors are polymorphic, so destroying one through a base pointer
    // must run the derived destructor.
    virtual ~BranchPredictor() = default;

    /**
     * Reports the prediction outcome for one branch.
     *
     * @param pc Program counter of the branch instruction
     * @return true if the branch is mispredicted, false otherwise
     */
    virtual bool mispredicted(uint64_t pc) = 0;
};
/**
 * The base cache class
 *
 * Concrete cache models derive from this class and report how many cycles a
 * load or a store takes.
 */
class Cache
{
  public:
    // Caches are polymorphic, so destroying one through a base pointer must
    // run the derived destructor.
    virtual ~Cache() = default;

    /**
     * @param base   Base address of the accessed data
     * @param length Length of the access (presumably in bytes -- TODO confirm)
     * @return Number of cycles the load takes
     */
    virtual uint32_t loadCycles(uint64_t base, uint32_t length) = 0;

    /**
     * @param base   Base address of the accessed data
     * @param length Length of the access (presumably in bytes -- TODO confirm)
     * @return Number of cycles the store takes
     */
    virtual uint32_t storeCycles(uint64_t base, uint32_t length) = 0;

    // Optional hook for printing statistics; does nothing by default.
    virtual void printStats() {}
};
23 | */ 24 | 25 | #ifndef IDEAL_CACHE_H 26 | #define IDEAL_CACHE_H 27 | 28 | #include 29 | 30 | #include "cache.h" 31 | 32 | /** 33 | * An ideal cache with single-cycle loads/stores 34 | */ 35 | class IdealCache: public Cache 36 | { 37 | public: 38 | uint32_t loadCycles(uint64_t base, uint32_t length) 39 | { 40 | return 1; 41 | } 42 | 43 | uint32_t storeCycles(uint64_t base, uint32_t length) 44 | { 45 | return 1; 46 | } 47 | }; 48 | 49 | #endif // IDEAL_CACHE_H 50 | -------------------------------------------------------------------------------- /src/trace/instruction_stream.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #include 26 | 27 | #include "calipers_defs.h" 28 | #include "instruction_stream.h" 29 | 30 | using namespace std; 31 | 32 | InstructionStream::InstructionStream(string trace_file_name, bool trace_bp, 33 | bool trace_icache, bool trace_dcache) : 34 | traceBP(trace_bp), 35 | traceICache(trace_icache), 36 | traceDCache(trace_dcache) 37 | { 38 | traceFile.open(trace_file_name); 39 | if (!traceFile.is_open()) 40 | { 41 | CALIPERS_ERROR("Unable to open the trace file"); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/common/calipers_util.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #include 26 | 27 | #include 28 | 29 | #include "calipers_util.h" 30 | #include "calipers_types.h" 31 | 32 | using namespace std; 33 | 34 | vector split_string(string str, char c) 35 | { 36 | vector v; 37 | size_t pos = 0; 38 | 39 | while (true) 40 | { 41 | size_t char_pos = str.find(c, pos); 42 | v.push_back(str.substr(pos, char_pos - pos)); 43 | 44 | if (char_pos == string::npos) 45 | { 46 | break; 47 | } 48 | 49 | pos = char_pos + 1; 50 | } 51 | 52 | return v; 53 | } 54 | 55 | uint64_t unsigned_diff(uint64_t a, uint64_t b) 56 | { 57 | if (a > b) 58 | { 59 | return a - b; 60 | } 61 | else 62 | { 63 | return b - a; 64 | } 65 | } 66 | 67 | void print_instruction(Instruction& instr) 68 | { 69 | } 70 | -------------------------------------------------------------------------------- /src/trace/instruction_stream.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #ifndef INSTRUCTION_STREAM_H 26 | #define INSTRUCTION_STREAM_H 27 | 28 | #include 29 | #include 30 | 31 | #include "calipers_types.h" 32 | 33 | using namespace std; 34 | 35 | /** 36 | * The base class for reading a stream of instructions from a trace file 37 | * Dervied classes define how the trace is parsed based on the ISA specifications. 38 | */ 39 | class InstructionStream 40 | { 41 | protected: 42 | ifstream traceFile; 43 | bool traceBP; // Whether the trace provides branch prdecition outcomes 44 | bool traceICache; // Whether the trace provides I-Cache access cycles 45 | bool traceDCache; // Whether the trace provides D-Cache access cycles 46 | Instruction instr; 47 | 48 | public: 49 | InstructionStream(string trace_file_name, bool trace_bp, 50 | bool trace_icache, bool trace_dcache); 51 | virtual Instruction* next() = 0; 52 | }; 53 | 54 | #endif // INSTRUCTION_STREAM_H 55 | -------------------------------------------------------------------------------- /src/branch_predictor/statistical_bp.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 
3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #ifndef STATISTICAL_BP_H 26 | #define STATISTICAL_BP_H 27 | 28 | #include 29 | 30 | #include "calipers_util.h" 31 | #include "branch_predictor.h" 32 | 33 | using namespace std; 34 | 35 | /** 36 | * A statistical/stochastic branch predictor with fixed accuracy 37 | */ 38 | class StatisticalBp : public BranchPredictor 39 | { 40 | private: 41 | float accuracy; 42 | 43 | public: 44 | StatisticalBp(string config) 45 | { 46 | vector config_vec = split_string(config, ':'); 47 | 48 | if (config_vec.size() != 2) 49 | { 50 | CALIPERS_ERROR("Invalid configuration for the statistical branch predictor"); 51 | } 52 | 53 | accuracy = stof(config_vec[0]); 54 | predictionCycles = stoi(config_vec[1]); 55 | 56 | } 57 | 58 | bool mispredicted(uint64_t pc) 59 | { 60 | int r = rand() % 1000; 61 | return (r >= 10 * accuracy); 62 | } 63 | }; 64 | 65 | #endif // STATISTICAL_BP_H 66 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | ## Configuration 2 | 3 | Two sample configuration files are provided here: `InO.cfg` and `OoO.cfg` (for the in-order and 4 | out-of-order processor models, respectively). The common configuration parameters are: 5 | - `ISA`: Currently, only `RISC-V` is acceptable. 6 | - `Core`: Can be `InO` or `OoO`. These processor models are based on 7 | [gem5](https://www.gem5.org/)'s *MinorCPU* and *DerivO3CPU* models, respectively. 8 | - `Branch_Predictor`: Can be `TraceB` (when branch prediction information is provided in the 9 | trace) or `StatisticalB` (when the *statistical* model is used). 10 | - `Branch_Predictor_Config`: Used for configuring the branch predictor when a model (rather than 11 | the trace) is used. 12 | - `I_Cache`/`D_Cache`: Can be `TraceC` (when load/store information is provided in the 13 | trace) or `IdealC`/`StatisticalC`/`RealC` (when the *ideal*/*statistical*/*real* model is used). 
14 | - `I_Cache_Config`/`D_Cache_Config`: Used for configuring the I/D-cache when a model (rather than 15 | the trace) is used. 16 | 17 | Further configuration parameters specify other aspects of the core, which may be used in one 18 | model but not in another. 19 | 20 | ## Trace 21 | 22 | Two sample trace files are provided here: `sample1.trace` and `sample2.trace`. 23 | The traces are text-based and include the following for each instruction: 24 | - `@I disassembled_instruction [@A base_address]`: The instruction and the base address of 25 | accessed data in the case of loads/stores 26 | - `@F fetch_ticks`: Clock ticks\* spent on fetching this instruction 27 | (required when an I-cache model is not used) 28 | - `@B prediction_correctness`: Correctness of branch prediction (required when a branch prediction 29 | model is not used): 0 if mispredicted; 1 if correctly predicted or the instruction is not a branch 30 | - `@M memory_access_ticks`: Clock ticks\* spent on accessing the memory (required for 31 | loads/stores and when a D-cache model is not used) 32 | 33 | Note that `sample1.trace` should be used with `InO.cfg`, because this configuration specifies 34 | that branch prediction and load/store information are provided along with the trace. 35 | Also, `sample2.trace` (where all trace lines start with `@I`) should be used with `OoO.cfg`, 36 | because this configuration specifies that particular branch prediction and cache models are used. 37 | 38 | \* The number of ticks per cycle is defined in 39 | [calipers_defs.h](../src/common/calipers_defs.h). 40 | -------------------------------------------------------------------------------- /src/common/calipers_defs.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 
// Diagnostic macros. Each one streams its argument to stderr, so the argument
// may itself be a chain of `<<` insertions.
//
// CALIPERS_INFO and CALIPERS_WARNING expand to a statement that already ends
// in ';', and CALIPERS_ERROR expands to a braced block. Use them as
// stand-alone statements, not as the unbraced body of an if that has an else.
#define CALIPERS_INFO(msg) \
    cerr << "CALIPERS_INFO | " << msg << endl;

#define CALIPERS_WARNING(warn_msg) \
    cerr << "CALIPERS_WARNING | " << warn_msg << endl;

// Prints the message and terminates the program with exit(-1).
#define CALIPERS_ERROR(error_msg) \
    {cerr << "CALIPERS_ERROR | " << error_msg << endl; exit(-1);}

#define RAND_SEED 27302730 // NOTE(review): presumably passed to srand() -- the call site is not in this header

#define TICKS_PER_CYCLE 500 // Access times in the trace are given in ticks.

#define CACHE_LINE_BYTES 64
#define CACHE_ADDRESS_ZEROS 6 // log2(CACHE_LINE_BYTES)

#define MAX_REG_RD 3 // Maximum number of registers read
#define MAX_REG_WR 1 // Maximum number of registers written
#define MAX_OPERANDS (MAX_REG_RD + MAX_REG_WR)

#define INO_WINDOW 400
#define MAX_PARENTS 10

#define OOO_HOPPING_WINDOW 10000000
#define OOO_SLIDING_WINDOW 800

#define VECTOR_WIDTH 1

// A point in time on the system clock, measured in the given duration type.
template <class Duration>
using sys_time = std::chrono::time_point<std::chrono::system_clock, Duration>;

// A system-clock time point with nanosecond resolution.
using sys_nanoseconds = sys_time<std::chrono::nanoseconds>;
If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /src/memory/statistical_cache.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 
3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #ifndef STATISTICAL_CACHE_H 26 | #define STATISTICAL_CACHE_H 27 | 28 | #include 29 | 30 | #include "calipers_util.h" 31 | #include "cache.h" 32 | 33 | /** 34 | * A statistical/stochastic cache with fixed load/store hit rate and hit/miss cycles 35 | */ 36 | class StatisticalCache: public Cache 37 | { 38 | private: 39 | float loadHitRate; /* Used as 10 * loadHitRate against a draw in [0, 999]; presumably a percentage in [0, 100] -- TODO confirm */ 40 | uint32_t loadHitCycles; 41 | uint32_t loadMissCycles; 42 | 43 | float storeHitRate; /* Same scale as loadHitRate */ 44 | uint32_t storeHitCycles; 45 | uint32_t storeMissCycles; 46 | 47 | public: /* Builds the cache from a ':'-separated string of exactly six fields, in order: load hit rate, load hit cycles, load miss cycles, store hit rate, store hit cycles, store miss cycles. Any other field count is reported through CALIPERS_ERROR. */ 48 | StatisticalCache(string config) 49 | { 50 | vector config_vec = split_string(config, ':'); 51 | if (config_vec.size() != 6) 52 | { 53 | CALIPERS_ERROR("Invalid configuration for the statistical cache"); 54 | } 55 | 56 | loadHitRate = stof(config_vec[0]); 57 | loadHitCycles = stoi(config_vec[1]); 58 | loadMissCycles = stoi(config_vec[2]); 59 | 60 | storeHitRate = stof(config_vec[3]); 61 | storeHitCycles = stoi(config_vec[4]); 62 | storeMissCycles = stoi(config_vec[5]); 63 | } 64 | /* Returns loadMissCycles when a rand() draw in [0, 999] is >= 10 * loadHitRate, otherwise loadHitCycles; base and length are not used. */ 65 | uint32_t loadCycles(uint64_t base, uint32_t length) 66 | { 67 | int r = rand() % 1000; 68 | if (r >= 10 * loadHitRate) 69 | { 70 | return loadMissCycles; 71 | } 72 | else 73 | { 74 | return loadHitCycles; 75 | } 76 | } 77 | /* Store counterpart of loadCycles: same draw, compared against 10 * storeHitRate; base and length are not used. */ 78 | uint32_t storeCycles(uint64_t base, uint32_t length) 79 | { 80 | int r = rand() % 1000; 81 | if (r >= 10 * storeHitRate) 82 | { 83 | return storeMissCycles; 84 | } 85 | else 86 | { 87 | return storeHitCycles; 88 | } 89 | } 90 | }; 91 | 92 | #endif // STATISTICAL_CACHE_H 93 | -------------------------------------------------------------------------------- /demo/sample2.trace: -------------------------------------------------------------------------------- 1 | @I 0xd1684 ld a0, 40(s2) @A 0x989f78 2 | @I 0xd1688 c_slli a5, 32 3 | @I 0xd168a c_srli a5, 28 4 | @I 0xd168c c_add a5, a0 5 | @I 0xd168e sd s5, 0(a5) @A 0xa82130 6 | @I 0xd1692 lwu a4, 52(s2) @A 0x989f84 7 | @I 0xd1696 ld a5, 40(s2) @A 0x989f78 8 | @I 0xd169a c_slli a4, 4 9 | @I 
0xd169c c_add a5, a4 10 | @I 0xd169e c_li a4, 10 11 | @I 0xd16a0 c_sw a4, 8(a5) @A 0xa82138 12 | @I 0xd16a2 lw a5, 52(s2) @A 0x989f84 13 | @I 0xd16a6 sd s3, 16(s2) @A 0x989f60 14 | @I 0xd16aa c_addiw a5, 1 15 | @I 0xd16ac sw a5, 52(s2) @A 0x989f84 16 | @I 0xd16b0 c_ldsp ra, 104(sp) @A 0x7fffffffffffeb08 17 | @I 0xd16b2 c_ldsp s0, 96(sp) @A 0x7fffffffffffeb00 18 | @I 0xd16b4 c_ldsp s1, 88(sp) @A 0x7fffffffffffeaf8 19 | @I 0xd16b6 c_ldsp s2, 80(sp) @A 0x7fffffffffffeaf0 20 | @I 0xd16b8 c_ldsp s3, 72(sp) @A 0x7fffffffffffeae8 21 | @I 0xd16ba c_ldsp s4, 64(sp) @A 0x7fffffffffffeae0 22 | @I 0xd16bc c_ldsp s5, 56(sp) @A 0x7fffffffffffead8 23 | @I 0xd16be c_ldsp s6, 48(sp) @A 0x7fffffffffffead0 24 | @I 0xd16c0 c_ldsp s7, 40(sp) @A 0x7fffffffffffeac8 25 | @I 0xd16c2 c_ldsp s8, 32(sp) @A 0x7fffffffffffeac0 26 | @I 0xd16c4 c_ldsp s9, 24(sp) @A 0x7fffffffffffeab8 27 | @I 0xd16c6 c_ldsp s10, 16(sp) @A 0x7fffffffffffeab0 28 | @I 0xd16c8 c_ldsp s11, 8(sp) @A 0x7fffffffffffeaa8 29 | @I 0xd16ca c_addi16sp sp, 112 30 | @I 0xd16cc c_jr ra 31 | @I 0xd4794 c_j -256 32 | @I 0xd4694 lw a5, 484(s1) @A 0x9845a4 33 | @I 0xd4698 c_bnez a5, 22 34 | @I 0xd469a ld s2, 456(s1) @A 0x984588 35 | @I 0xd469e addi a5, s1, 440 36 | @I 0xd46a2 sd a5, 472(s1) @A 0x984598 37 | @I 0xd46a6 addi a5, s2, 24 38 | @I 0xd46aa sd a5, 432(s1) @A 0x984570 39 | @I 0xd46ae addi a5, zero, 64 40 | @I 0xd46b2 sh a5, 6(s2) @A 0x9848e6 41 | @I 0xd46b6 lbu a4, 25(s1) @A 0x9843d9 42 | @I 0xd46ba c_li a5, 2 43 | @I 0xd46bc beq a4, a5, 198 44 | @I 0xd46c0 c_ld s0, 0(s1) @A 0x9843c0 45 | @I 0xd46c2 lui s6, 1760 46 | @I 0xd46c6 addi s6, s6, -1968 47 | @I 0xd46ca c_ld a5, 40(s1) @A 0x9843e8 48 | @I 0xd46cc c_lw a5, 36(a5) @A 0x949024 49 | @I 0xd46ce sw a5, 0(s2) @A 0x9848e0 50 | @I 0xd46d2 lwu a3, 48(s0) @A 0x989f80 51 | @I 0xd46d6 c_ld a4, 40(s0) @A 0x989f78 52 | @I 0xd46d8 c_ld a5, 0(s0) @A 0x989f50 53 | @I 0xd46da c_slli a3, 4 54 | @I 0xd46dc c_add a4, a3 55 | @I 0xd46de c_ld a4, 0(a4) @A 0xa82130 56 | @I 0xd46e0 bgeu a5, 
a4, 64 57 | @I 0xd46e4 addi a1, a5, 1 58 | @I 0xd46e8 c_sd a1, 0(s0) @A 0x989f50 59 | @I 0xd46ea c_ld a0, 40(s1) @A 0x9843e8 60 | @I 0xd46ec c_ld a4, 8(s0) @A 0x989f58 61 | @I 0xd46ee lbu s3, 0(a5) @A 0x40000000002dd961 62 | @I 0xd46f2 c_lw a5, 40(a0) @A 0x949028 63 | @I 0xd46f4 c_subw a1, a4 64 | @I 0xd46f6 addiw s7, s3, 0 65 | @I 0xd46fa bgeu a1, a5, 126 66 | @I 0xd46fe c_lw a5, 36(a0) @A 0x949024 67 | @I 0xd4700 c_lw a4, 32(a0) @A 0x949020 68 | @I 0xd4702 c_addw a5, a1 69 | @I 0xd4704 c_mv a1, a5 70 | @I 0xd4706 bgeu a5, a4, 50 71 | @I 0xd4738 c_sw a5, 32(a0) @A 0x949020 72 | @I 0xd473a c_j -48 73 | @I 0xd470a sw a1, 0(s2) @A 0x9848e0 74 | @I 0xd470e addi a5, zero, 126 75 | @I 0xd4712 bltu a5, s3, 1618 76 | @I 0xd4716 c_slli s3, 2 77 | @I 0xd4718 c_add s3, s6 78 | @I 0xd471a lw a5, 0(s3) @A 0x6df8d0 79 | @I 0xd471e c_jr a5 80 | @I 0xd4aec lhu a5, 6(s2) @A 0x9848e6 81 | @I 0xd4af0 lui s9, 2002 82 | @I 0xd4af4 c_lui s8, 1 83 | @I 0xd4af6 ori a5, a5, 1 84 | @I 0xd4afa sh a5, 6(s2) @A 0x9848e6 85 | @I 0xd4afe ld s3, 0(s1) @A 0x9843c0 86 | @I 0xd4b02 c_li a6, 0 87 | @I 0xd4b04 addi s9, s9, 736 88 | @I 0xd4b08 ld a4, 0(s3) @A 0x989f50 89 | @I 0xd4b0c addi s10, zero, 32 90 | @I 0xd4b10 c_li s11, 9 91 | @I 0xd4b12 lui a7, 1760 92 | @I 0xd4b16 addi s8, s8, -2048 93 | @I 0xd4b1a beq s7, s10, 70 94 | @I 0xd4b60 addi a3, a4, 1 95 | @I 0xd4b64 sd a3, 0(s3) @A 0x989f50 96 | @I 0xd4b68 lbu s7, 0(a4) @A 0x40000000002dd962 97 | @I 0xd4b6c slli a5, s7, 1 98 | @I 0xd4b70 c_add a5, s9 99 | @I 0xd4b72 lhu a5, 0(a5) @A 0x7d2320 100 | @I 0xd4b76 and a5, s8, a5 101 | -------------------------------------------------------------------------------- /src/common/calipers_types.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 
3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #ifndef CALIPERS_TYPES_H 26 | #define CALIPERS_TYPES_H 27 | 28 | #include "graph_util.h" 29 | 30 | 31 | enum CacheType 32 | { 33 | TraceC, // Provided by the trace 34 | IdealC, 35 | StatisticalC, 36 | RealC 37 | }; 38 | 39 | 40 | enum BranchPredictorType 41 | { 42 | TraceB, // Provided by the trace 43 | StatisticalB 44 | }; 45 | 46 | 47 | // An ISA might not need all the execution types defined below 48 | enum ExecutionType 49 | { 50 | IntBase, 51 | IntMul, 52 | IntDiv, 53 | FpBase, 54 | FpMul, 55 | FpDiv, 56 | Load, 57 | Store, 58 | BranchCond, 59 | BranchUncond, 60 | Syscall, 61 | Atomic, 62 | Other 63 | }; 64 | 65 | 66 | // Resource types that may cause structural hazards 67 | enum Resource 68 | { 69 | RscFetch, 70 | RscDispatch, 71 | RscIssue, 72 | RscMemIssue, 73 | RscCommit, 74 | RscMemCommit, 75 | RscIntAlu, 76 | RscIntMul, 77 | RscIntDiv, 78 | RscIntMulDiv, 79 | RscFpu, 80 | RscFpAlu, 81 | RscFpMul, 82 | RscFpDiv, 83 | RscFpMulDiv, 84 | RscLsu, 85 | }; 86 | 87 | 88 | // Queue types that may cause structural hazards 89 | enum QueueResource 90 | { 91 | RscInstrQ, 92 | RscLQ, 93 | RscSQ, 94 | }; 95 | 96 | /* Per-instruction record: program counter, size in bytes, fetch and load/store cycles, misprediction flag, execution type, and register/memory operands */ 97 | typedef struct INSTRUCTION 98 | { 99 | uint64_t pc; 100 | uint32_t bytes; 101 | uint32_t fetchCycles; 102 | uint32_t lsCycles; 103 | bool mispredicted; 104 | 105 | int executionType; // From the ExecutionType enum 106 | // If an instruction needs multiple execution 107 | // units, define executionType as an array 108 | 109 | uint32_t regReadCount; 110 | int regRead[MAX_REG_RD]; 111 | 112 | uint32_t regWriteCount; 113 | int regWrite[MAX_REG_WR]; 114 | 115 | uint32_t memLoadCount; 116 | uint64_t memLoadBase; 117 | uint32_t memLoadLength; 118 | 119 | uint32_t memStoreCount; 120 | uint64_t memStoreBase; 121 | uint32_t memStoreLength; 122 | } Instruction; 123 | 124 | 125 | // A container of different types of instructions 126 | // (Used, e.g., for calculating the breakdown of critical path instructions) 127 | typedef struct 
INSTRUCTION_TYPES 128 | { 129 | Vector intInstructions; 130 | Vector fpInstructions; 131 | Vector loadInstructions; 132 | Vector storeInstructions; 133 | Vector branchInstructions; 134 | Vector otherInstructions; 135 | } InstructionTypes; 136 | 137 | 138 | // A container of different types of cycles 139 | // (Used, e.g., for calculating the breakdown of critical path cycles) 140 | typedef struct CYCLE_TYPES 141 | { 142 | Vector goodFetchHitCycles; 143 | Vector goodFetchMissCycles; 144 | Vector badFetchHitCycles; 145 | Vector badFetchMissCycles; 146 | Vector decodeCycles; 147 | Vector dispatchCycles; 148 | Vector intCycles; 149 | Vector fpCycles; 150 | Vector lsCycles; 151 | Vector loadL1HitCycles; 152 | Vector loadL2HitCycles; 153 | Vector loadMissCycles; 154 | Vector storeL1HitCycles; 155 | Vector storeL2HitCycles; 156 | Vector storeMissCycles; 157 | Vector branchCycles; 158 | Vector syscallCycles; 159 | Vector atomicCycles; 160 | Vector otherCycles; 161 | Vector commitCycles; 162 | } CycleTypes; 163 | 164 | 165 | 166 | // Different vertex types for different stages of an instruction in the core pipeline 167 | enum VertexType 168 | { 169 | InstrFetch = 0, 170 | InstrDispatch = 1, 171 | InstrExecute = 2, 172 | MemExecute = 3, 173 | InstrCommit = 4, 174 | Last = 4 /* Alias of the highest enumerator (InstrCommit), not a distinct vertex type */ 175 | }; 176 | 177 | #endif // CALIPERS_TYPES_H 178 | -------------------------------------------------------------------------------- /src/trace/riscv_stream.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 
3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #ifndef RISCV_STREAM_H 26 | #define RISCV_STREAM_H 27 | 28 | #include 29 | #include "instruction_stream.h" 30 | 31 | /** 32 | * Defining how a RISC-V stream of instructions is parsed 33 | * Based on: "The RISC-V Instruction Set Manual" (Version 2.2) 34 | */ 35 | class RiscvStream : public InstructionStream 36 | { 37 | private: /* Integer register names mapped to register numbers 0..31, plus pc */ 38 | enum IntReg 39 | { 40 | zero = 0, 41 | ra = 1, 42 | sp = 2, 43 | gp = 3, 44 | tp = 4, 45 | t0 = 5, 46 | t1 = 6, 47 | t2 = 7, 48 | s0 = 8, 49 | fp = 8, /* same number as s0 */ 50 | s1 = 9, 51 | a0 = 10, 52 | a1 = 11, 53 | a2 = 12, 54 | a3 = 13, 55 | a4 = 14, 56 | a5 = 15, 57 | a6 = 16, 58 | a7 = 17, 59 | s2 = 18, 60 | s3 = 19, 61 | s4 = 20, 62 | s5 = 21, 63 | s6 = 22, 64 | s7 = 23, 65 | s8 = 24, 66 | s9 = 25, 67 | s10 = 26, 68 | s11 = 27, 69 | t3 = 28, 70 | t4 = 29, 71 | t5 = 30, 72 | t6 = 31, 73 | pc = 32, 74 | last = 32 /* same number as pc; FpReg numbering starts at last + 1 */ 75 | }; // enum IntReg 76 | 77 | enum FpReg 78 | { 79 | ft0 = IntReg::last + 1, 80 | ft1, 81 | ft2, 82 | ft3, 83 | ft4, 84 | ft5, 85 | ft6, 86 | ft7, 87 | fs0, 88 | fs1, 89 | fa0, 90 | fa1, 91 | fa2, 92 | fa3, 93 | fa4, 94 | fa5, 95 | fa6, 96 | fa7, 97 | fs2, 98 | fs3, 99 | fs4, 100 | fs5, 101 | fs6, 102 | fs7, 103 | fs8, 104 | fs9, 105 | fs10, 106 | fs11, 107 | ft8, 108 | ft9, 109 | ft10, 110 | ft11 111 | }; // enum FpReg 112 | 113 | enum Csr 114 | { 115 | Ustatus = 0x000, 116 | Fflags = 0x001, 117 | Frm = 0x002, 118 | Fcsr = 0x003, 119 | Uie = 0x004, 120 | Utvec = 0x005, 121 | Uscratch = 0x040, 122 | Uepc = 0x041, 123 | Ucause = 0x042, 124 | Utval = 0x043, 125 | Uio = 0x044, 126 | Cycle = 0xc00, 127 | Time = 0xc01, 128 | Instret = 0xc02, 129 | Cycleh = 0xc80, 130 | Timeh = 0xc81, 131 | Instreth = 0xc82 132 | }; // enum Csr 133 | 134 | unordered_map regMap; 135 | // Key: Register name, Value: Register number in the IntReg enum 136 | 137 | unordered_map opcodeToTypeMap; 138 | // Key: Opcode, Value: ExecutionType 139 | 140 | unordered_map syntaxMap; 141 | // Key: Opcode, Value: R/W characters for register read/write 142 | 143 | 
unordered_map memAccessMap; 144 | // Key: Opcode, Value: L/S/A character for memory load/store/atomic operations 145 | 146 | unordered_map memLengthMap; 147 | // Key: Opcode, Value: Memory access in bytes 148 | 149 | unordered_map bytesMap; 150 | // Key: Opcode, Value: Number of instruction bytes 151 | 152 | string lastInstrLine; 153 | bool readFromFile = true; 154 | 155 | void initMaps(); 156 | string parseNext(string& instr_line, size_t& current_pos); 157 | void parseInstr(string& instr_line); 158 | bool parseBranch(string& branch_line); 159 | uint32_t parseMemoryCycles(string& mem_line); 160 | uint32_t parseFetchCycles(string& fetch_line); 161 | 162 | public: /* Forwards all four arguments to the InstructionStream constructor, then calls initMaps() */ 163 | RiscvStream(string trace_file_name, bool trace_bp, bool trace_icache, bool trace_dcache) : 164 | InstructionStream(trace_file_name, trace_bp, trace_icache, trace_dcache) 165 | { 166 | initMaps(); 167 | } 168 | 169 | Instruction* next(); 170 | }; 171 | 172 | #endif // RISCV_STREAM_H 173 | -------------------------------------------------------------------------------- /src/graph/graph.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #ifndef GRAPH_H 26 | #define GRAPH_H 27 | 28 | #include "calipers_types.h" 29 | #include "instruction_stream.h" 30 | #include "graph_util.h" 31 | #include "cache.h" 32 | #include "branch_predictor.h" 33 | 34 | /** 35 | * The base class for graph-based modeling of a processor 36 | */ 37 | class Graph 38 | { 39 | private: 40 | string traceFileName; 41 | string resultFileName; 42 | 43 | protected: /* Hashes a vertex by its instruction slot (instrNum % AnalysisWindow) and its vertex type */ 44 | struct VertexHash 45 | { 46 | uint64_t operator()(const Vertex& vertex) const 47 | { 48 | return (vertex.instrNum % AnalysisWindow) * 49 | (VertexType::Last + 1) + vertex.type; 50 | } 51 | }; 52 | /* Two vertices compare equal when their types match and their instruction numbers are equal modulo AnalysisWindow */ 53 | struct VertexEqual 54 | { 55 | bool operator()(const Vertex& lhs, const Vertex& rhs) const 56 | { 57 | return (lhs.type == rhs.type) && 58 | (lhs.instrNum % AnalysisWindow == rhs.instrNum % AnalysisWindow); 59 | } 60 | }; 61 | /* Orders pairs by second, then by first */ 62 | struct ScheduleComparison 63 | { 64 | bool operator()(const pair& lhs, 65 | const pair& rhs) const 66 | { 67 | if (lhs.second != rhs.second) 68 | { 69 | return lhs.second < rhs.second; 70 | } 71 | else 72 | { 73 | return lhs.first < rhs.first; 74 | } 75 | } 76 | }; 77 | /* Orders pairs by second, then by the vertex's instrNum, then by the vertex's type. operator() is const, like ScheduleComparison, so the comparator can be invoked through a const std::set. */ 78 | struct VertexScheduleComparison 79 | { 80 | bool operator()(const pair& lhs, const pair& rhs) const 81 | { 82 | if (lhs.second != rhs.second) 83 | { 84 | return lhs.second < rhs.second; 85 | } 86 | else if (lhs.first.instrNum != rhs.first.instrNum) 87 | { 88 | return lhs.first.instrNum < rhs.first.instrNum; 89 | } 90 | else 91 | { 92 | return 
lhs.first.type < rhs.first.type; 93 | } 94 | } 95 | }; 96 | 97 | typedef std::set, ScheduleComparison> ScheduleSet; 98 | // First: Instruction number, Second: Critical path length 99 | // Used for per-instruction (per-InstrExecute-vertex) scheduling 100 | 101 | typedef std::set, VertexScheduleComparison> VertexScheduleSet; 102 | // First: Vertex, Second: Critical path length 103 | // Used for per-vertex scheduling (for future use cases) 104 | 105 | InstructionStream* instrStream; 106 | 107 | Cache* icache; 108 | Cache* dcache; 109 | BranchPredictor* bp; 110 | 111 | // Parameters for calculating cache misses 112 | uint32_t l1iThreshold; 113 | uint32_t l2iThreshold; 114 | uint32_t l1dThreshold; 115 | uint32_t l2dThreshold; 116 | 117 | uint32_t intAluTotalCycles; // Ugly but OK 118 | 119 | unordered_map executionType; 120 | // Key: Instruction number % AnalysisWindow, Value: ExecutionType (-1 for invalid) 121 | 122 | 123 | /*** Analysis outcome ***/ 124 | 125 | // The size of the Vertex-keyed maps is controlled through the corresponding key-equal function 126 | 127 | unordered_map length; 128 | // length[v] = Length of the critical path to Vertex v 129 | //VertexToVectorMapExp lengthExp; 130 | 131 | unordered_map criticalPathCycles; 132 | // criticalPathCycles[v] = Composition of cycles on the critical path to Vertex v 133 | 134 | unordered_map criticalPathInstructions; 135 | // criticalPathInstructions[v] = Composition of instructions on the critical path to Vertex v 136 | 137 | uint64_t instructionMix[6]; 138 | // 0: int, 1: fp, 2: load, 3: store, 4: branch, 5: other 139 | 140 | // Execution time statistics 141 | uint64_t streamTime; 142 | uint64_t graphConstructionTime; 143 | uint64_t graphAnalysisTime; 144 | 145 | // Miscellaneous statistics 146 | uint64_t instrCount; 147 | uint64_t analyzedWindows; 148 | uint64_t l1iMisses; 149 | uint64_t l2iMisses; 150 | uint64_t l1dMisses; 151 | uint64_t l2dMisses; 152 | uint64_t bpMisses; 153 | uint64_t branchCount; 154 | 155 | 156 
| void updateCriticalPathCycles(Vertex& parent, OutgoingEdge& e); 157 | void recordStats(bool show_details, bool hopping_window); 158 | void printEdge(Vertex& parent, OutgoingEdge& e); 159 | void printEdge(Vertex& child, IncomingEdge& e); 160 | 161 | public: 162 | static uint32_t AnalysisWindow; 163 | Graph(string trace_file_name, string result_file_name, InstructionStream* instr_stream); virtual ~Graph() = default; /* Virtual so that deleting a derived graph through a Graph* runs the derived destructor */ 164 | virtual void run() = 0; 165 | }; 166 | 167 | #endif // GRAPH_H 168 | -------------------------------------------------------------------------------- /src/graph/scoreboard_simple.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #ifndef SCOREBOARD_SIMPLE_H 26 | #define SCOREBOARD_SIMPLE_H 27 | 28 | #include "calipers_defs.h" 29 | 30 | 31 | /** 32 | * A class for keeping track of the users of different execution units 33 | */ 34 | class ScoreboardSimple 35 | { 36 | private: 37 | typedef struct RESOURCE_INSTANCE 38 | { 39 | uint32_t count; 40 | uint32_t totalCycles; 41 | uint32_t sourceIndependentCycles; // Cycles possible to proceed without source operands 42 | uint32_t nextIssueCycles; // Cycles before the next operation can be issued 43 | } ResourceInstance; 44 | 45 | 46 | // The record of the units of a particular resrouce type 47 | typedef struct RESOURCE_RECORD 48 | { 49 | uint64_t* users; // Instruction number of the last user of each unit 50 | uint32_t next; // Index of the unit to be used by the next instruction 51 | uint64_t** prevUsers; // Instruction numbers of the previous users of each unit 52 | uint32_t* pipelineHead; // Index of the pipeline head (in prevUsers) for each unit 53 | 54 | } ResourceRecord; 55 | 56 | unordered_map resources; // Key: Resource type (from enum Resource) 57 | unordered_map records; // Key: Resource type (from enum Resource) 58 | 59 | public: 60 | void initResource(int type, uint32_t count, uint32_t total_cycles, 61 | uint32_t source_independent_cycles, uint32_t next_issue_cycles) 62 | { 63 | if (resources.count(type) != 0) 64 | { 65 | CALIPERS_ERROR("Resource already initialized") 66 | } 67 | 68 | resources[type].count = count; 69 | resources[type].totalCycles = total_cycles; 70 | resources[type].sourceIndependentCycles = source_independent_cycles; 71 | resources[type].nextIssueCycles = next_issue_cycles; 72 | 73 | for (uint32_t i = 0; i < count; ++i) 74 | { 75 | records[type].users = new uint64_t[count]; 76 | records[type].prevUsers = new uint64_t*[count]; 77 | for (uint32_t j = 0; j < count; ++j) 78 | { 79 | records[type].prevUsers[j] = new uint64_t[total_cycles]; 80 | } 81 | records[type].pipelineHead = new uint32_t[count]; 82 | } 
83 | 84 | initRecords(); 85 | } 86 | 87 | void initRecords() 88 | { 89 | for (auto it = records.begin(); it != records.end(); ++it) 90 | { 91 | int type = it->first; 92 | records[type].next = 0; 93 | for (uint32_t j = 0; j < resources[type].count; ++j) 94 | { 95 | records[type].users[j] = UINT64_MAX; 96 | records[type].pipelineHead[j] = 0; 97 | for (uint32_t k = 0; k < resources[type].totalCycles; ++k) 98 | { 99 | records[type].prevUsers[j][k] = UINT64_MAX; 100 | } 101 | } 102 | } 103 | } 104 | 105 | void scheduleResource(int type, uint64_t instrNum, 106 | uint32_t& instance, uint64_t& previous_instr, 107 | uint32_t& wait_cycles, uint64_t& head_of_pipeline) 108 | { 109 | instance = records[type].next; 110 | uint32_t pipeline_idx = records[type].pipelineHead[instance]; 111 | previous_instr = records[type].users[instance]; 112 | wait_cycles = resources[type].nextIssueCycles; 113 | head_of_pipeline = records[type].prevUsers[instance][pipeline_idx]; 114 | 115 | records[type].users[instance] = instrNum; 116 | records[type].next = (instance + 1) % resources[type].count; 117 | records[type].prevUsers[instance][pipeline_idx] = instrNum; 118 | records[type].pipelineHead[instance] = (pipeline_idx + 1) % resources[type].totalCycles; 119 | 120 | } 121 | 122 | uint32_t resourceCount(int type) 123 | { 124 | return resources[type].count; 125 | } 126 | 127 | uint32_t resourceTotalCycles(int type) 128 | { 129 | return resources[type].totalCycles; 130 | } 131 | 132 | uint32_t resourceSourceIndependentCycles(int type) 133 | { 134 | return resources[type].sourceIndependentCycles; 135 | } 136 | 137 | uint32_t resourceNextIssueCycles(int type) 138 | { 139 | return resources[type].nextIssueCycles; 140 | } 141 | 142 | ~ScoreboardSimple() 143 | { 144 | for (auto it = records.begin(); it != records.end(); ++it) 145 | { 146 | int type = it->first; 147 | delete[] records[type].users; 148 | for (uint32_t j = 0; j < resources[type].count; ++j) 149 | { 150 | delete[] records[type].prevUsers[j]; 
151 | } 152 | delete[] records[type].prevUsers; 153 | delete[] records[type].pipelineHead; 154 | } 155 | } 156 | 157 | }; 158 | 159 | #endif // SCOREBOARD_SIMPLE_H 160 | -------------------------------------------------------------------------------- /src/graph/o3_core_graph.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #ifndef O3_CORE_GRAPH 26 | #define O3_CORE_GRAPH 27 | 28 | #include "graph.h" 29 | #include "calipers_defs.h" 30 | #include "scoreboard.h" 31 | 32 | 33 | /** 34 | * An out-of-order processor model based on gem5's DerivO3CPU 35 | * The current implementation performs a "hopping-window" analysis, i.e., 36 | * if the number of instructions is greater than OOO_HOPPING_WINDOW, 37 | * the graph is separately constructed/analyzed for windows of size 38 | * OOO_HOPPING_WINDOW (at most). 39 | */ 40 | class O3CoreGraph : public Graph 41 | { 42 | private: 43 | /*** Microarchitectural parameters ***/ 44 | 45 | uint32_t instrBufferSize; // Bandwidth is in instructions per cycle 46 | uint32_t fetchBandwidth; 47 | uint32_t dispatchBandwidth; 48 | uint32_t issueBandwidth; 49 | uint32_t commitBandwidth; 50 | uint32_t decodeCycles; 51 | uint32_t dispatchCycles; 52 | uint32_t executeToCommitCycles; 53 | uint32_t predictionCycles; 54 | uint32_t mispredictionPenalty; 55 | uint32_t memIssueBandwidth; 56 | uint32_t memCommitBandwidth; 57 | int bpType; 58 | string bpConfig; 59 | int icacheType; 60 | string icacheConfig; 61 | int dcacheType; 62 | string dcacheConfig; 63 | Scoreboard scoreboard[VECTOR_WIDTH]; // Also performs bookkeeping 64 | 65 | 66 | /*** Bookkeeping ***/ 67 | 68 | uint64_t currentIcacheLine; 69 | uint64_t lastMisprediction; 70 | uint64_t lastBranch; 71 | bool previousInstrMispredicted; 72 | bool previousWasBranch; 73 | uint64_t linearPC; 74 | uint64_t lastMemLdSt; 75 | 76 | unordered_map> regLastWrittenBy; 77 | // Key: Register, Value: 78 | 79 | unordered_map regLastWrittenByLoad; 80 | // Key: Register, Value: Whether it was written by a load 81 | 82 | pair>* ldStWindow; // Second: of address 84 | bool* ldStWindowType; // Is load? 
85 | uint32_t ldStWindowPointer; 86 | 87 | unordered_map lsCycles; 88 | // Key: Instruction number % AnalysisWindow, Value: Load/store cycles (UINT32_MAX for invalid) 89 | 90 | unordered_map executionCycles; 91 | // Key: Instruction number % AnalysisWindow, Value: Execution cycles (UINT32_MAX for invalid) 92 | 93 | 94 | /*** Graph-related data structures ***/ 95 | 96 | unordered_map, VertexHash, VertexEqual> graph; 97 | // graph[v] = Vector of children of Vertex v 98 | 99 | ScheduleSet scheduleOrder[VECTOR_WIDTH]; 100 | // The set(s) of pairs sorted based on length 101 | 102 | 103 | void initBookKeeping(); 104 | void anaylzeWindow(); /* NOTE(review): name is misspelled ("analyze"); left unchanged because its definition and callers are outside this header */ 105 | void model(Instruction* instr); 106 | void modelPipeline(Vertex& fetch_vertex, Vertex& dispatch_vertex, 107 | Vertex& execute_vertex, Vertex& mem_vertex, 108 | Vertex& commit_vertex, Instruction* instr, 109 | uint32_t execution_cycles); 110 | bool modelMemoryOrderConstraint(Instruction* instr, Vertex& mem_vertex); 111 | void trackDataDependencies(Instruction* instr, 112 | Vertex& execute_vertex, Vertex& mem_vertex); 113 | void modelResourceDependencies(); 114 | void addEdge(Vertex& parent, OutgoingEdge& e); 115 | void calculateCriticalPathForScheduling(); 116 | void calculateFinalCriticalPath(); 117 | 118 | public: /* Takes the trace/result file names, the instruction stream, the microarchitectural parameters, and the type/config of the branch predictor, icache and dcache */ 119 | O3CoreGraph(string trace_file_name, 120 | string result_file_name, 121 | InstructionStream* instr_stream, 122 | uint32_t instr_buffer_size, 123 | uint32_t instr_queue_size, 124 | uint32_t fetch_bandwidth, 125 | uint32_t dispatch_bandwidth, 126 | uint32_t issue_bandwidth, 127 | uint32_t commit_bandwidth, 128 | uint32_t decode_cycles, 129 | uint32_t dispatch_cycles, 130 | uint32_t execute_to_commit_cycles, 131 | uint32_t prediction_cycles, 132 | uint32_t misprediction_penalty, 133 | uint32_t mem_issue_bandwidth, 134 | uint32_t mem_commit_bandwidth, 135 | uint32_t int_alu_count, 136 | uint32_t int_mul_div_count, 137 | uint32_t fp_alu_count, 138 | uint32_t fp_mul_div_count, 139 | uint32_t lsu_count, 140 | uint32_t 
lq_qize, /* NOTE(review): presumably meant lq_size (load queue size, cf. sq_size) -- confirm */ 141 | uint32_t sq_size, 142 | int bp_type, 143 | string bp_config, 144 | int icache_type, 145 | string icache_config, 146 | int dcache_type, 147 | string dcache_config); 148 | ~O3CoreGraph(); 149 | void run(); 150 | }; 151 | 152 | 153 | #endif // O3_CORE_GRAPH 154 | -------------------------------------------------------------------------------- /src/graph/inorder_core_graph.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #ifndef IN_ORDER_CORE_GRAPH_H 26 | #define IN_ORDER_CORE_GRAPH_H 27 | 28 | #include "graph.h" 29 | #include "scoreboard_simple.h" 30 | 31 | /** 32 | * An in-order processor model based on gem5's MinorCPU. 33 | * The current implementation requires that branch prediction 34 | * result and load/store cycles are provided in the trace. NOTE(review): several unordered_map and pair declarations below carry no template arguments in this copy of the header; they look stripped by extraction, confirm against the upstream file. 35 | */ 36 | class InorderCoreGraph : public Graph 37 | { 38 | private: 39 | /*** Microarchitectural parameters ***/ 40 | 41 | uint32_t fetchBandwidth; // Bandwidth is in instructions per cycle 42 | uint32_t dispatchBandwidth; // MinorCPU: decodeInputWidth 43 | uint32_t issueBandwidth; // MinorCPU: executeIssueLimit 44 | uint32_t commitBandwidth; // MinorCPU: executeCommitLimit 45 | uint32_t decodeCycles; // MinorCPU: fetch1ToFetch2ForwardDelay + fetch2ToDecodeForwardDelay 46 | uint32_t dispatchCycles; // MinorCPU: decodeToExecuteForwardDelay 47 | uint32_t executeToCommitCycles; 48 | uint32_t predictionCycles; 49 | uint32_t mispredictionPenalty; 50 | uint32_t memIssueBandwidth; 51 | uint32_t memCommitBandwidth; 52 | uint32_t maxMemAccesses; 53 | bool loadDependentEarlyIssue; 54 | bool loadEarlyIssue; 55 | uint32_t extraLoadLatency; 56 | ScoreboardSimple scoreboard; // Also performs bookkeeping 57 | 58 | 59 | /*** Bookkeeping ***/ 60 | 61 | uint64_t lastMisprediction; 62 | bool previousInstrMispredicted; 63 | bool previousWasBranch; 64 | uint64_t linearPC; 65 | 66 | unordered_map> regLastWrittenBy; 67 | // Key: Register, Value: 68 | 69 | unordered_map regLastWrittenByLoad; 70 | // Key: Register, Value: Whether it was written by a load 71 | 72 | pair* ldStWindow; // First: Load/store number, Second: Access cycles 73 | uint32_t ldStWindowPointer; 74 | uint64_t lastMemLdSt; 75 | uint64_t lastLdStCriticalNum; 76 | uint32_t lastLdStCriticalCycles; 77 | 78 | pair neededRsc[INO_WINDOW]; 79 | // Key: Instruction number % INO_WINDOW 80 | // Value.first: Type from enum Resource (-1 means don't care) 81 | // Value.second: The needed
resource instance number 82 | 83 | 84 | /*** Graph-related data structures ***/ 85 | 86 | IncomingEdge miniGraph[VertexType::Last + 1][MAX_PARENTS]; 87 | // miniGraph[i] = Incoming edges to current instruction's i'th vertex 88 | 89 | uint32_t parents[VertexType::Last + 1]; 90 | // parents[i] = Number of parents of current instruction's i'th vertex 91 | 92 | 93 | void initBookKeeping(); 94 | void model(Instruction* instr); 95 | void modelPipeline(Vertex& fetch_vertex, Vertex& dispatch_vertex, 96 | Vertex& execute_vertex, Vertex& mem_vertex, 97 | Vertex& commit_vertex, Instruction* instr, 98 | uint32_t execution_cycles, 99 | unordered_map& execute_parent); 100 | void modelMemoryOrderConstraint(Vertex& mem_vertex, bool is_load, bool is_store); 101 | void trackDataDependencies(Instruction* instr, 102 | uint32_t source_independent_cycles, 103 | Vertex& execute_vertex, 104 | unordered_map& execute_parent); 105 | void modelResourceDependenciesSimple(bool is_int, bool is_Int_mul, 106 | bool is_int_div, bool is_fp, 107 | bool is_load_store, Vertex& execute_vertex, 108 | unordered_map& execute_parent); 109 | void addEdge(Vertex& parent, OutgoingEdge& e); 110 | void calculateInstructionCriticalPath(); 111 | 112 | public: 113 | InorderCoreGraph(string trace_file_name, 114 | string result_file_name, 115 | InstructionStream* instr_stream, 116 | uint32_t fetch_bandwidth, 117 | uint32_t dispatch_bandwidth, 118 | uint32_t issue_bandwidth, 119 | uint32_t commit_bandwidth, 120 | uint32_t decode_cycles, 121 | uint32_t dispatch_cycles, 122 | uint32_t execute_to_commit_cycles, 123 | uint32_t prediction_cycles, 124 | uint32_t misprediction_penalty, 125 | uint32_t mem_issue_bandwidth, 126 | uint32_t mem_commit_bandwidth, 127 | uint32_t max_mem_accesses, 128 | uint32_t int_alu_count, 129 | uint32_t int_mul_count, 130 | uint32_t int_div_count, 131 | uint32_t fpu_count, 132 | uint32_t lsu_count, 133 | bool load_dependent_early_issue, 134 | bool load_early_issue); 135 | ~InorderCoreGraph();
136 | void run(); 137 | }; 138 | 139 | #endif // IN_ORDER_CORE_GRAPH_H 140 | -------------------------------------------------------------------------------- /demo/sample1.trace: -------------------------------------------------------------------------------- 1 | @I 0xd1684 ld a0, 40(s2) @A 0x989f78 2 | @F 0 3 | @B 1 4 | @M 1000 5 | @I 0xd1688 c_slli a5, 32 6 | @F 0 7 | @B 1 8 | @I 0xd168a c_srli a5, 28 9 | @F 0 10 | @B 1 11 | @I 0xd168c c_add a5, a0 12 | @F 0 13 | @B 1 14 | @I 0xd168e sd s5, 0(a5) @A 0xa82130 15 | @F 0 16 | @B 1 17 | @M 7000 18 | @I 0xd1692 lwu a4, 52(s2) @A 0x989f84 19 | @F 0 20 | @B 1 21 | @M 1000 22 | @I 0xd1696 ld a5, 40(s2) @A 0x989f78 23 | @F 0 24 | @B 1 25 | @M 1000 26 | @I 0xd169a c_slli a4, 4 27 | @F 0 28 | @B 1 29 | @I 0xd169c c_add a5, a4 30 | @F 0 31 | @B 1 32 | @I 0xd169e c_li a4, 10 33 | @F 0 34 | @B 1 35 | @I 0xd16a0 c_sw a4, 8(a5) @A 0xa82138 36 | @F 0 37 | @B 1 38 | @M 6000 39 | @I 0xd16a2 lw a5, 52(s2) @A 0x989f84 40 | @F 0 41 | @B 1 42 | @M 1000 43 | @I 0xd16a6 sd s3, 16(s2) @A 0x989f60 44 | @F 0 45 | @B 1 46 | @M 6500 47 | @I 0xd16aa c_addiw a5, 1 48 | @F 0 49 | @B 1 50 | @I 0xd16ac sw a5, 52(s2) @A 0x989f84 51 | @F 0 52 | @B 1 53 | @M 6500 54 | @I 0xd16b0 c_ldsp ra, 104(sp) @A 0x7fffffffffffeb08 55 | @F 0 56 | @B 1 57 | @M 1000 58 | @I 0xd16b2 c_ldsp s0, 96(sp) @A 0x7fffffffffffeb00 59 | @F 0 60 | @B 1 61 | @M 1000 62 | @I 0xd16b4 c_ldsp s1, 88(sp) @A 0x7fffffffffffeaf8 63 | @F 0 64 | @B 1 65 | @M 1000 66 | @I 0xd16b6 c_ldsp s2, 80(sp) @A 0x7fffffffffffeaf0 67 | @F 0 68 | @B 1 69 | @M 1000 70 | @I 0xd16b8 c_ldsp s3, 72(sp) @A 0x7fffffffffffeae8 71 | @F 0 72 | @B 1 73 | @M 1500 74 | @I 0xd16ba c_ldsp s4, 64(sp) @A 0x7fffffffffffeae0 75 | @F 0 76 | @B 1 77 | @M 1500 78 | @I 0xd16bc c_ldsp s5, 56(sp) @A 0x7fffffffffffead8 79 | @F 0 80 | @B 1 81 | @M 1500 82 | @I 0xd16be c_ldsp s6, 48(sp) @A 0x7fffffffffffead0 83 | @F 0 84 | @B 1 85 | @M 2000 86 | @I 0xd16c0 c_ldsp s7, 40(sp) @A 0x7fffffffffffeac8 87 | @F 500 88 | @B 1 89 | @M
1500 90 | @I 0xd16c2 c_ldsp s8, 32(sp) @A 0x7fffffffffffeac0 91 | @F 0 92 | @B 1 93 | @M 2000 94 | @I 0xd16c4 c_ldsp s9, 24(sp) @A 0x7fffffffffffeab8 95 | @F 0 96 | @B 1 97 | @M 2000 98 | @I 0xd16c6 c_ldsp s10, 16(sp) @A 0x7fffffffffffeab0 99 | @F 0 100 | @B 1 101 | @M 2000 102 | @I 0xd16c8 c_ldsp s11, 8(sp) @A 0x7fffffffffffeaa8 103 | @F 0 104 | @B 1 105 | @M 2000 106 | @I 0xd16ca c_addi16sp sp, 112 107 | @F 0 108 | @B 1 109 | @I 0xd16cc c_jr ra 110 | @F 0 111 | @B 0 112 | @I 0xd4794 c_j -256 113 | @F 500 114 | @B 1 115 | @I 0xd4694 lw a5, 484(s1) @A 0x9845a4 116 | @F 500 117 | @B 1 118 | @M 1000 119 | @I 0xd4698 c_bnez a5, 22 120 | @F 0 121 | @B 1 122 | @I 0xd469a ld s2, 456(s1) @A 0x984588 123 | @F 0 124 | @B 1 125 | @M 1000 126 | @I 0xd469e addi a5, s1, 440 127 | @F 0 128 | @B 1 129 | @I 0xd46a2 sd a5, 472(s1) @A 0x984598 130 | @F 0 131 | @B 1 132 | @M 3500 133 | @I 0xd46a6 addi a5, s2, 24 134 | @F 0 135 | @B 1 136 | @I 0xd46aa sd a5, 432(s1) @A 0x984570 137 | @F 0 138 | @B 1 139 | @M 4500 140 | @I 0xd46ae addi a5, zero, 64 141 | @F 0 142 | @B 1 143 | @I 0xd46b2 sh a5, 6(s2) @A 0x9848e6 144 | @F 0 145 | @B 1 146 | @M 4500 147 | @I 0xd46b6 lbu a4, 25(s1) @A 0x9843d9 148 | @F 0 149 | @B 1 150 | @M 1000 151 | @I 0xd46ba c_li a5, 2 152 | @F 0 153 | @B 1 154 | @I 0xd46bc beq a4, a5, 198 155 | @F 0 156 | @B 1 157 | @I 0xd46c0 c_ld s0, 0(s1) @A 0x9843c0 158 | @F 500 159 | @B 1 160 | @M 1000 161 | @I 0xd46c2 lui s6, 1760 162 | @F 0 163 | @B 1 164 | @I 0xd46c6 addi s6, s6, -1968 165 | @F 0 166 | @B 1 167 | @I 0xd46ca c_ld a5, 40(s1) @A 0x9843e8 168 | @F 0 169 | @B 1 170 | @M 1000 171 | @I 0xd46cc c_lw a5, 36(a5) @A 0x949024 172 | @F 0 173 | @B 1 174 | @M 2500 175 | @I 0xd46ce sw a5, 0(s2) @A 0x9848e0 176 | @F 0 177 | @B 1 178 | @M 7000 179 | @I 0xd46d2 lwu a3, 48(s0) @A 0x989f80 180 | @F 0 181 | @B 1 182 | @M 2000 183 | @I 0xd46d6 c_ld a4, 40(s0) @A 0x989f78 184 | @F 0 185 | @B 1 186 | @M 2000 187 | @I 0xd46d8 c_ld a5, 0(s0) @A 0x989f50 188 | @F 0 189 | @B 1 190 | @M 
1500 191 | @I 0xd46da c_slli a3, 4 192 | @F 0 193 | @B 1 194 | @I 0xd46dc c_add a4, a3 195 | @F 0 196 | @B 1 197 | @I 0xd46de c_ld a4, 0(a4) @A 0xa82130 198 | @F 0 199 | @B 1 200 | @M 3500 201 | @I 0xd46e0 bgeu a5, a4, 64 202 | @F 0 203 | @B 1 204 | @I 0xd46e4 addi a1, a5, 1 205 | @F 0 206 | @B 1 207 | @I 0xd46e8 c_sd a1, 0(s0) @A 0x989f50 208 | @F 0 209 | @B 1 210 | @M 7000 211 | @I 0xd46ea c_ld a0, 40(s1) @A 0x9843e8 212 | @F 0 213 | @B 1 214 | @M 1000 215 | @I 0xd46ec c_ld a4, 8(s0) @A 0x989f58 216 | @F 500 217 | @B 1 218 | @M 1000 219 | @I 0xd46ee lbu s3, 0(a5) @A 0x40000000002dd961 220 | @F 0 221 | @B 1 222 | @M 1000 223 | @I 0xd46f2 c_lw a5, 40(a0) @A 0x949028 224 | @F 0 225 | @B 1 226 | @M 1000 227 | @I 0xd46f4 c_subw a1, a4 228 | @F 0 229 | @B 1 230 | @I 0xd46f6 addiw s7, s3, 0 231 | @F 0 232 | @B 1 233 | @I 0xd46fa bgeu a1, a5, 126 234 | @F 0 235 | @B 1 236 | @I 0xd46fe c_lw a5, 36(a0) @A 0x949024 237 | @F 0 238 | @B 1 239 | @M 1000 240 | @I 0xd4700 c_lw a4, 32(a0) @A 0x949020 241 | @F 500 242 | @B 1 243 | @M 1500 244 | @I 0xd4702 c_addw a5, a1 245 | @F 0 246 | @B 1 247 | @I 0xd4704 c_mv a1, a5 248 | @F 0 249 | @B 1 250 | @I 0xd4706 bgeu a5, a4, 50 251 | @F 0 252 | @B 0 253 | @I 0xd4738 c_sw a5, 32(a0) @A 0x949020 254 | @F 500 255 | @B 1 256 | @M 3000 257 | @I 0xd473a c_j -48 258 | @F 0 259 | @B 1 260 | @I 0xd470a sw a1, 0(s2) @A 0x9848e0 261 | @F 500 262 | @B 1 263 | @M 3000 264 | @I 0xd470e addi a5, zero, 126 265 | @F 0 266 | @B 1 267 | @I 0xd4712 bltu a5, s3, 1618 268 | @F 0 269 | @B 1 270 | @I 0xd4716 c_slli s3, 2 271 | @F 0 272 | @B 1 273 | @I 0xd4718 c_add s3, s6 274 | @F 0 275 | @B 1 276 | @I 0xd471a lw a5, 0(s3) @A 0x6df8d0 277 | @F 0 278 | @B 1 279 | @M 2000 280 | @I 0xd471e c_jr a5 281 | @F 0 282 | @B 0 283 | @I 0xd4aec lhu a5, 6(s2) @A 0x9848e6 284 | @F 500 285 | @B 1 286 | @M 1000 287 | @I 0xd4af0 lui s9, 2002 288 | @F 0 289 | @B 1 290 | @I 0xd4af4 c_lui s8, 1 291 | @F 0 292 | @B 1 293 | @I 0xd4af6 ori a5, a5, 1 294 | @F 0 295 | @B 1 296 | @I 
0xd4afa sh a5, 6(s2) @A 0x9848e6 297 | @F 0 298 | @B 1 299 | @M 4500 300 | @I 0xd4afe ld s3, 0(s1) @A 0x9843c0 301 | @F 500 302 | @B 1 303 | @M 1000 304 | @I 0xd4b02 c_li a6, 0 305 | @F 0 306 | @B 1 307 | @I 0xd4b04 addi s9, s9, 736 308 | @F 0 309 | @B 1 310 | @I 0xd4b08 ld a4, 0(s3) @A 0x989f50 311 | @F 0 312 | @B 1 313 | @M 2000 314 | @I 0xd4b0c addi s10, zero, 32 315 | @F 0 316 | @B 1 317 | @I 0xd4b10 c_li s11, 9 318 | @F 0 319 | @B 1 320 | @I 0xd4b12 lui a7, 1760 321 | @F 0 322 | @B 1 323 | @I 0xd4b16 addi s8, s8, -2048 324 | @F 0 325 | @B 1 326 | @I 0xd4b1a beq s7, s10, 70 327 | @F 0 328 | @B 0 329 | @I 0xd4b60 addi a3, a4, 1 330 | @F 36500 331 | @B 1 332 | @I 0xd4b64 sd a3, 0(s3) @A 0x989f50 333 | @F 0 334 | @B 1 335 | @M 3500 336 | @I 0xd4b68 lbu s7, 0(a4) @A 0x40000000002dd962 337 | @F 0 338 | @B 1 339 | @M 1000 340 | @I 0xd4b6c slli a5, s7, 1 341 | @F 0 342 | @B 1 343 | @I 0xd4b70 c_add a5, s9 344 | @F 0 345 | @B 1 346 | @I 0xd4b72 lhu a5, 0(a5) @A 0x7d2320 347 | @F 0 348 | @B 1 349 | @M 3000 350 | @I 0xd4b76 and a5, s8, a5 351 | @F 0 352 | @B 1 353 | -------------------------------------------------------------------------------- /src/graph/o3_core_graph_advanced.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #ifndef O3_CORE_GRAPH_ADVANCED 26 | #define O3_CORE_GRAPH_ADVANCED 27 | 28 | #include "graph.h" 29 | #include "calipers_defs.h" 30 | #include "scoreboard.h" 31 | 32 | /** 33 | * An out-of-order processor model based on gem5's DerivO3CPU. 34 | * This is a space-efficient implementation that performs a 35 | * "sliding-window" analysis, i.e., graph construction and analysis 36 | * are done (simultaneously) on a window of size OOO_SLIDIING_WINDOW 37 | * that slides from the beginning to the end of instructions in the 38 | * trace. NOTE(review): several unordered_map, pair and set declarations below carry no template arguments in this copy of the header; they look stripped by extraction, confirm against the upstream file.
39 | */ 40 | class O3CoreGraphAdvanced : public Graph 41 | { 42 | private: 43 | /*** Microarchitectural parameters ***/ 44 | 45 | uint32_t instrBufferSize; // Bandwidth is in instructions per cycle 46 | uint32_t fetchBandwidth; 47 | uint32_t dispatchBandwidth; 48 | uint32_t issueBandwidth; 49 | uint32_t commitBandwidth; 50 | uint32_t decodeCycles; 51 | uint32_t dispatchCycles; 52 | uint32_t executeToCommitCycles; 53 | uint32_t predictionCycles; 54 | uint32_t mispredictionPenalty; 55 | uint32_t memIssueBandwidth; 56 | uint32_t memCommitBandwidth; 57 | int bpType; 58 | string bpConfig; 59 | int icacheType; 60 | string icacheConfig; 61 | int dcacheType; 62 | string dcacheConfig; 63 | Scoreboard scoreboard[VECTOR_WIDTH]; // Also performs bookkeeping 64 | 65 | 66 | /*** Bookkeeping ***/ 67 | 68 | uint64_t currentIcacheLine; 69 | uint64_t lastMisprediction; 70 | uint64_t lastBranch; 71 | bool previousInstrMispredicted; 72 | bool previousWasBranch; 73 | uint64_t linearPC; 74 | uint64_t lastMemLdSt; 75 | uint32_t prevExecutionCycles; 76 | 77 | unordered_map> regLastWrittenBy; 78 | // Key: Register, Value: 79 | 80 | unordered_map regLastWrittenByLoad; 81 | // Key: Register, Value: Whether it was written by a load 82 | 83 | pair>* ldStWindow; // Second: of address 85 | bool* ldStWindowType; // Is load?
86 | uint32_t ldStWindowPointer; 87 | 88 | unordered_map lsCycles; 89 | // Key: Instruction number % AnalysisWindow, Value: Load/store cycles (UINT32_MAX for invalid) 90 | 91 | unordered_map executionCycles; 92 | // Key: Instruction number % AnalysisWindow, Value: Execution cycles (UINT32_MAX for invalid) 93 | 94 | 95 | /*** Graph-related data structures ***/ 96 | 97 | unordered_map, VertexHash, VertexEqual> graphChildren; 98 | // graphChildren[v] = Vector of children of Vertex v 99 | 100 | unordered_map, VertexHash, VertexEqual> graphParents; 101 | // graphParents[v] = Vector of parents of Vertex v 102 | 103 | ScheduleSet scheduleOrder[VECTOR_WIDTH]; 104 | // The set(s) of pairs sorted based on length 105 | 106 | uint64_t headInstr; 107 | // The next instruction to be brought into the analysis window 108 | 109 | uint64_t headScheduledInstr; 110 | // All instructions before headScheduledInstr have been scheduled. 111 | 112 | set alreadyScheduled[VECTOR_WIDTH]; 113 | // The set(s) of instructions (starting from headScheduledInstr) that have been scheduled 114 | 115 | uint64_t maxSchedInstrNum[VECTOR_WIDTH]; 116 | // The instruction(s) with the greatest number that has been scheduled 117 | 118 | 119 | void initBookKeeping(); 120 | void initScoreboard(); 121 | void model(Instruction* instr); 122 | void modelPipeline(Vertex& fetch_vertex, Vertex& dispatch_vertex, 123 | Vertex& execute_vertex, Vertex& mem_vertex, 124 | Vertex& commit_vertex, Instruction* instr, 125 | uint32_t execution_cycles); 126 | bool modelMemoryOrderConstraint(Instruction* instr, Vertex& mem_vertex); 127 | void trackDataDependencies(Instruction* instr, 128 | Vertex& execute_vertex, Vertex& mem_vertex); 129 | pair modelResourceDependencies(); 130 | void addEdge(Vertex& parent, OutgoingEdge& e); 131 | void calculateInstructionCriticalPath(); 132 | void updateCriticalPath(uint32_t idx, 133 | Vertex* parent1, OutgoingEdge* e1, 134 | Vertex* parent2, OutgoingEdge* e2, 135 | Vertex* parent3, OutgoingEdge*
e3); 136 | 137 | public: 138 | O3CoreGraphAdvanced(string trace_file_name, 139 | string result_file_name, 140 | InstructionStream* instr_stream, 141 | uint32_t instr_buffer_size, 142 | uint32_t instr_queue_size, 143 | uint32_t fetch_bandwidth, 144 | uint32_t dispatch_bandwidth, 145 | uint32_t issue_bandwidth, 146 | uint32_t commit_bandwidth, 147 | uint32_t decode_cycles, 148 | uint32_t dispatch_cycles, 149 | uint32_t execute_to_commit_cycles, 150 | uint32_t prediction_cycles, 151 | uint32_t misprediction_penalty, 152 | uint32_t mem_issue_bandwidth, 153 | uint32_t mem_commit_bandwidth, 154 | uint32_t int_alu_count, 155 | uint32_t int_mul_div_count, 156 | uint32_t fp_alu_count, 157 | uint32_t fp_mul_div_count, 158 | uint32_t lsu_count, 159 | uint32_t lq_qize, 160 | uint32_t sq_size, 161 | int bp_type, 162 | string bp_config, 163 | int icache_type, 164 | string icache_config, 165 | int dcache_type, 166 | string dcache_config); 167 | ~O3CoreGraphAdvanced(); 168 | void run(); 169 | }; 170 | 171 | 172 | #endif // O3_CORE_GRAPH_ADVANCED 173 | -------------------------------------------------------------------------------- /src/graph/scoreboard.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software.
15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #ifndef SCOREBOARD_H 26 | #define SCOREBOARD_H 27 | 28 | #include "calipers_defs.h" 29 | 30 | 31 | /** 32 | * A class for keeping track of the users of different execution units and queues 33 | */ 34 | class Scoreboard 35 | { 36 | private: 37 | typedef struct RESOURCE_INSTANCE 38 | { 39 | uint32_t count; 40 | uint32_t latency; 41 | bool pipelined; 42 | uint32_t nextAvailable; 43 | uint64_t* assignedInstrNum; 44 | int* assignedOp; // For resources that do a mixture of operations 45 | } ResourceInstance; 46 | 47 | 48 | typedef struct QUEUE_INSTANCE 49 | { 50 | uint32_t size; 51 | uint64_t* latency; 52 | uint32_t nextAvailable; 53 | uint64_t* assignedInstrNum; 54 | } QueueInstance; 55 | 56 | unordered_map resources; 57 | // Key: Resource type (from enum Resource) 58 | 59 | unordered_map mixedOperationResource; 60 | // Key: Operation type (from enum Resource) 61 | // Value: Resource type (from enum Resource) 62 | 63 | unordered_map> mixedOperationSpec; 64 | // Key: Operation type (from enum Resource) 65 | // Value.first: Operation latency 66 | // Value.second: Pipelined? 
67 | 68 | unordered_map queues; 69 | // Key: Queue type (from enum QueueResource) 70 | 71 | public: 72 | void setMixedOperation(int operation_type, int resource_type, 73 | uint32_t latency, bool pipelined) 74 | { 75 | if (resources.count(resource_type) == 0) 76 | { 77 | CALIPERS_ERROR("Resource for the mixed operation not initialized yet"); 78 | } 79 | 80 | if (resources[resource_type].assignedOp == NULL) 81 | { 82 | resources[resource_type].assignedOp = new int[resources[resource_type].count]; 83 | } 84 | 85 | mixedOperationResource[operation_type] = resource_type; 86 | mixedOperationSpec[operation_type].first = latency; 87 | mixedOperationSpec[operation_type].second = pipelined; 88 | } 89 | 90 | void initResource(int resource_type, uint32_t count, 91 | uint32_t latency, bool pipelined) 92 | { 93 | if (resources.count(resource_type) != 0) 94 | { 95 | CALIPERS_ERROR("Resource already initialized"); 96 | } 97 | 98 | resources[resource_type].count = count; 99 | resources[resource_type].latency = latency; 100 | resources[resource_type].pipelined = pipelined; 101 | resources[resource_type].assignedInstrNum = new uint64_t[count]; 102 | resources[resource_type].assignedOp = NULL; 103 | resetResource(resource_type); 104 | } 105 | 106 | void resetResource(int resource_type) 107 | { 108 | resources[resource_type].nextAvailable = 0; 109 | for (uint32_t i = 0; i < resources[resource_type].count; ++i) 110 | { 111 | resources[resource_type].assignedInstrNum[i] = UINT64_MAX; 112 | } 113 | } 114 | 115 | void initQueue(int type, uint32_t size) 116 | { 117 | if (queues.count(type) != 0) 118 | { 119 | CALIPERS_ERROR("Queue already initialized"); 120 | } 121 | 122 | queues[type].size = size; 123 | queues[type].latency = new uint64_t[size]; 124 | queues[type].assignedInstrNum = new uint64_t[size]; 125 | resetQueue(type); 126 | } 127 | 128 | void resetQueue(int type) 129 | { 130 | queues[type].nextAvailable = 0; 131 | for (uint32_t i = 0; i < queues[type].size; ++i) 132 | { 133 | 
queues[type].assignedInstrNum[i] = UINT64_MAX; 134 | queues[type].latency[i] = 0; 135 | } 136 | } 137 | 138 | void scheduleResource(int operation_type, uint64_t instr_num, 139 | uint64_t& previous_instr, uint32_t& wait_cycles) 140 | { 141 | int resource_type; 142 | uint32_t sample_num; 143 | uint32_t latency; 144 | bool pipelined; 145 | 146 | if (mixedOperationResource.count(operation_type) == 0) 147 | { 148 | resource_type = operation_type; 149 | sample_num = resources[resource_type].nextAvailable; 150 | latency = resources[resource_type].latency; 151 | pipelined = resources[resource_type].pipelined; 152 | } 153 | else 154 | { 155 | resource_type = mixedOperationResource[operation_type]; 156 | sample_num = resources[resource_type].nextAvailable; 157 | int previous_operation_type = resources[resource_type].assignedOp[sample_num]; 158 | latency = mixedOperationSpec[previous_operation_type].first; 159 | pipelined = mixedOperationSpec[previous_operation_type].second; 160 | resources[resource_type].assignedOp[sample_num] = operation_type; 161 | } 162 | 163 | previous_instr = resources[resource_type].assignedInstrNum[sample_num]; 164 | wait_cycles = pipelined ? 
1 : latency; 165 | 166 | resources[resource_type].assignedInstrNum[sample_num] = instr_num; 167 | resources[resource_type].nextAvailable = (sample_num + 1) % 168 | resources[resource_type].count; 169 | } 170 | 171 | void scheduleQueue(int type, uint64_t instr_num, uint32_t latency, 172 | uint64_t& previous_instr, uint32_t& wait_cycles) 173 | { 174 | uint32_t entry = queues[type].nextAvailable; 175 | previous_instr = queues[type].assignedInstrNum[entry]; 176 | wait_cycles = queues[type].latency[entry]; 177 | 178 | queues[type].nextAvailable = (entry + 1) % queues[type].size; 179 | queues[type].assignedInstrNum[entry] = instr_num; 180 | queues[type].latency[entry] = latency; 181 | } 182 | 183 | uint32_t getResourceCount(int type) 184 | { 185 | return resources[type].count; 186 | } 187 | 188 | uint32_t getResourceLatency(int type) 189 | { 190 | if (mixedOperationResource.count(type) == 0) 191 | { 192 | return resources[type].latency; 193 | } 194 | else 195 | { 196 | return mixedOperationSpec[type].first; 197 | } 198 | } 199 | 200 | uint32_t getQueueSize(int type) 201 | { 202 | return queues[type].size; 203 | } 204 | 205 | ~Scoreboard() 206 | { 207 | for (auto it = resources.begin(); it != resources.end(); ++it) 208 | { 209 | int type = it->first; 210 | delete[] resources[type].assignedInstrNum; 211 | if (resources[type].assignedOp != NULL) 212 | { 213 | delete[] resources[type].assignedOp; 214 | } 215 | } 216 | 217 | for (auto it = queues.begin(); it != queues.end(); ++it) 218 | { 219 | int type = it->first; 220 | delete[] queues[type].latency; 221 | delete[] queues[type].assignedInstrNum; 222 | } 223 | } 224 | }; 225 | 226 | #endif // SCOREBOARD_H 227 | -------------------------------------------------------------------------------- /src/graph/graph_util.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 
3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #ifndef GRAPH_UTIL_H 26 | #define GRAPH_UTIL_H 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | #include "calipers_defs.h" 33 | 34 | using namespace std; 35 | 36 | 37 | /** 38 | * A utility class for vector-weighted edges 39 | */ 40 | class Vector 41 | { 42 | private: 43 | int64_t vec[VECTOR_WIDTH]; 44 | 45 | public: 46 | Vector(int64_t* arr, uint32_t width) 47 | { 48 | if (width != VECTOR_WIDTH) 49 | { 50 | CALIPERS_ERROR("Invalid array width in vector init"); 51 | } 52 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 53 | { 54 | vec[i] = arr[i]; 55 | } 56 | } 57 | 58 | Vector(int64_t* arr, uint32_t width, int64_t offset) 59 | { 60 | if (width != VECTOR_WIDTH) 61 | { 62 | CALIPERS_ERROR("Invalid array width in vector init with offset"); 63 | } 64 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 65 | { 66 | vec[i] = arr[i] - offset; 67 | } 68 | } 69 | 70 | Vector(const Vector& v_in, int64_t offset) 71 | { 72 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 73 | { 74 | vec[i] = v_in[i] - offset; 75 | } 76 | } 77 | 78 | Vector(int64_t val) 79 | { 80 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 81 | { 82 | vec[i] = val; 83 | } 84 | } 85 | 86 | Vector(int64_t val, uint32_t idx) 87 | { 88 | if (idx >= VECTOR_WIDTH) 89 | { 90 | CALIPERS_ERROR("Invalid index in vector init"); 91 | } 92 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 93 | { 94 | vec[i] = INT64_MAX; 95 | } 96 | vec[idx] = val; 97 | } 98 | Vector() : Vector(0) 99 | {} 100 | 101 | void operator=(const Vector& v_in) 102 | { 103 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 104 | { 105 | vec[i] = v_in[i]; 106 | } 107 | } 108 | 109 | int64_t operator[](const uint32_t idx) const 110 | { 111 | if (idx >= VECTOR_WIDTH) 112 | { 113 | CALIPERS_ERROR("Invalid index for vector element access"); 114 | } 115 | return vec[idx]; 116 | } 117 | 118 | void update(const Vector& v_in1, const Vector& v_in2) 119 | { 120 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 121 | { 122 | if ((v_in2[i] != INT64_MAX) && 
(v_in1[i] + v_in2[i] > vec[i])) 123 | { 124 | vec[i] = v_in1[i] + v_in2[i]; 125 | } 126 | } 127 | } 128 | 129 | void update(const Vector& v_in1, const Vector& v_in2, bool* mask, uint32_t width) 130 | { 131 | if (width != VECTOR_WIDTH) 132 | { 133 | CALIPERS_ERROR("Invalid output mask width in vector update"); 134 | } 135 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 136 | { 137 | if ((v_in2[i] != INT64_MAX) && (v_in1[i] + v_in2[i] >= vec[i])) 138 | { 139 | vec[i] = v_in1[i] + v_in2[i]; 140 | mask[i] = true; 141 | } 142 | else 143 | { 144 | mask[i] = false; 145 | } 146 | } 147 | } 148 | 149 | void maskedSet(const Vector& v_in, bool* mask, uint32_t width) 150 | { 151 | if (width != VECTOR_WIDTH) 152 | { 153 | CALIPERS_ERROR("Invalid input mask width in vector maskedSet"); 154 | } 155 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 156 | { 157 | if (mask[i]) 158 | { 159 | vec[i] = v_in[i]; 160 | } 161 | } 162 | } 163 | 164 | void maskedAdd(const Vector& v_in, bool* mask, uint32_t width) 165 | { 166 | if (width != VECTOR_WIDTH) 167 | { 168 | CALIPERS_ERROR("Invalid input mask width in vector maskedAdd"); 169 | } 170 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 171 | { 172 | if (mask[i]) 173 | { 174 | vec[i] += v_in[i]; 175 | } 176 | } 177 | } 178 | 179 | void largerThan(int64_t val, bool* mask, bool* result, uint32_t width) 180 | { 181 | if (width != VECTOR_WIDTH) 182 | { 183 | CALIPERS_ERROR("Invalid mask/result width in vector largerThan"); 184 | } 185 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 186 | { 187 | if (mask[i] && (vec[i] > val)) 188 | { 189 | result[i] = true; 190 | } 191 | else 192 | { 193 | result[i] = false; 194 | } 195 | } 196 | } 197 | 198 | void smallerThanOrEqual(int64_t val, bool* mask, bool* result, uint32_t width) 199 | { 200 | if (width != VECTOR_WIDTH) 201 | { 202 | CALIPERS_ERROR("Invalid mask/result width in vector smallerThanOrEqual"); 203 | } 204 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 205 | { 206 | if (mask[i] && (vec[i] <= val)) 
207 | { 208 | result[i] = true; 209 | } 210 | else 211 | { 212 | result[i] = false; 213 | } 214 | } 215 | } 216 | 217 | void between(int64_t val1, int64_t val2, bool* mask, bool* result, uint32_t width) 218 | { 219 | if (width != VECTOR_WIDTH) 220 | { 221 | CALIPERS_ERROR("Invalid mask/result width in vector between"); 222 | } 223 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 224 | { 225 | if (mask[i] && (vec[i] > val1) && (vec[i] <= val2)) 226 | { 227 | result[i] = true; 228 | } 229 | else 230 | { 231 | result[i] = false; 232 | } 233 | } 234 | } 235 | 236 | string toString() 237 | { 238 | string s = ""; 239 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 240 | { 241 | s.append(to_string(vec[i]) + " "); 242 | } 243 | return s; 244 | } 245 | }; 246 | 247 | typedef struct VERTEX 248 | { 249 | int type; // From enum VertexType 250 | uint64_t instrNum; 251 | 252 | VERTEX(int type, uint64_t instrNum) : type(type), instrNum(instrNum) 253 | {} 254 | 255 | VERTEX() : type(0), instrNum(0) 256 | {} 257 | } Vertex; 258 | 259 | /** 260 | * An INT64_MAX entry in the weight vector denotes the corresponding edge 261 | * does not exist in that specific scenraio. An edge migh exist in 262 | * one scenario and not exist in another scenraio. This may happen, e.g., 263 | * for edges related to branch misprediction and structural hazards. 
264 | */ 265 | typedef struct OUTGOING_EDGE 266 | { 267 | Vertex child; 268 | Vector weight; 269 | 270 | OUTGOING_EDGE() : child(Vertex(0, 0)), weight(0) 271 | {} 272 | 273 | OUTGOING_EDGE(Vertex child, Vector& v) : child(child) 274 | { 275 | weight = v; 276 | } 277 | 278 | OUTGOING_EDGE(Vertex child, int64_t val) : child(child), weight(val) 279 | {} 280 | 281 | OUTGOING_EDGE(Vertex child, int64_t val, uint32_t idx) : child(child), weight(val, idx) 282 | {} 283 | 284 | } OutgoingEdge; 285 | 286 | 287 | typedef struct INCOMING_EDGE 288 | { 289 | Vertex parent; 290 | Vector weight; 291 | 292 | INCOMING_EDGE() : parent(Vertex(0, 0)), weight(0) 293 | {} 294 | 295 | INCOMING_EDGE(Vertex parent, Vector& v) : parent(parent) 296 | { 297 | weight = v; 298 | } 299 | 300 | INCOMING_EDGE(Vertex parent, int64_t val) : parent(parent), weight(val) 301 | {} 302 | 303 | INCOMING_EDGE(Vertex parent, int64_t val, uint32_t idx) : parent(parent), weight(val, idx) 304 | {} 305 | 306 | } IncomingEdge; 307 | 308 | #endif // GRAPH_UTIL_H 309 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Calipers 2 | 3 | Calipers is a tool for modeling processor performance through *event-dependence graphs*. 4 | Calipers takes the program's dynamic instruction trace and a configuration file containing 5 | microarchitectural and ISA specifications of the processor. It constructs a graph that models 6 | the dependency and latency between microarchitectural events. Calipers then calculates the 7 | performance (cycles per instruction) and provides the breakdown of bottlenecks through graph 8 | analysis. 
9 | 10 | For more information, please refer to our publication, 11 | "[Calipers: A Criticality-aware Framework for Modeling Processor Performance](https://dl.acm.org/doi/abs/10.1145/3524059.3532390)," in 12 | *Proceedings of the 36th ACM International Conference on Supercomputing (ICS '22)*. 13 | ([extended version on arXiv](https://arxiv.org/abs/2201.05884)) 14 | 15 | A recording of the ICS'22 conference presentation is available on [YouTube](https://www.youtube.com/watch?v=QP9uVPTna_s). 16 | 17 | ## Build Instructions 18 | 19 | The code has no particular dependencies and can be built with `make`: 20 | 21 | ```sh 22 | git clone git@github.com:microsoft/calipers.git 23 | cd calipers 24 | make 25 | ``` 26 | 27 | The code has been built and tested on Ubuntu 18.04. 28 | 29 | ## Usage Instructions 30 | 31 | ```sh 32 | ./calipers config_file trace_file result_file 33 | ``` 34 | 35 | Example: 36 | 37 | ```sh 38 | cd build 39 | ./calipers ../demo/InO.cfg ../demo/sample1.trace ./result_InO.txt 40 | ./calipers ../demo/OoO.cfg ../demo/sample2.trace ./result_OoO.txt 41 | ``` 42 | ## Directory Structure 43 | 44 | - `demo`: Contains sample configuration and trace files. 45 | Please refer to [README.md](demo/README.md) under this directory for more details. 46 | - `src`: Contains the source code of the project: 47 | - `branch_predictor`: Branch prediction information can be either provided through the trace 48 | or obtained from a model. The models are placed in this directory. Currently, it only 49 | contains a *statistical model* with configurable accuracy. 50 | - `common`: Contains the main and utility functions as well as defined constants and 51 | data types. 52 | - `graph`: Contains the graph-based modeler and analyzer for an in-order and an out-of-order 53 | processor. The latter also has a memory-efficient (advanced) implementation. 54 | - `memory`: Load and store information can be either provided through the trace 55 | or obtained from a model. 
The models are placed in this directory. Currently, it contains 56 | an *ideal model* (single-cycle loads/stores), a *statistical model* (configurable load/store 57 | hit rate and hit/miss cycles), and a *real model* (analytical two-layer cache with 58 | configurable size, associativity, and load/store hit/miss cycles). 59 | - `trace`: Contains the trace reader/parser. Currently, the RISC-V ISA is supported. 60 | 61 | ## Design Space Exploration 62 | 63 | There are two ways to explore new processor designs: 64 | 1. For a given processor model (such as the ones implemented in 65 | [inorder_core_graph.cpp](src/graph/inorder_core_graph.cpp) and 66 | [o3_core_graph.h](src/graph/o3_core_graph.h)), the exposed parameters (such as the ones 67 | specified in [InO.cfg](demo/InO.cfg) and [OoO.cfg](demo/OoO.cfg) for the implemented models) 68 | can be varied to configure new designs. 69 | 2. A processor model in Calipers essentially consists of microarchitectural events (graph 70 | vertices) and dependencies between them (graph edges and their weights) caused by data, 71 | control, and structural hazards. Therefore, new processors can be modeled by varying the 72 | events and/or dependencies between them. For example, the in-order issue constraint is modeled 73 | by: 74 | ```cpp 75 | Vertex execute_vertex(VertexType::InstrExecute, instrCount); 76 | Vertex prev_execute_vertex(VertexType::InstrExecute, instrCount - 1); 77 | OutgoingEdge in_order_issue(execute_vertex, 0); 78 | addEdge(prev_execute_vertex, in_order_issue); 79 | ``` 80 | in `modelPipeline` in [inorder_core_graph.cpp](src/graph/inorder_core_graph.cpp), whereas 81 | out-of-order issue is modeled by first obtaining a scheduling list for the vertices of type 82 | `VertexType::InstrExecute` in [o3_core_graph.h](src/graph/o3_core_graph.h) according to 83 | data and control dependencies.
84 | 85 | ### Sample What-if Scenarios 86 | 87 | Designers often face what-if scenarios when exploring new designs, e.g., how much a specific 88 | component of the core is worth optimizing. Such scenarios can be evaluated in Calipers by 89 | manipulating the graph vertices and edges and/or adjusting edge weights. 90 | 91 | - Example 1: branch prediction. 92 | As discussed in our paper, the effect of improving the branch predictor can be evaluated by 93 | transforming the edge *E*<sub>*n*</sub> → *F*<sub>*n+1*</sub> to *F*<sub>*n*</sub> → *F*<sub>*n+1*</sub>, 94 | where instruction *n* is a branch. Therefore, by 95 | adjusting the `mispredicted` condition in the following code block (derived from `modelPipeline` 96 | in [o3_core_graph.h](src/graph/o3_core_graph.h)), we can model different scenarios. 97 | For example, by setting `mispredicted` to `false` all the time, a perfect branch predictor is modeled. 98 | ```cpp 99 | Vertex fetch_vertex(VertexType::InstrFetch, instrCount); 100 | if (mispredicted) 101 | { 102 | Vertex prev_branch_vertex(VertexType::InstrExecute, instrCount - 1); 103 | OutgoingEdge mispredicted_fetch(fetch_vertex, mispredicted_fetch_weight); 104 | addEdge(prev_branch_vertex, mispredicted_fetch); 105 | } 106 | else 107 | { 108 | Vertex prev_fetch_vertex(VertexType::InstrFetch, instrCount - 1); 109 | OutgoingEdge in_order_fetch(fetch_vertex, in_order_fetch_weight); 110 | addEdge(prev_fetch_vertex, in_order_fetch); 111 | } 112 | ``` 113 | 114 | - Example 2: value prediction. 115 | Value prediction enables instructions to continue execution even before the source data, 116 | particularly from an earlier load, is available. Since value prediction demands chip resources, 117 | it is crucial to know which loads and what fraction of them should be value-predicted for higher 118 | performance gains. Calipers models data dependency from instruction *n* to instruction *m* by the 119 | edge *E*<sub>*n*</sub> → *E*<sub>*m*</sub>. Removing such an edge means instruction *n* is 120 | correctly value-predicted.
Different criteria for performing value prediction can be evaluated 121 | by removing the corresponding edges. For example, setting `is_value_predicted` in the following 122 | code block (derived from `trackDataDependencies` in 123 | [o3_core_graph.h](src/graph/o3_core_graph.h)) should be done according to a particular criterion 124 | for selecting to-be-value-predicted loads. 125 | ```cpp 126 | if (reg_written_by_load && !is_value_predicted) 127 | { 128 | Vertex execute_vertex(VertexType::InstrExecute, instrCount); 129 | Vertex prev_mem_vertex(VertexType::MemExecute, load_num); 130 | OutgoingEdge dependence_edge(execute_vertex, data_weight); 131 | addEdge(prev_mem_vertex, dependence_edge); 132 | } 133 | ``` 134 | 135 | ### Leveraging Vectorization 136 | 137 | Calipers introduces *vectorized* graphs, wherein a vector of weights instead of a scalar value 138 | can be assigned to each edge. Vectorization allows multiple configurations to be modeled 139 | and analyzed simultaneously. Using vectorization, Calipers does not need to construct the graph 140 | from scratch in *N* separate runs, i.e., using *N* separate threads of execution, for *N* 141 | configurations. 142 | 143 | The width of the edge-weight vectors, `VECTOR_WIDTH`, is defined in 144 | [calipers_defs.h](src/common/calipers_defs.h). As an example, assume that we want to model three 145 | different decode cycles simultaneously. Therefore, `VECTOR_WIDTH` is set to 3. The decode edge 146 | can be created as follows: 147 | ```cpp 148 | Vertex fetch_vertex(VertexType::InstrFetch, instrCount); 149 | Vertex dispatch_vertex(VertexType::InstrDispatch, instrCount); 150 | int64_t decode_vec[VECTOR_WIDTH] = {base_weight, base_weight + 1, base_weight + 2}; 151 | OutgoingEdge fetch_after_dispatch(dispatch_vertex, Vector(decode_vec, VECTOR_WIDTH)); 152 | addEdge(fetch_vertex, fetch_after_dispatch); 153 | ``` 154 | In this example, other edges can be created as if their weights were scalars.
For them, the 155 | elements of the weight vector will have the same value. 156 | 157 | ## Contributing 158 | 159 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 160 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 161 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 162 | 163 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 164 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 165 | provided by the bot. You will only need to do this once across all repos using our CLA. 166 | 167 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 168 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 169 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 170 | 171 | ## Trademarks 172 | 173 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 174 | trademarks or logos is subject to and must follow 175 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 176 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 177 | Any use of third-party trademarks or logos are subject to those third-party's policies. 178 | -------------------------------------------------------------------------------- /src/common/main.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 
3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "calipers_defs.h" 32 | #include "calipers_types.h" 33 | #include "instruction_stream.h" 34 | #include "riscv_stream.h" 35 | #include "graph.h" 36 | #include "inorder_core_graph.h" 37 | #include "o3_core_graph.h" 38 | #include "o3_core_graph_advanced.h" 39 | 40 | using namespace std; 41 | 42 | uint32_t Graph::AnalysisWindow; 43 | 44 | void extract_config(string config_file_name, unordered_map& config) 45 | { 46 | ifstream config_file; 47 | config_file.open(config_file_name); 48 | if (!config_file.is_open()) 49 | { 50 | CALIPERS_ERROR("Unable to open the config file"); 51 | } 52 | 53 | string line; 54 | while (getline(config_file, line)) 55 | { 56 | istringstream iss(line); 57 | string param, val; 58 | iss >> param; 59 | iss >> val; 60 | config[param] = val; 61 | } 62 | 63 | if (config["ISA"].compare("RISC-V") != 0) 64 | { 65 | CALIPERS_ERROR("Unsupproted ISA: " << config["ISA"]); 66 | } 67 | 68 | if ((config["Core"].compare("InO") != 0) && 69 | (config["Core"].compare("OoO") != 0)) 70 | { 71 | CALIPERS_ERROR("Unsupproted core: " << config["Core"]); 72 | } 73 | 74 | if ((config["Branch_Predictor"].compare("TraceB") != 0) && 75 | (config["Branch_Predictor"].compare("StatisticalB") != 0)) 76 | { 77 | CALIPERS_ERROR("Unsupproted branch predictor: " << config["Branch_Predictor"]); 78 | } 79 | 80 | if ((config["I_Cache"].compare("TraceC") != 0) && 81 | (config["I_Cache"].compare("IdealC") != 0) && 82 | (config["I_Cache"].compare("StatisticalC") != 0) && 83 | (config["I_Cache"].compare("RealC") != 0)) 84 | { 85 | CALIPERS_ERROR("Unsupproted I-cache: " << config["I_Cache"]); 86 | } 87 | 88 | if ((config["D_Cache"].compare("TraceC") != 0) && 89 | (config["D_Cache"].compare("IdealC") != 0) && 90 | (config["D_Cache"].compare("StatisticalC") != 0) && 91 | (config["D_Cache"].compare("RealC") != 0)) 92 | { 93 | CALIPERS_ERROR("Unsupproted D-cache: " << config["D_Cache"]); 94 | } 
95 | } 96 | 97 | bool use_bp_model(unordered_map& config) 98 | { 99 | return (config["Branch_Predictor"].compare("TraceB") != 0); 100 | } 101 | 102 | bool use_icache_model(unordered_map& config) 103 | { 104 | return (config["I_Cache"].compare("TraceC") != 0); 105 | } 106 | 107 | bool use_dcache_model(unordered_map& config) 108 | { 109 | return (config["D_Cache"].compare("TraceC") != 0); 110 | } 111 | 112 | int bp_type(string str) 113 | { 114 | int type; 115 | if (str.compare("TraceB") == 0) 116 | { 117 | type = BranchPredictorType::TraceB; 118 | } 119 | else // if (str.compare("StatisticalB") == 0) 120 | { 121 | type = BranchPredictorType::StatisticalB; 122 | } 123 | return type; 124 | } 125 | 126 | int cache_type(string str) 127 | { 128 | int type; 129 | if (str.compare("TraceC") == 0) 130 | { 131 | type = CacheType::TraceC; 132 | } 133 | else if (str.compare("IdealC") == 0) 134 | { 135 | type = CacheType::IdealC; 136 | } 137 | else if (str.compare("StatisticalC") == 0) 138 | { 139 | type = CacheType::StatisticalC; 140 | } 141 | else // if (str.compare("RealC") == 0) 142 | { 143 | type = CacheType::RealC; 144 | } 145 | return type; 146 | } 147 | 148 | Graph* init(char* argv[], InstructionStream* instr_stream) 149 | { 150 | srand(RAND_SEED); // For the statistical cache or branch preditor model, if used 151 | 152 | Graph* graph; 153 | unordered_map config; 154 | extract_config(argv[1], config); 155 | 156 | bool trace_bp = !use_bp_model(config); 157 | bool trace_icache = !use_icache_model(config); 158 | bool trace_dcache = !use_dcache_model(config); 159 | 160 | instr_stream = new RiscvStream(argv[2], // Trace file name 161 | trace_bp, trace_icache, trace_dcache); 162 | 163 | if (config["Core"].compare("InO") == 0) 164 | { 165 | if (!(trace_bp && trace_icache && trace_dcache)) 166 | { 167 | CALIPERS_ERROR( 168 | "Current InO model needs trace-provided branch prediction and load/store info"); 169 | } 170 | 171 | Graph::AnalysisWindow = INO_WINDOW; 172 | graph = new 
InorderCoreGraph(argv[2], // Trace file name 173 | argv[3], // Result file name 174 | instr_stream, 175 | stoi(config["Fetch_Bandwidth"]), 176 | stoi(config["Dispatch_Bandwidth"]), 177 | stoi(config["Issue_Bandwidth"]), 178 | stoi(config["Commit_Bandwidth"]), 179 | stoi(config["Decode_Cycles"]), 180 | stoi(config["Dispatch_Cycles"]), 181 | stoi(config["Execute_To_Commit_Cycles"]), 182 | stoi(config["Prediction_Cycles"]), 183 | stoi(config["Misprediction_Penalty"]), 184 | stoi(config["Mem_Issue_Bandwidth"]), 185 | stoi(config["Mem_Commit_Bandwidth"]), 186 | stoi(config["Max_Mem_Accesses"]), 187 | stoi(config["Int_ALU_Count"]), 188 | stoi(config["Int_Mul_Count"]), 189 | stoi(config["Int_Div_Count"]), 190 | stoi(config["FPU_Count"]), 191 | stoi(config["LSU_Count"]), 192 | stoi(config["Load_Dependent_Early_Issue"]), 193 | stoi(config["Load_Early_Issue"])); 194 | 195 | } 196 | else //if (config["Core"].compare("OoO") == 0) 197 | { 198 | Graph::AnalysisWindow = OOO_HOPPING_WINDOW; 199 | graph = new O3CoreGraph(argv[2], // Trace file name 200 | argv[3], // Result file name 201 | instr_stream, 202 | stoi(config["Instr_Buffer_Size"]), 203 | stoi(config["Instr_Queue_Size"]), 204 | stoi(config["Fetch_Bandwidth"]), 205 | stoi(config["Dispatch_Bandwidth"]), 206 | stoi(config["Issue_Bandwidth"]), 207 | stoi(config["Commit_Bandwidth"]), 208 | stoi(config["Decode_Cycles"]), 209 | stoi(config["Dispatch_Cycles"]), 210 | stoi(config["Execute_To_Commit_Cycles"]), 211 | stoi(config["Prediction_Cycles"]), 212 | stoi(config["Misprediction_Penalty"]), 213 | stoi(config["Mem_Issue_Bandwidth"]), 214 | stoi(config["Mem_Commit_Bandwidth"]), 215 | stoi(config["Int_ALU_Count"]), 216 | stoi(config["Int_Mul_Div_Count"]), 217 | stoi(config["FP_ALU_Count"]), 218 | stoi(config["FP_Mul_Div_Count"]), 219 | stoi(config["LSU_Count"]), 220 | stoi(config["LQ_Size"]), 221 | stoi(config["SQ_Size"]), 222 | bp_type(config["Branch_Predictor"]), 223 | config["Branch_Predictor_Config"], 224 | 
cache_type(config["I_Cache"]), 225 | config["I_Cache_Config"], 226 | cache_type(config["D_Cache"]), 227 | config["D_Cache_Config"]); 228 | /* 229 | Graph::AnalysisWindow = OOO_SLIDING_WINDOW; 230 | graph = new O3CoreGraphAdvanced(argv[2], // Trace file name 231 | argv[3], // Result file name 232 | instr_stream, 233 | stoi(config["Instr_Buffer_Size"]), 234 | stoi(config["Instr_Queue_Size"]), 235 | stoi(config["Fetch_Bandwidth"]), 236 | stoi(config["Dispatch_Bandwidth"]), 237 | stoi(config["Issue_Bandwidth"]), 238 | stoi(config["Commit_Bandwidth"]), 239 | stoi(config["Decode_Cycles"]), 240 | stoi(config["Dispatch_Cycles"]), 241 | stoi(config["Execute_To_Commit_Cycles"]), 242 | stoi(config["Prediction_Cycles"]), 243 | stoi(config["Misprediction_Penalty"]), 244 | stoi(config["Mem_Issue_Bandwidth"]), 245 | stoi(config["Mem_Commit_Bandwidth"]), 246 | stoi(config["Int_ALU_Count"]), 247 | stoi(config["Int_Mul_Div_Count"]), 248 | stoi(config["FP_ALU_Count"]), 249 | stoi(config["FP_Mul_Div_Count"]), 250 | stoi(config["LSU_Count"]), 251 | stoi(config["LQ_Size"]), 252 | stoi(config["SQ_Size"]), 253 | bp_type(config["Branch_Predictor"]), 254 | config["Branch_Predictor_Config"], 255 | cache_type(config["I_Cache"]), 256 | config["I_Cache_Config"], 257 | cache_type(config["D_Cache"]), 258 | config["D_Cache_Config"]); 259 | */ 260 | } 261 | 262 | return graph; 263 | } 264 | 265 | void finish(InstructionStream* instr_stream, Graph* graph) 266 | { 267 | delete instr_stream; 268 | delete graph; 269 | } 270 | 271 | int main(int argc, char* argv[]) 272 | { 273 | if (argc != 4) 274 | { 275 | CALIPERS_ERROR("Usage --> arg1: config file, arg2: trace file, arg3: result file"); 276 | } 277 | 278 | InstructionStream* instr_stream; 279 | Graph* graph; 280 | 281 | graph = init(argv, instr_stream); 282 | graph->run(); 283 | finish(instr_stream, graph); 284 | 285 | return 0; 286 | } 287 | -------------------------------------------------------------------------------- /src/memory/real_cache.h: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
#define MAX_WAYS 16

const uint64_t HIT = 1;
const uint64_t MISS = 0;

/**
 * Bookkeeping state of one cache line (no data payload is modeled).
 * Fields default to zero so that a freshly allocated line is invalid and clean.
 */
typedef struct CACHE_LINE
{
    uint32_t valid = 0;
    uint32_t dirty = 0;
    uint64_t tag = 0;
} CacheLine;

/** One cache set: storage for up to MAX_WAYS lines. */
typedef struct CACHE_SET
{
    CacheLine line[MAX_WAYS];
} CacheSet;

/**
 * State of a single cache: geometry, line storage, access counters,
 * tree-PLRU replacement bits (one word per set), and per-set LRU stacks
 * used for stack-depth hit statistics.
 */
class MyCache
{
  public:
    uint64_t numSets = 0;
    uint64_t numWays = 0;
    uint64_t lineSize = 0;

    CacheSet* sets = nullptr;
    CacheLine lastEvictedLine; // For checking write-backs

    // Stats
    uint64_t statReadAccess = 0;
    uint64_t statWriteAccess = 0;
    uint64_t statReadMiss = 0;
    uint64_t statWriteMiss = 0;
    uint64_t statDirtyEvicts = 0; // How many dirty lines were evicted?

    // PLRU replacement
    uint64_t* plruTree = nullptr;
    uint64_t plruNumLevels = 0;
    uint64_t plruEffecAssoc = 0;

    // Stack hits
    std::deque<uint64_t>* lruStack = nullptr; // Try list as well
    uint64_t lruStackHit[16] = {};

    MyCache() = default;

    // The object owns plruTree and lruStack, so copying would double-free them
    MyCache(const MyCache&) = delete;
    MyCache& operator=(const MyCache&) = delete;

    // Frees only what initReplacementState() allocates. `sets` is allocated
    // and released by whoever creates the cache.
    ~MyCache()
    {
        delete[] plruTree;
        delete[] lruStack;
    }

    /**
     * Finds the replacement victim in `set` by walking the PLRU tree bits
     * from the root: a set bit selects the upper half of the remaining ways
     * and the right child node, a clear bit the lower half and the left child.
     *
     * @return A way index, clamped to numWays - 1.
     */
    uint64_t getVictimInSet(uint64_t set)
    {
        uint64_t victim = 0;
        uint64_t index = 0;

        for (uint64_t i = 0; i < plruNumLevels; ++i)
        {
            const uint64_t bit = (plruTree[set] >> index) & 1;
            if (bit)
            {
                victim += plruEffecAssoc >> (i + 1);
            }
            index = (index * 2) + (bit ? 2 : 1);
        }

        // plruEffecAssoc may exceed numWays, so keep the result in range
        if (victim > (numWays - 1))
        {
            victim = numWays - 1;
        }

        return victim;
    }

    /**
     * Called on every cache hit and cache fill. Points every tree node on the
     * path to `way` away from it, so the next victim search avoids that way.
     */
    void updateReplacementState(uint64_t set, uint64_t way)
    {
        uint64_t index = 0;

        for (int64_t i = plruNumLevels - 1; i >= 0; --i)
        {
            const uint64_t bit = (way >> i) & 1;
            const uint64_t node_mask = uint64_t(1) << index; // 64-bit on both paths
            if (bit)
            {
                plruTree[set] &= ~node_mask;
            }
            else
            {
                plruTree[set] |= node_mask;
            }

            index = (index * 2) + (bit ? 2 : 1);
        }
    }

    /**
     * Allocates and resets the replacement state. numSets and numWays must
     * be set before this is called.
     */
    void initReplacementState()
    {
        // Release any previous state so a repeated call does not leak
        delete[] plruTree;
        delete[] lruStack;

        plruTree = new uint64_t[numSets](); // Zeroed: tree bits are read before first update
        plruNumLevels = 0;
        plruEffecAssoc = 1;

        // Smallest power of two that is >= numWays
        while (plruEffecAssoc < numWays)
        {
            plruEffecAssoc <<= 1;
        }

        // Number of times numWays can be halved before reaching 1
        for (uint64_t assoc = numWays / 2; assoc != 0; assoc /= 2)
        {
            ++plruNumLevels;
        }

        // LRU stack
        lruStack = new std::deque<uint64_t>[numSets];
        for (int i = 0; i < 16; ++i)
        {
            lruStackHit[i] = 0;
        }
    }
};
line writes 184 | // type 2: write-back 185 | // Should fix so that you can't hit the line right after miss. 186 | // Keep ready-cycle by propagating latency. 187 | uint32_t cacheAccessInstall(MyCache* c, uint64_t p_addr, uint32_t type) 188 | { 189 | uint32_t outcome = MISS; 190 | uint64_t line_addr = p_addr / c->lineSize; 191 | 192 | uint64_t way = 0; 193 | uint64_t ii = 0; 194 | uint64_t set = (p_addr / c->lineSize) % c->numSets; 195 | 196 | if (type == 0) 197 | { 198 | ++(c->statReadAccess); 199 | } 200 | else 201 | { 202 | ++(c->statWriteAccess); 203 | } 204 | 205 | for (ii = 0; ii < c->numWays; ++ii) 206 | { 207 | if (c->sets[set].line[ii].valid && (c->sets[set].line[ii].tag == line_addr)) 208 | { 209 | outcome = HIT; 210 | way = ii; 211 | break; 212 | } 213 | } 214 | 215 | // LRU stack calculation 216 | auto lruStackPtr = &(c->lruStack[set]); 217 | int lruDepthCtr = 0; 218 | bool lruStackHit = MISS; 219 | for (auto it = lruStackPtr->begin(); it != lruStackPtr->end(); ++it) 220 | { 221 | if (*it == line_addr) 222 | { 223 | ++c->lruStackHit[lruDepthCtr]; 224 | if (lruDepthCtr != 0) 225 | { 226 | lruStackPtr->erase(it); 227 | lruStackPtr->emplace_front(line_addr); 228 | } 229 | lruStackHit = HIT; 230 | break; 231 | } 232 | ++lruDepthCtr; 233 | if (lruDepthCtr == 16) 234 | { 235 | // Missed; erase last element 236 | lruStackPtr->erase(it); 237 | break; 238 | } 239 | } 240 | 241 | if (lruStackHit == MISS) 242 | { 243 | lruStackPtr->emplace_front(line_addr); 244 | } 245 | 246 | if (outcome == HIT) 247 | { 248 | if ((type == 1) || (type == 2)) 249 | { 250 | c->sets[set].line[ii].dirty = true; 251 | } 252 | c->updateReplacementState(set, way); 253 | return HIT; 254 | } 255 | 256 | // Got miss, victim and install 257 | uint64_t victim_way = c->getVictimInSet(set); // Pass next reuse into back 258 | if (victim_way < c->numWays) 259 | { 260 | if (c->sets[set].line[victim_way].dirty) 261 | { 262 | ++(c->statDirtyEvicts); 263 | } 264 | 265 | c->lastEvictedLine.valid = 
c->sets[set].line[victim_way].valid; 266 | c->lastEvictedLine.tag = c->sets[set].line[victim_way].tag; 267 | c->lastEvictedLine.dirty = c->sets[set].line[victim_way].dirty; 268 | 269 | c->sets[set].line[victim_way].valid = true; 270 | c->sets[set].line[victim_way].tag = line_addr; 271 | c->sets[set].line[victim_way].dirty = ((type == 1) || (type == 2)) ? true : false; 272 | 273 | c->updateReplacementState(set, victim_way); 274 | } 275 | 276 | if (type == 0) 277 | { 278 | ++(c->statReadMiss); 279 | } 280 | else 281 | { 282 | ++(c->statWriteMiss); 283 | } 284 | 285 | return MISS; 286 | } 287 | 288 | 289 | public: 290 | uint64_t sInstructionCount; 291 | MyCache* l1cache; 292 | MyCache* l2cache; 293 | MyCache* l3cache; 294 | uint64_t pCacheConfig; 295 | 296 | CacheInternals(uint64_t cache_config, 297 | uint32_t l1_size, uint32_t l1_assoc, 298 | uint32_t l2_size, uint32_t l2_assoc) 299 | { 300 | sInstructionCount = 0; 301 | pCacheConfig = cache_config; 302 | 303 | switch (pCacheConfig) 304 | { 305 | case 0: 306 | break; 307 | case 1: 308 | break; 309 | case 2: 310 | l1cache = cacheNew(32 * 1024, 4 , 64); // 32 KB, 4-way 311 | break; 312 | case 3: 313 | l1cache = cacheNew(l1_size, l1_assoc, 64); 314 | l2cache = cacheNew(l2_size, l2_assoc, 64); 315 | break; 316 | case 4: 317 | l1cache = cacheNew(32 * 1024, 4, 64); // 32 KB, 4-way 318 | l2cache = cacheNew(256 * 1024, 8, 64); // 256 KB, 8-way 319 | l3cache = cacheNew(2 * 1024 * 1024, 8, 64); // 2048 KB, 8-way 320 | break; 321 | default: 322 | break; 323 | } 324 | } 325 | 326 | ~CacheInternals() 327 | { 328 | switch (pCacheConfig) 329 | { 330 | case 0: 331 | break; 332 | case 1: 333 | break; 334 | case 2: 335 | delete l1cache->sets; 336 | delete l1cache; 337 | break; 338 | case 3: 339 | delete l1cache->sets; 340 | delete l1cache; 341 | delete l2cache->sets; 342 | delete l2cache; 343 | break; 344 | case 4: 345 | delete l1cache->sets; 346 | delete l1cache; 347 | delete l2cache->sets; 348 | delete l2cache; 349 | delete 
l3cache->sets; 350 | delete l3cache; 351 | break; 352 | default: 353 | break; 354 | } 355 | } 356 | 357 | uint32_t memoryAccess(uint64_t addr, uint32_t type) 358 | { 359 | uint32_t outcome = false; 360 | uint32_t hit_level = 5; 361 | 362 | switch (pCacheConfig) 363 | { 364 | case 0: // Perfect mem 365 | hit_level = 0; 366 | break; 367 | case 1: // Perfect l1 368 | hit_level = 1; 369 | break; 370 | case 2: // l1, perfect l2 371 | outcome = cacheAccessInstall(l1cache, addr, type); 372 | if (type == 1) // ASSUME FULL CACHE LINE WRITES. Stores don't propagate. 373 | { 374 | hit_level = 1; 375 | break; 376 | } 377 | if (outcome != false) // Should stores be single latency or L1 cache latency? 378 | { 379 | hit_level = 1; 380 | } 381 | else 382 | { 383 | hit_level = 2; 384 | } 385 | break; 386 | case 3: // l1 + l2, perfect l3 387 | outcome = cacheAccessInstall(l1cache, addr, type); 388 | if (type == 1) // ASSUME FULL CACHE LINE WRITES. Stores don't propagate. 389 | { 390 | hit_level = 1; 391 | break; 392 | } 393 | if (outcome != false) 394 | { 395 | hit_level = 1; 396 | } 397 | else 398 | { 399 | outcome = cacheAccessInstall(l2cache, addr, 0); 400 | if (outcome != false) 401 | { 402 | hit_level = 2; 403 | } 404 | else 405 | { 406 | hit_level = 3; 407 | } 408 | if (l1cache->lastEvictedLine.valid && l1cache->lastEvictedLine.dirty) 409 | { 410 | cacheAccessInstall(l2cache, 411 | l1cache->lastEvictedLine.tag * l1cache->lineSize, 2); 412 | } 413 | } 414 | break; 415 | case 4: // l1 + l2 + l3, fixed latency mem 416 | outcome = cacheAccessInstall(l1cache, addr, type); 417 | if (type == 1) // ASSUME FULL CACHE LINE WRITES. Stores don't propagate. 
418 | { 419 | hit_level = 1; 420 | break; 421 | } 422 | if (outcome != false) 423 | { 424 | hit_level = 1; 425 | } 426 | else 427 | { 428 | outcome = cacheAccessInstall(l2cache, addr, 0); // Demand access after l2 429 | if (outcome != false) 430 | { 431 | hit_level = 2; 432 | } 433 | else 434 | { 435 | outcome = cacheAccessInstall(l3cache, addr, 0); 436 | if (outcome != false) 437 | { 438 | hit_level = 3; 439 | } 440 | else 441 | { 442 | hit_level = 4; 443 | } 444 | if (l2cache->lastEvictedLine.valid && l2cache->lastEvictedLine.dirty) 445 | { 446 | cacheAccessInstall(l3cache, 447 | l2cache->lastEvictedLine.tag * l2cache->lineSize, 2); 448 | } 449 | } 450 | if (l1cache->lastEvictedLine.valid && l1cache->lastEvictedLine.dirty) 451 | { 452 | outcome = cacheAccessInstall(l2cache, 453 | l1cache->lastEvictedLine.tag * l1cache->lineSize, 2); 454 | if ((outcome == false) && 455 | l2cache->lastEvictedLine.valid && l2cache->lastEvictedLine.dirty) 456 | { 457 | cacheAccessInstall(l3cache, 458 | l2cache->lastEvictedLine.tag * l2cache->lineSize, 2); 459 | } 460 | } 461 | } 462 | break; 463 | default: 464 | CALIPERS_ERROR("Incorrect cache configuration knob"); 465 | } 466 | 467 | return hit_level; 468 | } 469 | }; 470 | 471 | /** 472 | * An analytical/functional cache model 473 | */ 474 | class RealCache: public Cache 475 | { 476 | private: 477 | CacheInternals* cacheInternals; 478 | uint32_t l1Size; 479 | uint32_t l1Assoc; 480 | uint32_t l2Size; 481 | uint32_t l2Assoc; 482 | uint32_t l1LoadHitCycles; 483 | uint32_t l2LoadHitCycles; 484 | uint32_t l2LoadMissCycles; 485 | uint32_t l1StoreHitCycles; 486 | uint32_t l2StoreHitCycles; 487 | uint32_t l2StoreMissCycles; 488 | 489 | public: 490 | RealCache(string config) 491 | { 492 | vector config_vec = split_string(config, ':'); 493 | if (config_vec.size() != 10) 494 | { 495 | CALIPERS_ERROR("Invalid configuration for the real cache"); 496 | } 497 | 498 | l1Size = stof(config_vec[0]); 499 | l1Assoc = stoi(config_vec[1]); 500 | l2Size 
= stoi(config_vec[2]); 501 | l2Assoc = stof(config_vec[3]); 502 | l1LoadHitCycles = stof(config_vec[4]); 503 | l2LoadHitCycles = stoi(config_vec[5]); 504 | l2LoadMissCycles = stoi(config_vec[6]); 505 | l1StoreHitCycles = stof(config_vec[7]); 506 | l2StoreHitCycles = stoi(config_vec[8]); 507 | l2StoreMissCycles = stoi(config_vec[9]); 508 | 509 | // Two-level cache 510 | cacheInternals = new CacheInternals(3, l1Size, l1Assoc, l2Size, l2Assoc); 511 | } 512 | 513 | uint32_t loadCycles(uint64_t base, uint32_t length) 514 | { 515 | uint32_t hit_level; 516 | hit_level = cacheInternals->memoryAccess(base, 0); 517 | switch (hit_level) 518 | { 519 | case 1: return l1LoadHitCycles; 520 | case 2: return l2LoadHitCycles; 521 | default: return l2LoadMissCycles; 522 | 523 | } 524 | } 525 | 526 | uint32_t storeCycles(uint64_t base, uint32_t length) 527 | { 528 | uint32_t hit_level; 529 | hit_level = cacheInternals->memoryAccess(base, 1); 530 | switch (hit_level) 531 | { 532 | case 1: return l1StoreHitCycles; 533 | case 2: return l2StoreHitCycles; 534 | default: return l2StoreMissCycles; 535 | 536 | } 537 | } 538 | 539 | void printStats() 540 | { 541 | cout << "*** L1 stats:" << endl; 542 | cout << " Read accesses: " << cacheInternals->l1cache->statReadAccess << endl; 543 | cout << " Read misses: " << cacheInternals->l1cache->statReadMiss << endl; 544 | cout << " Write accesses: " << cacheInternals->l1cache->statWriteAccess << endl; 545 | cout << " Write misses: " << cacheInternals->l1cache->statWriteMiss << endl; 546 | cout << " Dirty evicts: " << cacheInternals->l1cache->statDirtyEvicts << endl; 547 | 548 | cout << "*** L2 stats:" << endl; 549 | cout << " Read accesses: " << cacheInternals->l2cache->statReadAccess << endl; 550 | cout << " Read misses: " << cacheInternals->l2cache->statReadMiss << endl; 551 | cout << " Write accesses: " << cacheInternals->l2cache->statWriteAccess << endl; 552 | cout << " Write misses: " << cacheInternals->l2cache->statWriteMiss << endl; 553 | cout 
<< " Dirty evicts: " << cacheInternals->l2cache->statDirtyEvicts << endl; 554 | } 555 | }; 556 | 557 | #endif // REAL_CACHE_H 558 | -------------------------------------------------------------------------------- /src/graph/graph.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #include 26 | #include 27 | 28 | #include "calipers_defs.h" 29 | #include "calipers_types.h" 30 | #include "graph.h" 31 | #include "instruction_stream.h" 32 | 33 | using namespace std; 34 | 35 | Graph::Graph(string trace_file_name, string result_file_name, InstructionStream* instr_stream) : 36 | streamTime(0), 37 | graphConstructionTime(0), 38 | graphAnalysisTime(0), 39 | instrCount(0), 40 | analyzedWindows(0), 41 | l1iMisses(0), 42 | l2iMisses(0), 43 | l1dMisses(0), 44 | l2dMisses(0), 45 | bpMisses(0), 46 | branchCount(0), 47 | traceFileName(trace_file_name), 48 | resultFileName(result_file_name), 49 | instrStream(instr_stream) 50 | { 51 | // TODO: Parameterize the following 52 | l1iThreshold = 5; 53 | l2iThreshold = 20; 54 | l1dThreshold = 5; 55 | l2dThreshold = 20; 56 | } 57 | 58 | void Graph::updateCriticalPathCycles(Vertex& parent, OutgoingEdge& e) 59 | { 60 | bool mask[VECTOR_WIDTH]; 61 | bool comparison[VECTOR_WIDTH]; 62 | 63 | Vertex& child = e.child; 64 | Vector& weight = e.weight; 65 | 66 | length[child].update(length[parent], weight, mask, VECTOR_WIDTH); 67 | 68 | criticalPathInstructions[child].intInstructions.maskedSet( 69 | criticalPathInstructions[parent].intInstructions, mask, VECTOR_WIDTH); 70 | criticalPathInstructions[child].fpInstructions.maskedSet( 71 | criticalPathInstructions[parent].fpInstructions, mask, VECTOR_WIDTH); 72 | criticalPathInstructions[child].loadInstructions.maskedSet( 73 | criticalPathInstructions[parent].loadInstructions, mask, VECTOR_WIDTH); 74 | criticalPathInstructions[child].storeInstructions.maskedSet( 75 | criticalPathInstructions[parent].storeInstructions, mask, VECTOR_WIDTH); 76 | criticalPathInstructions[child].branchInstructions.maskedSet( 77 | criticalPathInstructions[parent].branchInstructions, mask, VECTOR_WIDTH); 78 | criticalPathInstructions[child].otherInstructions.maskedSet( 79 | criticalPathInstructions[parent].otherInstructions, mask, VECTOR_WIDTH); 80 | 81 | 
criticalPathCycles[child].goodFetchHitCycles.maskedSet( 82 | criticalPathCycles[parent].goodFetchHitCycles, mask, VECTOR_WIDTH); 83 | criticalPathCycles[child].goodFetchMissCycles.maskedSet( 84 | criticalPathCycles[parent].goodFetchMissCycles, mask, VECTOR_WIDTH); 85 | criticalPathCycles[child].badFetchHitCycles.maskedSet( 86 | criticalPathCycles[parent].badFetchHitCycles, mask, VECTOR_WIDTH); 87 | criticalPathCycles[child].badFetchMissCycles.maskedSet( 88 | criticalPathCycles[parent].badFetchMissCycles, mask, VECTOR_WIDTH); 89 | criticalPathCycles[child].decodeCycles.maskedSet( 90 | criticalPathCycles[parent].decodeCycles, mask, VECTOR_WIDTH); 91 | criticalPathCycles[child].dispatchCycles.maskedSet( 92 | criticalPathCycles[parent].dispatchCycles, mask, VECTOR_WIDTH); 93 | criticalPathCycles[child].intCycles.maskedSet( 94 | criticalPathCycles[parent].intCycles, mask, VECTOR_WIDTH); 95 | criticalPathCycles[child].fpCycles.maskedSet( 96 | criticalPathCycles[parent].fpCycles, mask, VECTOR_WIDTH); 97 | criticalPathCycles[child].lsCycles.maskedSet( 98 | criticalPathCycles[parent].lsCycles, mask, VECTOR_WIDTH); 99 | criticalPathCycles[child].loadL1HitCycles.maskedSet( 100 | criticalPathCycles[parent].loadL1HitCycles, mask, VECTOR_WIDTH); 101 | criticalPathCycles[child].loadL2HitCycles.maskedSet( 102 | criticalPathCycles[parent].loadL2HitCycles, mask, VECTOR_WIDTH); 103 | criticalPathCycles[child].loadMissCycles.maskedSet( 104 | criticalPathCycles[parent].loadMissCycles, mask, VECTOR_WIDTH); 105 | criticalPathCycles[child].storeL1HitCycles.maskedSet( 106 | criticalPathCycles[parent].storeL1HitCycles, mask, VECTOR_WIDTH); 107 | criticalPathCycles[child].storeL2HitCycles.maskedSet( 108 | criticalPathCycles[parent].storeL2HitCycles, mask, VECTOR_WIDTH); 109 | criticalPathCycles[child].storeMissCycles.maskedSet( 110 | criticalPathCycles[parent].storeMissCycles, mask, VECTOR_WIDTH); 111 | criticalPathCycles[child].branchCycles.maskedSet( 112 | 
criticalPathCycles[parent].branchCycles, mask, VECTOR_WIDTH); 113 | criticalPathCycles[child].syscallCycles.maskedSet( 114 | criticalPathCycles[parent].syscallCycles, mask, VECTOR_WIDTH); 115 | criticalPathCycles[child].atomicCycles.maskedSet( 116 | criticalPathCycles[parent].atomicCycles, mask, VECTOR_WIDTH); 117 | criticalPathCycles[child].otherCycles.maskedSet( 118 | criticalPathCycles[parent].otherCycles, mask, VECTOR_WIDTH); 119 | criticalPathCycles[child].commitCycles.maskedSet( 120 | criticalPathCycles[parent].commitCycles, mask, VECTOR_WIDTH); 121 | 122 | if (((parent.type == VertexType::InstrExecute) && (child.type != VertexType::MemExecute)) || 123 | (parent.type == VertexType::MemExecute)) 124 | { 125 | int parent_execution_type = executionType[parent.instrNum % AnalysisWindow]; 126 | Vector one_vector(1); 127 | switch (parent_execution_type) 128 | { 129 | case ExecutionType::IntBase: 130 | case ExecutionType::IntMul: 131 | case ExecutionType::IntDiv: 132 | criticalPathInstructions[child].intInstructions.maskedAdd( 133 | one_vector, mask, VECTOR_WIDTH); 134 | break; 135 | case ExecutionType::FpBase: 136 | case ExecutionType::FpMul: 137 | case ExecutionType::FpDiv: 138 | criticalPathInstructions[child].fpInstructions.maskedAdd( 139 | one_vector, mask, VECTOR_WIDTH); 140 | break; 141 | case ExecutionType::Load: 142 | criticalPathInstructions[child].loadInstructions.maskedAdd( 143 | one_vector, mask, VECTOR_WIDTH); 144 | break; 145 | case ExecutionType::Store: 146 | criticalPathInstructions[child].storeInstructions.maskedAdd( 147 | one_vector, mask, VECTOR_WIDTH); 148 | break; 149 | case ExecutionType::BranchCond: 150 | case ExecutionType::BranchUncond: 151 | criticalPathInstructions[child].branchInstructions.maskedAdd( 152 | one_vector, mask, VECTOR_WIDTH); 153 | break; 154 | default: 155 | criticalPathInstructions[child].otherInstructions.maskedAdd( 156 | one_vector, mask, VECTOR_WIDTH); 157 | } 158 | } 159 | 160 | if (parent.type == 
VertexType::InstrFetch) 161 | { 162 | if (child.type == VertexType::InstrFetch) 163 | { 164 | weight.smallerThanOrEqual(l2iThreshold, mask, comparison, VECTOR_WIDTH); 165 | criticalPathCycles[child].goodFetchHitCycles.maskedAdd( 166 | weight, comparison, VECTOR_WIDTH); 167 | 168 | weight.largerThan(l2iThreshold, mask, comparison, VECTOR_WIDTH); 169 | criticalPathCycles[child].goodFetchMissCycles.maskedAdd( 170 | weight, comparison, VECTOR_WIDTH); 171 | } 172 | else // child.type == VertexType::InstrDispatch 173 | { 174 | criticalPathCycles[child].decodeCycles.maskedAdd( 175 | weight, mask, VECTOR_WIDTH); 176 | } 177 | } 178 | else if (parent.type == VertexType::InstrDispatch) 179 | { 180 | criticalPathCycles[child].dispatchCycles.maskedAdd( 181 | weight, mask, VECTOR_WIDTH); 182 | } 183 | else if (parent.type == VertexType::InstrExecute) 184 | { 185 | if ((child.type == VertexType::InstrDispatch) || 186 | (child.type == VertexType::InstrExecute) || 187 | (child.type == VertexType::MemExecute) || 188 | (child.type == VertexType::InstrCommit)) 189 | { 190 | int parent_execution_type = executionType[parent.instrNum % AnalysisWindow]; 191 | switch (parent_execution_type) 192 | { 193 | case ExecutionType::IntBase: 194 | case ExecutionType::IntMul: 195 | case ExecutionType::IntDiv: 196 | criticalPathCycles[child].intCycles.maskedAdd( 197 | weight, mask, VECTOR_WIDTH); 198 | break; 199 | case ExecutionType::FpBase: 200 | case ExecutionType::FpMul: 201 | case ExecutionType::FpDiv: 202 | criticalPathCycles[child].fpCycles.maskedAdd( 203 | weight, mask, VECTOR_WIDTH); 204 | break; 205 | case ExecutionType::Load: 206 | case ExecutionType::Store: 207 | criticalPathCycles[child].lsCycles.maskedAdd( 208 | weight, mask, VECTOR_WIDTH); 209 | break; 210 | case ExecutionType::BranchCond: 211 | case ExecutionType::BranchUncond: 212 | criticalPathCycles[child].branchCycles.maskedAdd( 213 | weight, mask, VECTOR_WIDTH); 214 | break; 215 | case ExecutionType::Syscall: 216 | 
criticalPathCycles[child].syscallCycles.maskedAdd( 217 | weight, mask, VECTOR_WIDTH); 218 | break; 219 | case ExecutionType::Atomic: 220 | criticalPathCycles[child].atomicCycles.maskedAdd( 221 | weight, mask, VECTOR_WIDTH); 222 | break; 223 | default: // ExecutionType::Other 224 | criticalPathCycles[child].otherCycles.maskedAdd( 225 | weight, mask, VECTOR_WIDTH); 226 | } 227 | } 228 | else // child.type == VertexType::InstrFetch 229 | { 230 | // The weight equals total cycles of RscIntAlu + mispredictionPenalty + fetchCycles 231 | 232 | Vector br_weight(intAluTotalCycles); 233 | criticalPathCycles[child].branchCycles.maskedAdd( 234 | br_weight, mask, VECTOR_WIDTH); 235 | 236 | Vector fetch_weight(weight, intAluTotalCycles); 237 | 238 | fetch_weight.smallerThanOrEqual(l2iThreshold, mask, comparison, VECTOR_WIDTH); 239 | criticalPathCycles[child].badFetchHitCycles.maskedAdd( 240 | fetch_weight, comparison, VECTOR_WIDTH); 241 | 242 | fetch_weight.largerThan(l2iThreshold, mask, comparison, VECTOR_WIDTH); 243 | criticalPathCycles[child].badFetchMissCycles.maskedAdd( 244 | fetch_weight, comparison, VECTOR_WIDTH); 245 | } 246 | } 247 | else if (parent.type == VertexType::MemExecute) 248 | { 249 | int parent_execution_type = executionType[parent.instrNum % AnalysisWindow]; 250 | if (parent_execution_type == ExecutionType::Load) 251 | { 252 | weight.smallerThanOrEqual(l1dThreshold, mask, comparison, VECTOR_WIDTH); 253 | criticalPathCycles[child].loadL1HitCycles.maskedAdd( 254 | weight, comparison, VECTOR_WIDTH); 255 | 256 | weight.between(l1dThreshold, l2dThreshold, mask, comparison, VECTOR_WIDTH); 257 | criticalPathCycles[child].loadL2HitCycles.maskedAdd( 258 | weight, comparison, VECTOR_WIDTH); 259 | 260 | weight.largerThan(l2dThreshold, mask, comparison, VECTOR_WIDTH); 261 | criticalPathCycles[child].loadMissCycles.maskedAdd( 262 | weight, comparison, VECTOR_WIDTH); 263 | } 264 | else // parentExecutionType == ExecutionType::Store 265 | { 266 | 
weight.smallerThanOrEqual(l1dThreshold, mask, comparison, VECTOR_WIDTH); 267 | criticalPathCycles[child].storeL1HitCycles.maskedAdd( 268 | weight, comparison, VECTOR_WIDTH); 269 | 270 | weight.between(l1dThreshold, l2dThreshold, mask, comparison, VECTOR_WIDTH); 271 | criticalPathCycles[child].storeL2HitCycles.maskedAdd( 272 | weight, comparison, VECTOR_WIDTH); 273 | 274 | weight.largerThan(l2dThreshold, mask, comparison, VECTOR_WIDTH); 275 | criticalPathCycles[child].storeMissCycles.maskedAdd( 276 | weight, comparison, VECTOR_WIDTH); 277 | } 278 | } 279 | else // parent.type == VertexType::InstrCommit 280 | { 281 | criticalPathCycles[child].commitCycles.maskedAdd( 282 | weight, mask, VECTOR_WIDTH); 283 | } 284 | } 285 | 286 | void Graph::recordStats(bool show_details, bool hopping_window) 287 | { 288 | fstream result_file; 289 | result_file.open(resultFileName, fstream::out | fstream::app); 290 | result_file << "==============================================================" << endl; 291 | result_file << traceFileName << endl; 292 | 293 | ostringstream os; 294 | 295 | uint64_t window_instructions = instrCount - analyzedWindows * AnalysisWindow; 296 | 297 | os << setprecision(4); 298 | 299 | Vertex last_vertex(VertexType::InstrCommit, instrCount - 1); 300 | for (uint32_t i = 0; i < VECTOR_WIDTH; ++i) 301 | { 302 | os << "--------------------------------------------------------------" << endl; 303 | os << "*** "; 304 | if (hopping_window) 305 | { 306 | os << "Window " << analyzedWindows << ", "; 307 | } 308 | os << "Scenario " << (int)i << endl << endl; 309 | 310 | os << "Total instructions count: " << instrCount << endl; 311 | if (hopping_window) 312 | { 313 | os << "Window instructions count: " << window_instructions << endl << endl; 314 | } 315 | 316 | os << "Length: " << length[last_vertex][i] << endl; 317 | os << "ILP: " << ((double)window_instructions / (double)length[last_vertex][i]) << endl; 318 | os << "CPI: " << ((double)length[last_vertex][i] / 
(double)window_instructions) << endl; 319 | 320 | if (show_details) 321 | { 322 | os << endl; 323 | 324 | double good_fetch_hit_cycles = 325 | ((double)criticalPathCycles[last_vertex].goodFetchHitCycles[i] / 326 | (double)length[last_vertex][i]) * 100; 327 | double good_fetch_miss_cycles = 328 | ((double)criticalPathCycles[last_vertex].goodFetchMissCycles[i] / 329 | (double)length[last_vertex][i]) * 100; 330 | double bad_fetch_hit_cycles = 331 | ((double)criticalPathCycles[last_vertex].badFetchHitCycles[i] / 332 | (double)length[last_vertex][i]) * 100; 333 | double bad_fetch_miss_cycles = 334 | ((double)criticalPathCycles[last_vertex].badFetchMissCycles[i] / 335 | (double)length[last_vertex][i]) * 100; 336 | double decode_cycles = 337 | ((double)criticalPathCycles[last_vertex].decodeCycles[i] / 338 | (double)length[last_vertex][i]) * 100; 339 | double dispatch_cycles = 340 | ((double)criticalPathCycles[last_vertex].dispatchCycles[i] / 341 | (double)length[last_vertex][i]) * 100; 342 | double int_cycles = 343 | ((double)criticalPathCycles[last_vertex].intCycles[i] / 344 | (double)length[last_vertex][i]) * 100; 345 | double fp_cycles = 346 | ((double)criticalPathCycles[last_vertex].fpCycles[i] / 347 | (double)length[last_vertex][i]) * 100; 348 | double ls_cycles = 349 | ((double)criticalPathCycles[last_vertex].lsCycles[i] / 350 | (double)length[last_vertex][i]) * 100; 351 | double load_l1_hit_cycles = 352 | ((double)criticalPathCycles[last_vertex].loadL1HitCycles[i] / 353 | (double)length[last_vertex][i]) * 100; 354 | double load_l2_hit_cycles = 355 | ((double)criticalPathCycles[last_vertex].loadL2HitCycles[i] / 356 | (double)length[last_vertex][i]) * 100; 357 | double load_miss_cycles = 358 | ((double)criticalPathCycles[last_vertex].loadMissCycles[i] / 359 | (double)length[last_vertex][i]) * 100; 360 | double store_l1_hit_cycles = 361 | ((double)criticalPathCycles[last_vertex].storeL1HitCycles[i] / 362 | (double)length[last_vertex][i]) * 100; 363 | double 
store_l2_hit_cycles = 364 | ((double)criticalPathCycles[last_vertex].storeL2HitCycles[i] / 365 | (double)length[last_vertex][i]) * 100; 366 | double store_miss_cycles = 367 | ((double)criticalPathCycles[last_vertex].storeMissCycles[i] / 368 | (double)length[last_vertex][i]) * 100; 369 | double branch_cycles = 370 | ((double)criticalPathCycles[last_vertex].branchCycles[i] / 371 | (double)length[last_vertex][i]) * 100; 372 | double syscall_cycles = 373 | ((double)criticalPathCycles[last_vertex].syscallCycles[i] / 374 | (double)length[last_vertex][i]) * 100; 375 | double atomic_cycles = 376 | ((double)criticalPathCycles[last_vertex].atomicCycles[i] / 377 | (double)length[last_vertex][i]) * 100; 378 | double other_cycles = 379 | ((double)criticalPathCycles[last_vertex].otherCycles[i] / 380 | (double)length[last_vertex][i]) * 100; 381 | double commit_cycles = 382 | ((double)criticalPathCycles[last_vertex].commitCycles[i] / 383 | (double)length[last_vertex][i]) * 100; 384 | 385 | uint64_t critical_instructions = 386 | criticalPathInstructions[last_vertex].intInstructions[i] + 387 | criticalPathInstructions[last_vertex].fpInstructions[i] + 388 | criticalPathInstructions[last_vertex].loadInstructions[i] + 389 | criticalPathInstructions[last_vertex].storeInstructions[i] + 390 | criticalPathInstructions[last_vertex].branchInstructions[i] + 391 | criticalPathInstructions[last_vertex].otherInstructions[i]; 392 | double int_instructions_critical = 393 | ((double)criticalPathInstructions[last_vertex].intInstructions[i] / 394 | (double)critical_instructions) * 100; 395 | double fp_instructions_critical = 396 | ((double)criticalPathInstructions[last_vertex].fpInstructions[i] / 397 | (double)critical_instructions) * 100; 398 | double load_instructions_critical = 399 | ((double)criticalPathInstructions[last_vertex].loadInstructions[i] / 400 | (double)critical_instructions) * 100; 401 | double store_instructions_critical = 402 | 
((double)criticalPathInstructions[last_vertex].storeInstructions[i] / 403 | (double)critical_instructions) * 100; 404 | double branch_instructions_critical = 405 | ((double)criticalPathInstructions[last_vertex].branchInstructions[i] / 406 | (double)critical_instructions) * 100; 407 | double other_instructions_critical = 408 | ((double)criticalPathInstructions[last_vertex].otherInstructions[i] / 409 | (double)critical_instructions) * 100; 410 | 411 | double int_instructions_all = 412 | ((double)instructionMix[0] / 413 | (double)window_instructions) * 100; 414 | double fp_instructions_all = 415 | ((double)instructionMix[1] / 416 | (double)window_instructions) * 100; 417 | double load_instructions_all = 418 | ((double)instructionMix[2] / 419 | (double)window_instructions) * 100; 420 | double store_instructions_all = 421 | ((double)instructionMix[3] / 422 | (double)window_instructions) * 100; 423 | double branch_instructions_all = 424 | ((double)instructionMix[4] / 425 | (double)window_instructions) * 100; 426 | double other_instructions_all = 427 | ((double)instructionMix[5] / 428 | (double)window_instructions) * 100; 429 | 430 | os << "Good fetch hit cycles: " 431 | << good_fetch_hit_cycles << "% (" 432 | << criticalPathCycles[last_vertex].goodFetchHitCycles[i] << ")" << endl; 433 | os << "Good fetch miss cycles: " 434 | << good_fetch_miss_cycles << "% (" 435 | << criticalPathCycles[last_vertex].goodFetchMissCycles[i] << ")" << endl; 436 | os << "Bad fetch hit cycles: " 437 | << bad_fetch_hit_cycles << "% (" 438 | << criticalPathCycles[last_vertex].badFetchHitCycles[i] << ")" << endl; 439 | os << "Bad fetch miss cycles: " 440 | << bad_fetch_miss_cycles << "% (" 441 | << criticalPathCycles[last_vertex].badFetchMissCycles[i] << ")" << endl; 442 | os << "Decode cycles: " 443 | << decode_cycles << "% (" 444 | << criticalPathCycles[last_vertex].decodeCycles[i] << ")" << endl; 445 | os << "Dispatch cycles: " 446 | << dispatch_cycles << "% (" 447 | << 
criticalPathCycles[last_vertex].dispatchCycles[i] << ")" << endl; 448 | os << "Int cycles: " 449 | << int_cycles << "% (" 450 | << criticalPathCycles[last_vertex].intCycles[i] << ")" << endl; 451 | os << "FP cycles: " 452 | << fp_cycles << "% (" 453 | << criticalPathCycles[last_vertex].fpCycles[i] << ")" << endl; 454 | os << "LS cycles: " 455 | << ls_cycles << "% (" 456 | << criticalPathCycles[last_vertex].lsCycles[i] << ")" << endl; 457 | os << "Load L1 hit cycles: " 458 | << load_l1_hit_cycles << "% (" 459 | << criticalPathCycles[last_vertex].loadL1HitCycles[i] << ")" << endl; 460 | os << "Load L2 hit cycles: " 461 | << load_l2_hit_cycles << "% (" 462 | << criticalPathCycles[last_vertex].loadL2HitCycles[i] << ")" << endl; 463 | os << "Load miss cycles: " 464 | << load_miss_cycles << "% (" 465 | << criticalPathCycles[last_vertex].loadMissCycles[i] << ")" << endl; 466 | os << "Store L1 hit cycles: " 467 | << store_l1_hit_cycles << "% (" 468 | << criticalPathCycles[last_vertex].storeL1HitCycles[i] << ")" << endl; 469 | os << "Store L2 hit cycles: " 470 | << store_l2_hit_cycles << "% (" 471 | << criticalPathCycles[last_vertex].storeL2HitCycles[i] << ")" << endl; 472 | os << "Store miss cycles: " 473 | << store_miss_cycles << "% (" 474 | << criticalPathCycles[last_vertex].storeMissCycles[i] << ")" << endl; 475 | os << "Branch cycles: " 476 | << branch_cycles << "% (" 477 | << criticalPathCycles[last_vertex].branchCycles[i] << ")" << endl; 478 | os << "Syscall cycles: " 479 | << syscall_cycles << "% (" 480 | << criticalPathCycles[last_vertex].syscallCycles[i] << ")" << endl; 481 | os << "Atomic cycles: " 482 | << atomic_cycles << "% (" 483 | << criticalPathCycles[last_vertex].atomicCycles[i] << ")" << endl; 484 | os << "Other cycles: " 485 | << other_cycles << "% (" 486 | << criticalPathCycles[last_vertex].otherCycles[i] << ")" << endl; 487 | os << "Commit cycles: " 488 | << commit_cycles << "% (" 489 | << criticalPathCycles[last_vertex].commitCycles[i] << ")" << endl; 
490 | 491 | os << endl; 492 | 493 | os << "Critical int instructions: " 494 | << int_instructions_critical << "% (" 495 | << criticalPathInstructions[last_vertex].intInstructions[i] << ")" << endl; 496 | os << "Critical fp instructions: " 497 | << fp_instructions_critical << "% (" 498 | << criticalPathInstructions[last_vertex].fpInstructions[i] << ")" << endl; 499 | os << "Critical load instructions: " 500 | << load_instructions_critical << "% (" 501 | << criticalPathInstructions[last_vertex].loadInstructions[i] << ")" << endl; 502 | os << "Critical store instructions: " 503 | << store_instructions_critical << "% (" 504 | << criticalPathInstructions[last_vertex].storeInstructions[i] << ")" << endl; 505 | os << "Critical branch instructions: " 506 | << branch_instructions_critical << "% (" 507 | << criticalPathInstructions[last_vertex].branchInstructions[i] << ")" << endl; 508 | os << "Critical other instructions: " 509 | << other_instructions_critical << "% (" 510 | << criticalPathInstructions[last_vertex].otherInstructions[i] << ")" << endl; 511 | os << "All int instructions: " 512 | << int_instructions_all << "% (" 513 | << instructionMix[0] << ")" << endl; 514 | os << "All fp instructions: " 515 | << fp_instructions_all << "% (" 516 | << instructionMix[1] << ")" << endl; 517 | os << "All load instructions: " 518 | << load_instructions_all << "% (" 519 | << instructionMix[2] << ")" << endl; 520 | os << "All store instructions: " 521 | << store_instructions_all << "% (" 522 | << instructionMix[3] << ")" << endl; 523 | os << "All branch instructions: " 524 | << branch_instructions_all << "% (" 525 | << instructionMix[4] << ")" << endl; 526 | os << "All other instructions: " 527 | << other_instructions_all << "% (" 528 | << instructionMix[5] << ")" << endl; 529 | 530 | os << endl; 531 | 532 | os << "L1i MPKI: " 533 | << ((double)(1000 * l1iMisses) / (double)window_instructions) << endl; 534 | os << "L2i MPKI: " 535 | << ((double)(1000 * l2iMisses) / 
(double)window_instructions) << endl; 536 | os << "L1d MPKI: " 537 | << ((double)(1000 * l1dMisses) / (double)window_instructions) << endl; 538 | os << "L2d MPKI: " 539 | << ((double)(1000 * l2dMisses) / (double)window_instructions) << endl; 540 | os << "BP MPKI: " 541 | << ((double)(1000 * bpMisses) / (double)window_instructions) << endl; 542 | os << "BP accuracy (%): " 543 | << ((double)(100 * (branchCount - bpMisses)) / (double)branchCount) << endl; 544 | 545 | os << endl; 546 | } 547 | } 548 | 549 | string str = os.str(); 550 | cout << str; 551 | result_file << str; 552 | 553 | /* 554 | if (show_details) 555 | { 556 | cout << endl; 557 | if (icache != NULL) 558 | { 559 | cout << "***** I-cache stats *****" << endl; 560 | icache->printStats(); 561 | } 562 | if (dcache != NULL) 563 | { 564 | cout << "***** D-cache stats *****" << endl; 565 | dcache->printStats(); 566 | } 567 | } 568 | */ 569 | 570 | cout << "--------------------------------------------------------------" << endl; 571 | } 572 | 573 | void Graph::printEdge(Vertex& parent, OutgoingEdge& e) 574 | { 575 | cout << "*** Edge: " 576 | << parent.instrNum << "," << (int)parent.type << " to " 577 | << e.child.instrNum << "," << (int)e.child.type << "; " 578 | << e.weight.toString() << endl; 579 | } 580 | 581 | void Graph::printEdge(Vertex& child, IncomingEdge& e) 582 | { 583 | cout << "*** Edge: " 584 | << e.parent.instrNum << "," << (int)e.parent.type << " to " 585 | << child.instrNum << "," << (int)child.type << "; " 586 | << e.weight.toString() << endl; 587 | } 588 | -------------------------------------------------------------------------------- /src/graph/inorder_core_graph.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Microsoft Corporation. 
3 | * 4 | * MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #include "graph_util.h" 26 | #include "inorder_core_graph.h" 27 | 28 | InorderCoreGraph::InorderCoreGraph(string trace_file_name, 29 | string result_file_name, 30 | InstructionStream* instr_stream, 31 | uint32_t fetch_bandwidth, 32 | uint32_t dispatch_bandwidth, 33 | uint32_t issue_bandwidth, 34 | uint32_t commit_bandwidth, 35 | uint32_t decode_cycles, 36 | uint32_t dispatch_cycles, 37 | uint32_t execute_to_commit_cycles, 38 | uint32_t prediction_cycles, 39 | uint32_t misprediction_penalty, 40 | uint32_t mem_issue_bandwidth, 41 | uint32_t mem_commit_bandwidth, 42 | uint32_t max_mem_accesses, 43 | uint32_t int_alu_count, 44 | uint32_t int_mul_count, 45 | uint32_t int_div_count, 46 | uint32_t fpu_count, 47 | uint32_t lsu_count, 48 | bool load_dependent_early_issue, 49 | bool load_early_issue) : 50 | Graph(trace_file_name, result_file_name, instr_stream), 51 | fetchBandwidth(fetch_bandwidth), 52 | dispatchBandwidth(dispatch_bandwidth), 53 | issueBandwidth(issue_bandwidth), 54 | commitBandwidth(commit_bandwidth), 55 | decodeCycles(decode_cycles), 56 | dispatchCycles(dispatch_cycles), 57 | executeToCommitCycles(execute_to_commit_cycles), 58 | predictionCycles(prediction_cycles), 59 | mispredictionPenalty(misprediction_penalty), 60 | memIssueBandwidth(mem_issue_bandwidth), 61 | memCommitBandwidth(mem_commit_bandwidth), 62 | maxMemAccesses(max_mem_accesses), 63 | loadDependentEarlyIssue(load_dependent_early_issue), 64 | loadEarlyIssue(load_early_issue) 65 | { 66 | // TODO: Parameterize the last three arguments of initResource 67 | // (i.e., total_cycles, source_independent_cycles, next_issue_cycles) 68 | scoreboard.initResource(Resource::RscIntAlu, int_alu_count, 3, 2, 1); 69 | scoreboard.initResource(Resource::RscIntMul, int_mul_count, 3, 0, 1); 70 | scoreboard.initResource(Resource::RscIntDiv, int_div_count, 9, 0, 9); 71 | scoreboard.initResource(Resource::RscFpu, fpu_count, 6, 2, 1); 72 | scoreboard.initResource(Resource::RscLsu, lsu_count, 1, 1, 1); 
73 | 74 | intAluTotalCycles = scoreboard.resourceTotalCycles(Resource::RscIntAlu); 75 | 76 | extraLoadLatency = 2; // TODO: Make this a parameter 77 | 78 | ldStWindow = new pair[maxMemAccesses]; 79 | 80 | initBookKeeping(); 81 | } 82 | 83 | InorderCoreGraph::~InorderCoreGraph() 84 | { 85 | delete[] ldStWindow; 86 | } 87 | 88 | void InorderCoreGraph::run() 89 | { 90 | CALIPERS_INFO("Running the graph-based modeler..."); 91 | 92 | sys_nanoseconds my_time; 93 | 94 | while (true) 95 | { 96 | my_time = chrono::system_clock::now(); 97 | Instruction* instr = instrStream->next(); 98 | streamTime += (chrono::system_clock::now() - my_time).count(); 99 | 100 | if (instr == NULL) 101 | { 102 | break; 103 | } 104 | 105 | model(instr); 106 | ++instrCount; 107 | 108 | if (instrCount % 100000 == 0) 109 | { 110 | CALIPERS_INFO("*** " << instrCount << " instructions modeled/analyzed" << endl); 111 | } 112 | } 113 | 114 | my_time = chrono::system_clock::now(); 115 | recordStats(true, false); 116 | graphAnalysisTime += (chrono::system_clock::now() - my_time).count(); 117 | 118 | CALIPERS_INFO("Instruction stream time: " 119 | << (streamTime / 1000000) << " ms" << endl); 120 | CALIPERS_INFO("Graph construction time: " 121 | << (graphConstructionTime / 1000000) << " ms" << endl); 122 | CALIPERS_INFO("Graph analysis time: " 123 | << (graphAnalysisTime / 1000000) << " ms" << endl); 124 | } 125 | 126 | void InorderCoreGraph::initBookKeeping() 127 | { 128 | lastMisprediction = UINT64_MAX; 129 | previousInstrMispredicted = false; 130 | ldStWindowPointer = 0; 131 | lastMemLdSt = UINT64_MAX; 132 | lastLdStCriticalNum = UINT64_MAX; 133 | lastLdStCriticalCycles = UINT32_MAX; 134 | 135 | regLastWrittenBy.clear(); 136 | regLastWrittenByLoad.clear(); 137 | 138 | scoreboard.initRecords(); 139 | 140 | for (uint32_t i = 0; i < AnalysisWindow; ++i) 141 | { 142 | neededRsc[i].first = -1; 143 | } 144 | 145 | Vertex first_vertex(0, 0); 146 | Vector zero_vector(0); 147 | length[first_vertex] = zero_vector; 
148 | criticalPathCycles[first_vertex].goodFetchHitCycles = zero_vector; 149 | criticalPathCycles[first_vertex].goodFetchMissCycles = zero_vector; 150 | criticalPathCycles[first_vertex].badFetchHitCycles = zero_vector; 151 | criticalPathCycles[first_vertex].badFetchMissCycles = zero_vector; 152 | criticalPathCycles[first_vertex].decodeCycles = zero_vector; 153 | criticalPathCycles[first_vertex].dispatchCycles = zero_vector; 154 | criticalPathCycles[first_vertex].intCycles = zero_vector; 155 | criticalPathCycles[first_vertex].fpCycles = zero_vector; 156 | criticalPathCycles[first_vertex].lsCycles = zero_vector; 157 | criticalPathCycles[first_vertex].loadL1HitCycles = zero_vector; 158 | criticalPathCycles[first_vertex].loadL2HitCycles = zero_vector; 159 | criticalPathCycles[first_vertex].loadMissCycles = zero_vector; 160 | criticalPathCycles[first_vertex].storeL1HitCycles = zero_vector; 161 | criticalPathCycles[first_vertex].storeL2HitCycles = zero_vector; 162 | criticalPathCycles[first_vertex].storeMissCycles = zero_vector; 163 | criticalPathCycles[first_vertex].branchCycles = zero_vector; 164 | criticalPathCycles[first_vertex].syscallCycles = zero_vector; 165 | criticalPathCycles[first_vertex].atomicCycles = zero_vector; 166 | criticalPathCycles[first_vertex].otherCycles = zero_vector; 167 | criticalPathCycles[first_vertex].commitCycles = zero_vector; 168 | criticalPathInstructions[first_vertex].intInstructions = zero_vector; 169 | criticalPathInstructions[first_vertex].fpInstructions = zero_vector; 170 | criticalPathInstructions[first_vertex].loadInstructions = zero_vector; 171 | criticalPathInstructions[first_vertex].storeInstructions = zero_vector; 172 | criticalPathInstructions[first_vertex].branchInstructions = zero_vector; 173 | criticalPathInstructions[first_vertex].otherInstructions = zero_vector; 174 | 175 | for (int i = 0; i <= VertexType::Last; ++i) 176 | { 177 | parents[i] = 0; 178 | } 179 | 180 | for (uint32_t i = 0; i < AnalysisWindow; ++i) 181 | { 
182 | executionType[i] = -1; 183 | } 184 | 185 | for (uint32_t i = 0; i < maxMemAccesses; ++i) 186 | { 187 | ldStWindow[i].second = UINT32_MAX; 188 | } 189 | } 190 | 191 | void InorderCoreGraph::model(Instruction* instr) 192 | { 193 | sys_nanoseconds my_time = chrono::system_clock::now(); 194 | 195 | Vertex fetch_vertex(VertexType::InstrFetch, instrCount); 196 | Vertex dispatch_vertex(VertexType::InstrDispatch, instrCount); 197 | Vertex execute_vertex(VertexType::InstrExecute, instrCount); 198 | Vertex mem_vertex(VertexType::MemExecute, instrCount); 199 | Vertex commit_vertex(VertexType::InstrCommit, instrCount); 200 | 201 | uint32_t execution_cycles; 202 | uint32_t source_independent_cycles; 203 | 204 | bool is_load = (instr->memLoadCount == 1); // Also covers atomic instructions 205 | bool is_store = (instr->memStoreCount == 1); // Also covers atomic instructions 206 | bool is_load_store = is_load || is_store; 207 | bool is_branch = (instr->executionType == ExecutionType::BranchCond) || 208 | (instr->executionType == ExecutionType::BranchUncond); 209 | bool is_int = (instr->executionType == ExecutionType::IntBase) || is_branch; 210 | bool is_int_mul = (instr->executionType == ExecutionType::IntMul); 211 | bool is_int_div = (instr->executionType == ExecutionType::IntDiv); 212 | bool is_fp = (instr->executionType == ExecutionType::FpBase) || 213 | (instr->executionType == ExecutionType::FpMul) || 214 | (instr->executionType == ExecutionType::FpDiv); 215 | 216 | // Keeping track of current execute_vertex's dependence to previous execute_vertex'es 217 | unordered_map execute_parent; // Key: Instruction num, Value: Weight 218 | 219 | executionType[instrCount % AnalysisWindow] = instr->executionType; 220 | 221 | // 0: int, 1: fp, 2: load, 3: store, 4: branch, 5: other 222 | if (is_branch) 223 | { 224 | ++instructionMix[4]; 225 | } 226 | else if (is_int || is_int_mul || is_int_div) 227 | { 228 | ++instructionMix[0]; 229 | } 230 | else if (is_fp) 231 | { 232 | 
++instructionMix[1]; 233 | } 234 | else if (is_load) 235 | { 236 | ++instructionMix[2]; 237 | } 238 | else if (is_store) 239 | { 240 | ++instructionMix[3]; 241 | } 242 | else 243 | { 244 | ++instructionMix[5]; 245 | } 246 | 247 | if (is_load_store) 248 | { 249 | execution_cycles = scoreboard.resourceTotalCycles(Resource::RscLsu) + instr->lsCycles; 250 | source_independent_cycles = 251 | scoreboard.resourceSourceIndependentCycles(Resource::RscLsu); 252 | } 253 | else if (is_int) 254 | { 255 | execution_cycles = scoreboard.resourceTotalCycles(Resource::RscIntAlu); 256 | source_independent_cycles = 257 | scoreboard.resourceSourceIndependentCycles(Resource::RscIntAlu); 258 | } 259 | else if (is_int_mul) 260 | { 261 | execution_cycles = scoreboard.resourceTotalCycles(Resource::RscIntMul); 262 | source_independent_cycles = 263 | scoreboard.resourceSourceIndependentCycles(Resource::RscIntMul); 264 | } 265 | else if (is_int_div) 266 | { 267 | execution_cycles = scoreboard.resourceTotalCycles(Resource::RscIntDiv); 268 | source_independent_cycles = 269 | scoreboard.resourceSourceIndependentCycles(Resource::RscIntDiv); 270 | } 271 | else if (is_fp) 272 | { 273 | execution_cycles = scoreboard.resourceTotalCycles(Resource::RscFpu); 274 | source_independent_cycles = 275 | scoreboard.resourceSourceIndependentCycles(Resource::RscFpu); 276 | } 277 | else 278 | { 279 | execution_cycles = 1; 280 | source_independent_cycles = 0; 281 | } 282 | 283 | modelPipeline(fetch_vertex, dispatch_vertex, 284 | execute_vertex, mem_vertex, commit_vertex, 285 | instr, execution_cycles, execute_parent); 286 | 287 | if (is_load_store) 288 | { 289 | modelMemoryOrderConstraint(mem_vertex, is_load, is_store); 290 | } 291 | 292 | trackDataDependencies(instr, source_independent_cycles, 293 | execute_vertex, execute_parent); 294 | 295 | modelResourceDependenciesSimple(is_int, is_int_mul, is_int_div, is_fp, is_load_store, 296 | execute_vertex, execute_parent); 297 | 298 | // Add the required InstrExecute to 
InstrExecute vertices 299 | for (auto i = execute_parent.begin(); i != execute_parent.end(); ++i) 300 | { 301 | if ((instrCount - i->first) / issueBandwidth < i->second) 302 | { 303 | Vertex prev_execute_vertex(VertexType::InstrExecute, i->first); 304 | OutgoingEdge dependence_edge(execute_vertex, (int64_t)i->second); 305 | addEdge(prev_execute_vertex, dependence_edge); 306 | 307 | if ((neededRsc[instrCount % AnalysisWindow].first != -1) && 308 | (neededRsc[instrCount % AnalysisWindow].first == 309 | neededRsc[i->first % AnalysisWindow].first) && 310 | (neededRsc[instrCount % AnalysisWindow].second == 311 | neededRsc[i->first % AnalysisWindow].second)) 312 | { 313 | //cout << "Maintaining resource pipeline distance: " << instrCount 314 | // << " to " << i->first << endl; 315 | Vertex prev_commit_vertex(VertexType::InstrCommit, i->first); 316 | OutgoingEdge dependence_edge(commit_vertex, (int64_t)i->second); 317 | addEdge(prev_commit_vertex, dependence_edge); 318 | } 319 | } 320 | } 321 | 322 | graphConstructionTime += (chrono::system_clock::now() - my_time).count(); 323 | my_time = chrono::system_clock::now(); 324 | 325 | calculateInstructionCriticalPath(); 326 | 327 | 328 | // Update bookkeeping variables: 329 | 330 | if (is_load_store) 331 | { 332 | lastMemLdSt = instrCount; 333 | ldStWindow[ldStWindowPointer].first = instrCount; 334 | ldStWindow[ldStWindowPointer].second = instr->lsCycles; 335 | ldStWindowPointer = (ldStWindowPointer + 1) % maxMemAccesses; 336 | } 337 | 338 | if (!loadEarlyIssue && (is_int || is_int_mul || is_int_div || is_fp)) 339 | { 340 | lastLdStCriticalNum = instrCount; 341 | lastLdStCriticalCycles = execution_cycles; 342 | 343 | //if (is_branch) 344 | //lastLdStCriticalCycles += predictionCycles / 2; 345 | } 346 | 347 | previousWasBranch = is_branch; 348 | linearPC = instr->pc + instr->bytes; 349 | 350 | for (uint32_t i = 0; i < instr->regWriteCount; ++i) 351 | { 352 | int reg_write = instr->regWrite[i]; 353 | 
regLastWrittenBy[reg_write].first = instrCount; 354 | if (is_load) 355 | { 356 | if (loadDependentEarlyIssue) 357 | { 358 | regLastWrittenBy[reg_write].second = extraLoadLatency; 359 | } 360 | else 361 | { 362 | regLastWrittenBy[reg_write].second = instr->lsCycles; 363 | } 364 | regLastWrittenByLoad[reg_write] = true; 365 | } 366 | else 367 | { 368 | regLastWrittenBy[reg_write].second = execution_cycles; 369 | regLastWrittenByLoad[reg_write] = false; 370 | } 371 | } 372 | 373 | 374 | // Update miss statistics: 375 | 376 | if (instr->fetchCycles > l2iThreshold) 377 | { 378 | ++l2iMisses; 379 | } 380 | else if (instr->fetchCycles > l1iThreshold) 381 | { 382 | ++l1iMisses; 383 | } 384 | 385 | if (is_load_store) 386 | { 387 | if (instr->lsCycles > l2dThreshold) 388 | { 389 | ++l2dMisses; 390 | } 391 | else if (instr->lsCycles > l1dThreshold) 392 | { 393 | ++l1dMisses; 394 | } 395 | } 396 | 397 | if (is_branch) 398 | { 399 | ++branchCount; 400 | if (instr->mispredicted) 401 | { 402 | ++bpMisses; 403 | } 404 | } 405 | 406 | graphAnalysisTime += (chrono::system_clock::now() - my_time).count(); 407 | } 408 | 409 | void InorderCoreGraph::modelPipeline(Vertex& fetch_vertex, Vertex& dispatch_vertex, 410 | Vertex& execute_vertex, Vertex& mem_vertex, 411 | Vertex& commit_vertex, Instruction* instr, 412 | uint32_t execution_cycles, 413 | unordered_map& execute_parent) 414 | { 415 | bool mispredicted; 416 | uint32_t fetch_cycles = instr->fetchCycles; 417 | bool is_load_store = (instr->memLoadCount == 1) || (instr->memStoreCount == 1); 418 | 419 | bool no_need_for_ino_dispatch = (instrCount == 0) || (dispatchBandwidth == 1); 420 | bool no_need_for_ino_issue = (instrCount == 0) || (issueBandwidth == 1); 421 | bool no_need_for_ino_commit = (instrCount == 0) || (commitBandwidth == 1); 422 | 423 | mispredicted = previousInstrMispredicted; 424 | previousInstrMispredicted = instr->mispredicted; 425 | 426 | // Dispatch after fetch 427 | OutgoingEdge fetch_after_dispatch(dispatch_vertex, 
(int64_t)decodeCycles); 428 | //cout << "Dispatch after fetch" << endl; 429 | addEdge(fetch_vertex, fetch_after_dispatch); 430 | 431 | // Execute after dispatch 432 | OutgoingEdge execute_after_dispatch(execute_vertex, (int64_t)dispatchCycles); 433 | //cout << "Execute after dispatch" << endl; 434 | addEdge(dispatch_vertex, execute_after_dispatch); 435 | 436 | if (is_load_store) 437 | { 438 | // Memory execute (actual memory operation) after instruction execute (address calculation) 439 | OutgoingEdge mem_after_instr(mem_vertex, 440 | (int64_t)scoreboard.resourceTotalCycles(Resource::RscLsu)); 441 | //cout << "Memory execute after instruction execute" << endl; 442 | addEdge(execute_vertex, mem_after_instr); 443 | 444 | // Commit after execute 445 | OutgoingEdge commit_after_execute(commit_vertex, 446 | (int64_t)(instr->lsCycles + executeToCommitCycles)); 447 | //cout << "Commit after memory execute" << endl; 448 | addEdge(mem_vertex, commit_after_execute); 449 | } 450 | else 451 | { 452 | // Commit after execute 453 | OutgoingEdge commit_after_execute(commit_vertex, 454 | (int64_t)(execution_cycles + executeToCommitCycles)); 455 | //cout << "Commit after execute" << endl; 456 | addEdge(execute_vertex, commit_after_execute); 457 | } 458 | 459 | // Limited fetch bandwidth 460 | if ((instrCount >= fetchBandwidth) && 461 | ((lastMisprediction == UINT64_MAX) || 462 | (instrCount - lastMisprediction > fetchBandwidth))) 463 | { 464 | Vertex prev_fetch_vertex(VertexType::InstrFetch, instrCount - fetchBandwidth); 465 | OutgoingEdge limited_fetch_bw(fetch_vertex, 1); 466 | //cout << "Limited fetch bandwidth" << endl; 467 | addEdge(prev_fetch_vertex, limited_fetch_bw); 468 | } 469 | 470 | // Limited dispatch bandwidth 471 | if ((instrCount >= dispatchBandwidth) && 472 | ((lastMisprediction == UINT64_MAX) || 473 | (instrCount - lastMisprediction > dispatchBandwidth))) 474 | { 475 | Vertex prev_dispatch_vertex(VertexType::InstrDispatch, instrCount - dispatchBandwidth); 476 | 
OutgoingEdge limited_dispatch_bw(dispatch_vertex, 1); 477 | //cout << "Limited dispatch bandwidth" << endl; 478 | addEdge(prev_dispatch_vertex, limited_dispatch_bw); 479 | } 480 | 481 | // Limited issue bandwidth 482 | if ((instrCount >= issueBandwidth) && 483 | ((lastMisprediction == UINT64_MAX) || 484 | (instrCount - lastMisprediction > issueBandwidth))) 485 | { 486 | Vertex prev_execute_vertex(VertexType::InstrExecute, instrCount - issueBandwidth); 487 | OutgoingEdge limited_issue_bw(execute_vertex, 1); 488 | //cout << "Limited issue bandwidth" << endl; 489 | addEdge(prev_execute_vertex, limited_issue_bw); 490 | 491 | execute_parent[instrCount - issueBandwidth] = 1; 492 | } 493 | 494 | // Limited memory issue bandwidth 495 | if ((lastMemLdSt != UINT64_MAX) && 496 | (instrCount - lastMemLdSt <= memIssueBandwidth) && 497 | ((lastMisprediction == UINT64_MAX) || 498 | (instrCount - lastMisprediction > memIssueBandwidth))) 499 | { 500 | if (execute_parent[lastMemLdSt] < 1) 501 | { 502 | Vertex prev_execute_vertex(VertexType::InstrExecute, lastMemLdSt); 503 | OutgoingEdge limited_mem_issue_bw(execute_vertex, 1); 504 | //cout << "Limited memory issue bandwidth" << endl; 505 | addEdge(prev_execute_vertex, limited_mem_issue_bw); 506 | 507 | execute_parent[lastMemLdSt] = 1; 508 | no_need_for_ino_issue = no_need_for_ino_issue || ((instrCount - lastMemLdSt) == 1); 509 | } 510 | } 511 | 512 | // Limited commit bandwidth 513 | if ((instrCount >= commitBandwidth) && 514 | ((lastMisprediction == UINT64_MAX) || 515 | (instrCount - lastMisprediction > commitBandwidth))) 516 | { 517 | Vertex prev_commit_vertex(VertexType::InstrCommit, instrCount - commitBandwidth); 518 | OutgoingEdge limited_commit_bw(commit_vertex, 1); 519 | //cout << "Limited commit bandwidth" << endl; 520 | addEdge(prev_commit_vertex, limited_commit_bw); 521 | } 522 | 523 | // Limited memory commit bandwidth 524 | if ((lastMemLdSt != UINT64_MAX) && 525 | (instrCount - lastMemLdSt <= memCommitBandwidth) && 526 | 
((lastMisprediction == UINT64_MAX) || 527 | (instrCount - lastMisprediction > memCommitBandwidth))) 528 | { 529 | Vertex prev_commit_vertex(VertexType::InstrCommit, lastMemLdSt); 530 | OutgoingEdge limited_mem_commit_bw(commit_vertex, 1); 531 | //cout << "Limited memory commit bandwidth" << endl; 532 | addEdge(prev_commit_vertex, limited_mem_commit_bw); 533 | 534 | no_need_for_ino_commit = no_need_for_ino_commit || ((instrCount - lastMemLdSt) == 1); 535 | } 536 | 537 | if (mispredicted) 538 | { 539 | //Vertex prev_branch_vertex(VertexType::InstrCommit, instrCount - 1); 540 | //OutgoingEdge mispredicted_fetch(fetch_vertex, 541 | // (int64_t)(mispredictionPenalty + fetch_cycles)); 542 | Vertex prev_branch_vertex(VertexType::InstrExecute, instrCount - 1); 543 | OutgoingEdge mispredicted_fetch(fetch_vertex, 544 | (int64_t)(scoreboard.resourceTotalCycles(Resource::RscIntAlu) + 545 | mispredictionPenalty + fetch_cycles)); 546 | //cout << "Bad fetch" << endl; 547 | addEdge(prev_branch_vertex, mispredicted_fetch); 548 | lastMisprediction = instrCount - 1; 549 | } 550 | else 551 | { 552 | if (instrCount != 0) 553 | { 554 | // In-order fetch 555 | uint32_t fetch_weight; 556 | if (previousWasBranch && (instr->pc != linearPC)) // Correctly taken branch 557 | { 558 | fetch_weight = predictionCycles + fetch_cycles; 559 | } 560 | else // No branch or correctly not taken branch 561 | { 562 | fetch_weight = fetch_cycles; 563 | } 564 | 565 | Vertex prev_fetch_vertex(VertexType::InstrFetch, instrCount - 1); 566 | OutgoingEdge in_order_fetch(fetch_vertex, (int64_t)fetch_weight); 567 | //cout << "Good fetch" << endl; 568 | addEdge(prev_fetch_vertex, in_order_fetch); 569 | } 570 | 571 | // In-order dispatch 572 | if (!no_need_for_ino_dispatch) 573 | { 574 | Vertex prev_dispatch_vertex(VertexType::InstrDispatch, instrCount - 1); 575 | OutgoingEdge in_order_dispatch(dispatch_vertex, 0); 576 | //cout << "In-order dispatch" << endl; 577 | addEdge(prev_dispatch_vertex, in_order_dispatch); 
578 | } 579 | 580 | // In-order issue 581 | if (!no_need_for_ino_issue) 582 | { 583 | Vertex prev_execute_vertex(VertexType::InstrExecute, instrCount - 1); 584 | OutgoingEdge in_order_issue(execute_vertex, 0); 585 | //cout << "In-order issue" << endl; 586 | addEdge(prev_execute_vertex, in_order_issue); 587 | } 588 | 589 | // In-order commit 590 | if (!no_need_for_ino_commit) 591 | { 592 | Vertex prev_commit_vertex(VertexType::InstrCommit, instrCount - 1); 593 | OutgoingEdge in_order_commit(commit_vertex, 0); 594 | //cout << "In-order commit" << endl; 595 | addEdge(prev_commit_vertex, in_order_commit); 596 | } 597 | } 598 | } 599 | 600 | void InorderCoreGraph::modelMemoryOrderConstraint(Vertex& mem_vertex, bool is_load, bool is_store) 601 | { 602 | // For loads, add an edge from the earliest load/store in the load/store window 603 | if (is_load) 604 | { 605 | pair earliest_ld_st = ldStWindow[ldStWindowPointer % maxMemAccesses]; 606 | uint64_t earliest_ld_st_num = earliest_ld_st.first; 607 | uint32_t earliest_ld_st_cycles = earliest_ld_st.second; 608 | if (earliest_ld_st_cycles != UINT32_MAX) 609 | { 610 | Vertex prev_mem_vertex(VertexType::MemExecute, earliest_ld_st_num); 611 | OutgoingEdge limited_mem(mem_vertex, (int64_t)earliest_ld_st_cycles); 612 | //cout << "Maximum memory access constraint: load " 613 | // << instrCount << " to load/store " << earliest_ld_st_num << endl; 614 | addEdge(prev_mem_vertex, limited_mem); 615 | } 616 | } 617 | 618 | // For stores, add an edge from all loads/stores in the load/store window 619 | if (is_store) 620 | { 621 | for (uint32_t i = 0; i < maxMemAccesses; ++i) 622 | { 623 | pair previous_ld_st = ldStWindow[i]; 624 | uint64_t previous_ld_st_num = previous_ld_st.first; 625 | uint32_t previous_ld_st_cycles = previous_ld_st.second; 626 | if (previous_ld_st_cycles != UINT32_MAX) 627 | { 628 | Vertex prev_mem_vertex(VertexType::MemExecute, previous_ld_st_num); 629 | OutgoingEdge limited_store(mem_vertex, 
(int64_t)previous_ld_st_cycles); 630 | //cout << "Store order constraint: store " 631 | // << instrCount << " to load/store " << previous_ld_st_num << endl; 632 | addEdge(prev_mem_vertex, limited_store); 633 | } 634 | } 635 | } 636 | 637 | // It seems that in gem5's MinorCPU, a load/store is never sent to the LSQ 638 | // before any previous instruction is completed in its functional unit pipeline. 639 | if ((lastLdStCriticalNum != UINT64_MAX) && 640 | ((instrCount - lastLdStCriticalNum) / issueBandwidth < lastLdStCriticalCycles)) 641 | { 642 | Vertex prev_mem_critical_vertex(VertexType::InstrExecute, lastLdStCriticalNum); 643 | OutgoingEdge mem_wait(mem_vertex, (int64_t)lastLdStCriticalCycles); 644 | //Vertex prev_mem_critical_vertex(VertexType::InstrCommit, lastLdStCriticalNum); 645 | //OutgoingEdge mem_wait(mem_vertex, 0); 646 | //cout << "Load/store " << instrCount << " must be sent to memory after completion of " 647 | // << lastLdStCriticalNum << endl; 648 | addEdge(prev_mem_critical_vertex, mem_wait); 649 | } 650 | } 651 | 652 | void InorderCoreGraph::trackDataDependencies(Instruction* instr, 653 | uint32_t source_independent_cycles, 654 | Vertex& execute_vertex, 655 | unordered_map& execute_parent) 656 | { 657 | // Check for data dependence through registers 658 | for (uint32_t i = 0; i < instr->regReadCount; ++i) 659 | { 660 | int reg_read = instr->regRead[i]; 661 | if (regLastWrittenBy.count(reg_read) != 0) 662 | { 663 | //cout << "Register data dependence: " << instrCount << " to " 664 | // << regLastWrittenBy[reg_read].first << endl; 665 | uint32_t weight; // Not differentiating between address and value registers for stores 666 | if (regLastWrittenByLoad[reg_read]) 667 | { 668 | weight = regLastWrittenBy[reg_read].second; 669 | if (loadDependentEarlyIssue) 670 | { 671 | if (weight > source_independent_cycles) 672 | { 673 | weight -= source_independent_cycles; 674 | } 675 | else 676 | { 677 | weight = 0; 678 | } 679 | 680 | if ((instrCount - 
regLastWrittenBy[reg_read].first) / issueBandwidth < weight) 681 | { 682 | Vertex prev_mem_vertex(VertexType::MemExecute, 683 | regLastWrittenBy[reg_read].first); 684 | OutgoingEdge dependence_edge(execute_vertex, (int64_t)weight); 685 | addEdge(prev_mem_vertex, dependence_edge); 686 | } 687 | } 688 | else 689 | { 690 | if ((instrCount - regLastWrittenBy[reg_read].first) / issueBandwidth < weight) 691 | { 692 | Vertex prev_commit_vertex(VertexType::InstrCommit, 693 | regLastWrittenBy[reg_read].first); 694 | OutgoingEdge dependence_edge(execute_vertex, 0); 695 | addEdge(prev_commit_vertex, dependence_edge); 696 | } 697 | } 698 | } 699 | else 700 | { 701 | if (regLastWrittenBy[reg_read].second > source_independent_cycles) 702 | { 703 | weight = regLastWrittenBy[reg_read].second - source_independent_cycles; 704 | } 705 | else 706 | { 707 | weight = 0; 708 | } 709 | if (execute_parent[regLastWrittenBy[reg_read].first] < weight) 710 | { 711 | execute_parent[regLastWrittenBy[reg_read].first] = weight; 712 | } 713 | } 714 | } 715 | } 716 | } 717 | 718 | // This is called "simple" because resource instances are assigned to instructions 719 | // in program order. Critical path information can be used in a more complex model, 720 | // where resource assignment can be done using the LRU method, i.e., the resource 721 | // instance which has the shortest critical path to its youngest user's execute_vertex 722 | // is chosen to be assigned to the current instruction. Moreover, stalls can be 723 | // detected using critical path information, which can be used to more accurately 724 | // model the structural hazard related to the limited pipeline length of a resource. 725 | // However, in our experiments, we found that this more complex model just slightly 726 | // improves the accuracy (with gem5 as the baseline). 
727 | void InorderCoreGraph::modelResourceDependenciesSimple( 728 | bool is_int, bool is_int_mul, 729 | bool is_int_div, bool is_fp, 730 | bool is_load_store, Vertex& execute_vertex, 731 | unordered_map& execute_parent) 732 | { 733 | int type; 734 | if (is_int) 735 | { 736 | type = Resource::RscIntAlu; 737 | } 738 | else if (is_int_mul) 739 | { 740 | type = Resource::RscIntMul; 741 | } 742 | else if (is_int_div) 743 | { 744 | type = Resource::RscIntDiv; 745 | } 746 | else if (is_fp) 747 | { 748 | type = Resource::RscFpu; 749 | } 750 | else if (is_load_store) 751 | { 752 | type = Resource::RscLsu; 753 | } 754 | else 755 | { 756 | neededRsc[instrCount % AnalysisWindow].first = -1; 757 | return; 758 | } 759 | 760 | uint32_t instance; 761 | uint64_t previous_instr; 762 | uint32_t wait_cycles; 763 | uint64_t head_of_pipeline; 764 | scoreboard.scheduleResource(type, instrCount, 765 | instance, previous_instr, 766 | wait_cycles, head_of_pipeline); 767 | neededRsc[instrCount % AnalysisWindow].first = type; 768 | neededRsc[instrCount % AnalysisWindow].second = instance; 769 | 770 | if (previous_instr != UINT64_MAX) 771 | { 772 | //cout << "Resource dependence of " << instrCount 773 | // << " to " << previous_instr << endl; 774 | if (execute_parent[previous_instr] < wait_cycles) 775 | execute_parent[previous_instr] = wait_cycles; 776 | } 777 | 778 | if ((head_of_pipeline != UINT64_MAX) && 779 | (instrCount - head_of_pipeline < AnalysisWindow)) 780 | { 781 | //cout << "Limited pipeline length dependence: " << instrCount 782 | // << " to " << head_of_pipeline << endl; 783 | if (is_load_store) 784 | { 785 | Vertex prev_execute_vertex(VertexType::MemExecute, head_of_pipeline); 786 | OutgoingEdge pipeline_limit(execute_vertex, 0); 787 | addEdge(prev_execute_vertex, pipeline_limit); 788 | } 789 | else 790 | { 791 | Vertex prev_commit_vertex(VertexType::InstrCommit, head_of_pipeline); 792 | OutgoingEdge pipeline_limit(execute_vertex, 0); 793 | addEdge(prev_commit_vertex, 
pipeline_limit); 794 | } 795 | } 796 | 797 | if (is_load_store) 798 | { 799 | neededRsc[instrCount % AnalysisWindow].first = -1; // Will take care of this later 800 | } 801 | } 802 | 803 | void InorderCoreGraph::addEdge(Vertex& parent, OutgoingEdge& e) 804 | { 805 | // It may have been better to define this function with a 806 | // child vertex and an incoming edge. But in an earlier version 807 | // of addEdge, it was like this, and I left it this way. 808 | 809 | //printEdge(parent, e); 810 | 811 | int child_type = e.child.type; 812 | int parents_count = parents[child_type]; 813 | 814 | if (e.child.instrNum - parent.instrNum > AnalysisWindow) 815 | { 816 | printEdge(parent, e); 817 | CALIPERS_ERROR("The parent-child distance exceeds the window size"); 818 | } 819 | 820 | if (parents_count == MAX_PARENTS) 821 | { 822 | printEdge(parent, e); 823 | CALIPERS_ERROR("The vertex has the maximum number of parents"); 824 | } 825 | 826 | miniGraph[child_type][parents_count].parent = parent; 827 | miniGraph[child_type][parents_count].weight = e.weight; 828 | ++parents_count; 829 | parents[child_type] = parents_count; 830 | } 831 | 832 | void InorderCoreGraph::calculateInstructionCriticalPath() 833 | { 834 | for (int i = 0; i <= VertexType::Last; ++i) 835 | { 836 | Vertex child(i, instrCount); 837 | for (uint32_t j = 0; j < parents[i]; ++j) 838 | { 839 | Vertex& parent = miniGraph[i][j].parent; 840 | OutgoingEdge e(child, miniGraph[i][j].weight); 841 | updateCriticalPathCycles(parent, e); 842 | } 843 | parents[i] = 0; 844 | } 845 | } 846 | --------------------------------------------------------------------------------