├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── branch
│   ├── bimodal.bpred
│   ├── gshare.bpred
│   ├── hashed_perceptron.bpred
│   └── perceptron.bpred
├── build_champsim.sh
├── cvp_tracer
│   ├── README.md
│   └── cvp2champsim.cc
├── download.sh
├── download_links
├── get_stats.py
├── inc
│   ├── block.h
│   ├── cache.h
│   ├── champsim.h
│   ├── dram_controller.h
│   ├── instruction.h
│   ├── kpcp.h
│   ├── memory_class.h
│   ├── ooo_cpu.h
│   ├── set.h
│   ├── spp_dev.h
│   └── uncore.h
├── ml_prefetch_sim.py
├── model.py
├── prefetcher
│   ├── bo.h
│   ├── bo.llc_pref
│   ├── from_file.llc_pref
│   ├── ip_stride.l2c_pref
│   ├── kpcp.l2c_pref
│   ├── kpcp_util.cc
│   ├── next_line.l1d_pref
│   ├── next_line.l1i_pref
│   ├── next_line.l2c_pref
│   ├── next_line.llc_pref
│   ├── no.l1d_pref
│   ├── no.l1i_pref
│   ├── no.l2c_pref
│   ├── no.llc_pref
│   ├── spp_dev.l2c_pref
│   └── trace.llc_pref
├── replacement
│   ├── base_replacement.cc
│   ├── drrip.llc_repl
│   ├── lru.llc_repl
│   ├── ship.llc_repl
│   └── srrip.llc_repl
├── run_4core.sh
├── run_champsim.sh
├── scripts
│   ├── download_dpc3_traces.sh
│   ├── dpc3_max_simpoint.txt
│   ├── multiworkload.cc
│   └── seeds.txt
├── src
│   ├── block.cc
│   ├── cache.cc
│   ├── dram_controller.cc
│   ├── main.cc
│   ├── ooo_cpu.cc
│   └── uncore.cc
└── tracer
    ├── champsim_tracer.cpp
    ├── clean_tracer.sh
    ├── make_tracer.sh
    ├── makefile
    └── makefile.rules
/.gitignore:
--------------------------------------------------------------------------------
1 | prefetcher/l1i_prefetcher.cc
2 | prefetcher/l1d_prefetcher.cc
3 | prefetcher/l2c_prefetcher.cc
4 | prefetcher/llc_prefetcher.cc
5 | branch/branch_predictor.cc
6 | replacement/llc_replacement.cc
7 |
8 | inc/champsim.h.bak
9 |
10 | bin/
11 | obj/
12 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | app = champsim
2 |
3 | srcExt = cc
4 | srcDir = src branch replacement prefetcher
5 | objDir = obj
6 | binDir = bin
7 | inc = inc
8 |
9 | debug = 1
10 |
11 | CFlags = -Wall -O3 -std=c++11
12 | LDFlags =
13 | libs =
14 | libDir =
15 |
16 |
17 | #************************ DO NOT EDIT BELOW THIS LINE! ************************
18 |
19 | ifeq ($(debug),1)
20 | debug=-g
21 | else
22 | debug=
23 | endif
24 | inc := $(addprefix -I,$(inc))
25 | libs := $(addprefix -l,$(libs))
26 | libDir := $(addprefix -L,$(libDir))
27 | CFlags += -c $(debug) $(inc) $(libDir) $(libs)
28 | sources := $(shell find $(srcDir) -name '*.$(srcExt)')
29 | srcDirs := $(shell find . -name '*.$(srcExt)' -exec dirname {} \; | uniq)
30 | objects := $(patsubst %.$(srcExt),$(objDir)/%.o,$(sources))
31 |
32 | ifeq ($(srcExt),cc)
33 | CC = $(CXX)
34 | else
35 | CFlags += -std=gnu99
36 | endif
37 |
38 | .phony: all clean distclean
39 |
40 |
41 | all: $(binDir)/$(app)
42 |
43 | $(binDir)/$(app): buildrepo $(objects)
44 | @mkdir -p `dirname $@`
45 | @echo "Linking $@..."
46 | @$(CC) $(objects) $(LDFlags) -o $@
47 |
48 | $(objDir)/%.o: %.$(srcExt)
49 | @echo "Generating dependencies for $<..."
50 | @$(call make-depend,$<,$@,$(subst .o,.d,$@))
51 | @echo "Compiling $<..."
52 | @$(CC) $(CFlags) $< -o $@
53 |
54 | clean:
55 | $(RM) -r $(objDir)
56 |
57 | distclean: clean
58 | $(RM) -r $(binDir)/$(app)
59 |
60 | buildrepo:
61 | @$(call make-repo)
62 |
63 | define make-repo
64 | for dir in $(srcDirs); \
65 | do \
66 | mkdir -p $(objDir)/$$dir; \
67 | done
68 | endef
69 |
70 |
71 | # usage: $(call make-depend,source-file,object-file,depend-file)
72 | define make-depend
73 | $(CC) -MM \
74 | -MF $3 \
75 | -MP \
76 | -MT $2 \
77 | $(CFlags) \
78 | $1
79 | endef
80 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Modified ChampSim for ML Prefetching Competition
2 |
3 | We will use ChampSim to evaluate the effectiveness of your ML prefetchers. Your
4 | prefetching models will be trained using the Load Traces that we provide (details below),
5 | and they will generate an Output File with a list of prefetches that will be fed back into
6 | ChampSim to compute coverage, accuracy, and instructions per cycle (IPC).
7 |
8 | ## Traces:
9 |
10 | The traces can be found at [this link](https://utexas.box.com/s/2k54kp8zvrqdfaa8cdhfquvcxwh7yn85).
11 | Alternatively, the `download.sh` file can be used to download all of the files to
12 | avoid bulk download restrictions from Box. You can also use the information found
13 | in the `download_links` file to download the data in another fashion.
14 |
15 | There are two types of traces that can be found here:
16 | - Load traces under the folder LoadTraces that you will use to train your ML models. Each
17 | load trace is a series of a program's LLC accesses, and the trace format is as follows:
18 | ```
19 | Unique Instr Id, Cycle Count, Load Address, Instruction Pointer of the Load, LLC hit/miss
20 | ```
21 | The load traces are plain-text CSV files; a minimal parsing sketch follows this list.
22 |
23 | - Execution traces under the folder ChampSimTraces that ChampSim needs to
24 | compute IPC. You do not need these traces to train your models; they are
25 | provided only to facilitate an evaluation using IPC. Note that you should not
26 | unzip the execution traces, as ChampSim expects them in the compressed format.
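
For reference, here is a minimal Python sketch of a load-trace reader. It is not part of the repository; the comma separator and the hexadecimal address/IP encoding are assumptions, so check them against an actual file in LoadTraces before relying on it.

```python
# Hypothetical helper (not in this repo): iterate over load-trace records.
from collections import namedtuple

LoadRecord = namedtuple('LoadRecord', ['instr_id', 'cycle', 'load_addr', 'ip', 'llc_hit'])

def parse_line(line, sep=','):
    # Unique Instr Id, Cycle Count, Load Address, Instruction Pointer, LLC hit/miss
    instr_id, cycle, load_addr, ip, hit = [field.strip() for field in line.split(sep)]
    return LoadRecord(int(instr_id), int(cycle),
                      int(load_addr, 16), int(ip, 16),  # addresses assumed hexadecimal
                      hit == '1')

def read_load_trace(path):
    with open(path) as f:
        for line in f:
            if line.strip():
                yield parse_line(line)
```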
27 |
28 | ## Output File
29 |
30 | For a given Load Trace, your code should generate an output file that contains one
31 | prefetch per line. Each line should consist of two space-separated integer
32 | values: the unique instruction ID for which you want to issue a prefetch and the
33 | load address you want to prefetch. The unique instruction ID corresponds to
34 | the ID of the triggering load in the input Load Trace. You may include up to two
35 | prefetches per load listed in the Load Trace, or choose not to prefetch for a
36 | load at all. The prefetches must appear in the same order as their triggering loads in the trace.
37 | Should you exceed the maximum number of prefetches per load, the first two will
38 | be kept and the remaining prefetches for that load will be discarded. A minimal sketch that produces such a file follows the example below.
39 |
40 | For example, consider a Load Trace as follows:
41 | ```
42 | 3659 cycle1 A ip1 1
43 | 5433 cycle2 B ip2 0
44 | 6928 cycle3 C ip3 0
45 | ```
46 |
47 | Your output file could look something like this:
48 | ```
49 | 3659 A+1 # Issue first prefetch for Instruction 3659
50 | 3659 A+2 # Issue second prefetch for Instruction 3659
51 | 5433 B+8 # Issue only one prefetch for Instruction 5433
52 | ```
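
To make the expected file concrete, here is a hedged sketch that emits a single next-line prefetch per load, in trace order and within the two-prefetch limit. It reuses the hypothetical `read_load_trace` helper sketched above; the 64-byte block size and the hexadecimal output encoding are assumptions, so match whatever the `from_file` prefetcher actually expects.

```python
# Illustration of the output format only, not a competitive prefetcher.
BLOCK_SIZE = 64  # assumed cache-block size

def write_prefetch_file(load_trace_path, out_path, max_per_load=2):
    with open(out_path, 'w') as out:
        for rec in read_load_trace(load_trace_path):
            candidates = [rec.load_addr + BLOCK_SIZE]  # naive next-line guess
            for addr in candidates[:max_per_load]:
                # one prefetch per line: triggering instruction ID, then the address
                out.write('{} {:x}\n'.format(rec.instr_id, addr))
```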
53 |
54 | ## Your Code:
55 |
56 | Your code should have two modes of operation (a hypothetical command-line skeleton is sketched after the list below):
57 |
58 | 1. Taking in a Training Load Trace that your model trains on
59 | 2. Taking in a Test Load Trace for which your model will produce predictions in
60 | the format explained above.
61 |
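
A hypothetical command-line skeleton for these two modes is sketched below. The subcommand names and flags are illustrative only; the interface that is actually invoked is defined by `ml_prefetch_sim.py` and `model.py` (see the Training and Generating sections below).

```python
# Hypothetical skeleton: replace the TODO with calls into your model code.
import argparse

def main():
    parser = argparse.ArgumentParser(description='ML prefetcher driver')
    sub = parser.add_subparsers(dest='mode', required=True)

    train = sub.add_parser('train', help='fit the model on a training Load Trace')
    train.add_argument('load_trace')
    train.add_argument('--model', required=True, help='path to save the trained model')

    gen = sub.add_parser('generate', help='emit prefetches for a test Load Trace')
    gen.add_argument('load_trace')
    gen.add_argument('output_file', help='prefetch file in the format described above')
    gen.add_argument('--model', required=True, help='path to a previously trained model')

    args = parser.parse_args()
    # TODO: dispatch to your training / prediction code based on args.mode

if __name__ == '__main__':
    main()
```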
62 | ## Building, Running, and Evaluating
63 |
64 | Building, running, and evaluation have been rolled into one script, `ml_prefetch_sim.py`. Some
65 | common use cases are highlighted below, and more information on each of the
66 | subcommands can be found by running:
67 |
68 | ```
69 | ./ml_prefetch_sim.py help subcommand
70 | ```
71 |
72 | where `subcommand` is one of `build|run|eval`.
73 |
74 | ### Building
75 |
76 | The following command will compile two ChampSim binaries: (1) a ChampSim binary
77 | that reads your ML model's output from a file and uses it as a prefetcher,
78 | and (2) a ChampSim binary with no prefetching that is used as a baseline:
79 |
80 | ```
81 | ./ml_prefetch_sim.py build
82 | ```
83 |
84 | ### Training
85 |
86 | ```
87 | ./ml_prefetch_sim.py train path_to_load_trace --model save_path --num-prefetch-warmup-instructions num_in_millions
88 | ```
89 |
90 | To use the above, you need to modify the `model.py` file with your model. The
91 | prefetch warm-up instruction count specifies how many instructions (in millions) are
92 | included in the training set; the remaining instructions form the evaluation set.
93 |
94 | ### Generating the Prefetch File
95 |
96 | ```
97 | ./ml_prefetch_sim.py generate path_to_load_trace path_to_output_prefetch_file --model save_path --num-prefetch-warmup-instructions num_in_millions
98 | ```
99 |
100 | As with training, you need to modify the `model.py` file with your model, and the
101 | prefetch warm-up instruction count again determines the split between the
102 | training set and the evaluation set.
103 |
104 | ### Running
105 |
106 | To run the baseline ChampSim binaries on an execution trace:
107 |
108 | ```
109 | ./ml_prefetch_sim.py run path_to_champsim_trace_here
110 | ```
111 |
112 | To additionally run the ChampSim binary with your prefetcher:
113 |
114 | ```
115 | ./ml_prefetch_sim.py run path_to_champsim_trace_here --prefetch path_to_prefetcher_file
116 | ```
117 |
118 | To run the ChampSim binary with your prefetcher only:
119 |
120 | ```
121 | ./ml_prefetch_sim.py run path_to_trace_here --prefetch path_to_prefetcher_file --no-base
122 | ```
123 |
124 | ### Evaluation
125 |
126 | To evaluate the performance of your ML prefetcher (and compare it against the baselines
127 | of no prefetching, Best Offset, SISB, and SISB Best Offset), run:
128 |
129 | ```
130 | ./ml_prefetch_sim.py eval
131 | ```
132 |
133 | ## Competition Judging
134 |
135 | To test how submissions generalize, our test set evaluation will have two components:
136 |
137 | - Undisclosed execution samples for the training traces: You can submit a
138 | pre-trained model for each benchmark in the training set, and we will
139 | evaluate it on a different sample of the same benchmark
140 |
141 | - Undisclosed benchmarks: We will train and test your model on unseen
142 | benchmarks using the training routines that you provide
143 |
144 | ## Changes made to ChampSim for the competition:
145 |
146 | - Add LLC prefetcher (from\_file) to load ML model prefetch predictions into ChampSim
147 | - Modify the LLC prefetcher to provide unique instruction IDs and cycle counts
148 | - Remove same-page restriction in src/cache.cc for more irregular prefetching
149 | opportunity
150 | - Add ml\_prefetch\_sim.py to handle all of the building, running, and evaluation.
151 |
152 | ---
153 |
154 |
155 | # ChampSim
156 | ChampSim is a trace-based simulator for microarchitecture studies. You can sign up for the public mailing list by sending an empty email to champsim+subscribe@googlegroups.com. Traces for the 3rd Data Prefetching Championship (DPC-3) can be found here (https://dpc3.compas.cs.stonybrook.edu/?SW_IS). A set of traces used for the 2nd Cache Replacement Championship (CRC-2) can be found at this link (http://bit.ly/2t2nkUj).
157 |
158 |
159 | # Clone ChampSim repository
160 | ```
161 | git clone https://github.com/ChampSim/ChampSim.git
162 | ```
163 |
164 | # Compile
165 |
166 | ChampSim takes seven parameters: branch predictor, L1I prefetcher, L1D prefetcher, L2C prefetcher, LLC prefetcher, LLC replacement policy, and the number of cores.
167 | For example, `./build_champsim.sh bimodal no no no no lru 1` builds a single-core processor with a bimodal branch predictor, no prefetchers, and the baseline LRU replacement policy for the LLC.
168 | ```
169 | $ ./build_champsim.sh bimodal no no no no lru 1
170 |
171 | $ ./build_champsim.sh ${BRANCH} ${L1I_PREFETCHER} ${L1D_PREFETCHER} ${L2C_PREFETCHER} ${LLC_PREFETCHER} ${LLC_REPLACEMENT} ${NUM_CORE}
172 | ```
173 |
174 | # Download DPC-3 trace
175 |
176 | Professor Daniel Jimenez at Texas A&M University kindly provided traces for DPC-3. Use the following script to download these traces (~20 GB in size, max SimPoint only).
177 | ```
178 | $ cd scripts
179 |
180 | $ ./download_dpc3_traces.sh
181 | ```
182 |
183 | # Run simulation
184 |
185 | Execute `run_champsim.sh` with proper input arguments. The default `TRACE_DIR` in `run_champsim.sh` is set to `$PWD/dpc3_traces`.
186 |
187 | * Single-core simulation: Run simulation with `run_champsim.sh` script.
188 |
189 | ```
190 | Usage: ./run_champsim.sh [BINARY] [N_WARM] [N_SIM] [TRACE] [OPTION]
191 | $ ./run_champsim.sh bimodal-no-no-no-no-lru-1core 1 10 400.perlbench-41B.champsimtrace.xz
192 |
193 | ${BINARY}: ChampSim binary compiled by "build_champsim.sh" (e.g., bimodal-no-no-no-no-lru-1core)
194 | ${N_WARM}: number of instructions for warmup (1 million)
195 | ${N_SIM}: number of instructions for detailed simulation (10 million)
196 | ${TRACE}: trace name (400.perlbench-41B.champsimtrace.xz)
197 | ${OPTION}: extra option, e.g. "-low_bandwidth" (see src/main.cc)
198 | ```
199 | Simulation results will be stored under "results_${N_SIM}M" in the form "${TRACE}-${BINARY}-${OPTION}.txt".
200 |
201 | * Multi-core simulation: Run simulation with `run_4core.sh` script.
202 | ```
203 | Usage: ./run_4core.sh [BINARY] [N_WARM] [N_SIM] [N_MIX] [TRACE0] [TRACE1] [TRACE2] [TRACE3] [OPTION]
204 | $ ./run_4core.sh bimodal-no-no-no-no-lru-4core 1 10 0 400.perlbench-41B.champsimtrace.xz \\
205 | 401.bzip2-38B.champsimtrace.xz 403.gcc-17B.champsimtrace.xz 410.bwaves-945B.champsimtrace.xz
206 | ```
207 | Note that we need to specify multiple trace files for `run_4core.sh`. `N_MIX` is used to represent a unique ID for mixed multi-programmed workloads.
208 |
209 |
210 | # Add your own branch predictor, data prefetchers, and replacement policy
211 | **Copy an empty template**
212 | ```
213 | $ cp branch/branch_predictor.cc branch/mybranch.bpred
214 | $ cp prefetcher/l1d_prefetcher.cc prefetcher/mypref.l1d_pref
215 | $ cp prefetcher/l2c_prefetcher.cc prefetcher/mypref.l2c_pref
216 | $ cp prefetcher/llc_prefetcher.cc prefetcher/mypref.llc_pref
217 | $ cp replacement/llc_replacement.cc replacement/myrepl.llc_repl
218 | ```
219 |
220 | **Work on your algorithms with your favorite text editor**
221 | ```
222 | $ vim branch/mybranch.bpred
223 | $ vim prefetcher/mypref.l1d_pref
224 | $ vim prefetcher/mypref.l2c_pref
225 | $ vim prefetcher/mypref.llc_pref
226 | $ vim replacement/myrepl.llc_repl
227 | ```
228 |
229 | **Compile and test**
230 | ```
231 | $ ./build_champsim.sh mybranch no mypref mypref mypref myrepl 1
232 | $ ./run_champsim.sh mybranch-no-mypref-mypref-mypref-myrepl-1core 1 10 bzip2_183B
233 | ```
234 |
235 | # How to create traces
236 |
237 | We have included only 4 sample traces, taken from SPEC CPU 2006. These
238 | traces are short (10 million instructions), and do not necessarily cover the range of behaviors your
239 | replacement algorithm will likely see in the full competition trace list (not
240 | included). We STRONGLY recommend creating your own traces, covering
241 | a wide variety of program types and behaviors.
242 |
243 | The included Pin Tool champsim_tracer.cpp can be used to generate new traces.
244 | We used Pin 3.2 (pin-3.2-81205-gcc-linux), and it may require
245 | installing libdwarf.so, libelf.so, or other libraries, if you do not already
246 | have them. Please refer to the Pin documentation (https://software.intel.com/sites/landingpage/pintool/docs/81205/Pin/html/)
247 | for working with Pin 3.2.
248 |
249 | Get this version of Pin:
250 | ```
251 | wget http://software.intel.com/sites/landingpage/pintool/downloads/pin-3.2-81205-gcc-linux.tar.gz
252 | ```
253 |
254 | **Note on compatibility**: If you are using newer Linux kernels/Ubuntu versions (e.g., 20.04 LTS), you might run into issues (such as [[1](https://github.com/ChampSim/ChampSim/issues/102)], [[2](https://stackoverflow.com/questions/55698095/intel-pin-tools-32-bit-processsectionheaders-560-assertion-failed)], [[3](https://stackoverflow.com/questions/43589174/pin-tool-segmentation-fault-for-ubuntu-17-04)]) with Pin 3.2. The ChampSim tracer works fine with newer Pin tool versions that can be downloaded from [here](https://software.intel.com/content/www/us/en/develop/articles/pin-a-binary-instrumentation-tool-downloads.html). Pin 3.17 is [confirmed](https://github.com/ChampSim/ChampSim/issues/102) to work with Ubuntu 20.04.1 LTS.
255 |
256 | Once downloaded, open tracer/make_tracer.sh and change PIN_ROOT to Pin's location.
257 | Run ./make_tracer.sh to generate champsim_tracer.so.
258 |
259 | **Use the Pin tool like this**
260 | ```
261 | pin -t obj-intel64/champsim_tracer.so -- <your program here>
262 | ```
263 |
264 | The tracer has three options you can set:
265 | ```
266 | -o
267 | Specify the output file for your trace.
268 | The default is default_trace.champsim
269 |
270 | -s
271 | Specify the number of instructions to skip in the program before tracing begins.
272 | The default value is 0.
273 |
274 | -t
275 | The number of instructions to trace, after -s instructions have been skipped.
276 | The default value is 1,000,000.
277 | ```
278 | For example, you could trace 200,000 instructions of the program ls, after
279 | skipping the first 100,000 instructions, with this command:
280 | ```
281 | pin -t obj/champsim_tracer.so -o traces/ls_trace.champsim -s 100000 -t 200000 -- ls
282 | ```
283 | Traces created with the champsim_tracer.so are approximately 64 bytes per instruction,
284 | but they generally compress down to less than a byte per instruction using xz compression.
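
For example, the trace produced above can be compressed in place with xz (producing traces/ls_trace.champsim.xz; -9 is simply the highest standard preset):
```
$ xz -9 traces/ls_trace.champsim
```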
285 |
286 | # Evaluate Simulation
287 |
288 | ChampSim measures IPC (Instructions Per Cycle) as its performance metric.
289 | Several other useful metrics are printed out at the end of the simulation.
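
For instance, the IPC improvement reported by `get_stats.py` is just the relative change between your IPC and the baseline IPC; a minimal sketch of the same formula:

```python
# Same formula used in get_stats.py: percentage IPC improvement over a baseline.
def ipc_improvement(ipc, base_ipc):
    return (ipc - base_ipc) / base_ipc * 100.0

print(ipc_improvement(1.5, 1.2))  # ~25.0 percent
```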
290 |
291 | Good luck and be a champion!
292 |
--------------------------------------------------------------------------------
/branch/bimodal.bpred:
--------------------------------------------------------------------------------
1 | #include "ooo_cpu.h"
2 |
3 | #define BIMODAL_TABLE_SIZE 16384
4 | #define BIMODAL_PRIME 16381
5 | #define MAX_COUNTER 3
6 | int bimodal_table[NUM_CPUS][BIMODAL_TABLE_SIZE];
7 |
8 | void O3_CPU::initialize_branch_predictor()
9 | {
10 | cout << "CPU " << cpu << " Bimodal branch predictor" << endl;
11 |
12 | for(int i = 0; i < BIMODAL_TABLE_SIZE; i++)
13 | bimodal_table[cpu][i] = 0;
14 | }
15 |
16 | uint8_t O3_CPU::predict_branch(uint64_t ip)
17 | {
18 | uint32_t hash = ip % BIMODAL_PRIME;
19 | uint8_t prediction = (bimodal_table[cpu][hash] >= ((MAX_COUNTER + 1)/2)) ? 1 : 0;
20 |
21 | return prediction;
22 | }
23 |
24 | void O3_CPU::last_branch_result(uint64_t ip, uint8_t taken)
25 | {
26 | uint32_t hash = ip % BIMODAL_PRIME;
27 |
28 | if (taken && (bimodal_table[cpu][hash] < MAX_COUNTER))
29 | bimodal_table[cpu][hash]++;
30 | else if ((taken == 0) && (bimodal_table[cpu][hash] > 0))
31 | bimodal_table[cpu][hash]--;
32 | }
33 |
--------------------------------------------------------------------------------
/branch/gshare.bpred:
--------------------------------------------------------------------------------
1 | #include "ooo_cpu.h"
2 |
3 | #define GLOBAL_HISTORY_LENGTH 14
4 | #define GLOBAL_HISTORY_MASK (1 << GLOBAL_HISTORY_LENGTH) - 1
5 | int branch_history_vector[NUM_CPUS];
6 |
7 | #define GS_HISTORY_TABLE_SIZE 16384
8 | int gs_history_table[NUM_CPUS][GS_HISTORY_TABLE_SIZE];
9 | int my_last_prediction[NUM_CPUS];
10 |
11 | void O3_CPU::initialize_branch_predictor()
12 | {
13 | cout << "CPU " << cpu << " GSHARE branch predictor" << endl;
14 |
15 | branch_history_vector[cpu] = 0;
16 | my_last_prediction[cpu] = 0;
17 |
18 |     for(int i=0; i<GS_HISTORY_TABLE_SIZE; i++)
19 |         gs_history_table[cpu][i] = 0;
20 | }
21 |
22 | uint32_t gs_table_hash(uint64_t ip, uint32_t bh_vector)
23 | {
24 |     uint32_t hash = ip^(ip>>GLOBAL_HISTORY_LENGTH)^(ip>>(GLOBAL_HISTORY_LENGTH*2))^bh_vector;
25 | hash = hash%GS_HISTORY_TABLE_SIZE;
26 |
27 | //printf("%d\n", hash);
28 |
29 | return hash;
30 | }
31 |
32 | uint8_t O3_CPU::predict_branch(uint64_t ip)
33 | {
34 | int prediction = 1;
35 |
36 | int gs_hash = gs_table_hash(ip, branch_history_vector[cpu]);
37 |
38 | if(gs_history_table[cpu][gs_hash] >= 2)
39 | prediction = 1;
40 | else
41 | prediction = 0;
42 |
43 | my_last_prediction[cpu] = prediction;
44 |
45 | return prediction;
46 | }
47 |
48 | void O3_CPU::last_branch_result(uint64_t ip, uint8_t taken)
49 | {
50 | int gs_hash = gs_table_hash(ip, branch_history_vector[cpu]);
51 |
52 | if(taken == 1) {
53 | if(gs_history_table[cpu][gs_hash] < 3)
54 | gs_history_table[cpu][gs_hash]++;
55 | } else {
56 | if(gs_history_table[cpu][gs_hash] > 0)
57 | gs_history_table[cpu][gs_hash]--;
58 | }
59 |
60 | // update branch history vector
61 | branch_history_vector[cpu] <<= 1;
62 | branch_history_vector[cpu] &= GLOBAL_HISTORY_MASK;
63 | branch_history_vector[cpu] |= taken;
64 | }
65 |
--------------------------------------------------------------------------------
/branch/hashed_perceptron.bpred:
--------------------------------------------------------------------------------
1 | /*
2 |
3 | This code implements a hashed perceptron branch predictor using geometric
4 | history lengths and dynamic threshold setting.
5 |
6 | It was written by Daniel A. Jiménez in March 2019. To the extent allowed by
7 | law, the author abdicates all rights to this work and places it in the public
8 | domain.
9 |
10 | The original perceptron branch predictor is from Jiménez and Lin, "Dynamic
11 | Branch Prediction with Perceptrons," HPCA 2001.
12 |
13 | The idea of using multiple independently indexed tables of perceptron weights
14 | is from Jiménez, "Fast Path-Based Neural Branch Prediction," MICRO 2003 and
15 | later expanded in "Piecewise Linear Branch Prediction" from ISCA 2005.
16 |
17 | The idea of using hashes of branch history to reduce the number of independent
18 | tables is documented in three contemporaneous papers:
19 |
20 | 1. Seznec, "Revisiting the Perceptron Predictor," IRISA technical report, 2004.
21 |
22 | 2. Tarjan and Skadron, "Revisiting the Perceptron Predictor Again," UVA
23 | technical report, 2004, expanded and published in ACM TACO 2005 as "Merging
24 | path and gshare indexing in perceptron branch prediction"; introduces the term
25 | "hashed perceptron."
26 |
27 | 3. Loh and Jiménez, "Reducing the Power and Complexity of Path-Based Neural
28 | Branch Prediction," WCED 2005.
29 |
30 | The ideas of using "geometric history lengths" i.e. hashing into tables with
31 | histories of exponentially increasing length, as well as dynamically adjusting
32 | the theta parameter, are from Seznec, "The O-GEHL Branch Predictor," from CBP
33 | 2004, expanded later as "Analysis of the O-GEometric History Length Branch
34 | Predictor" in ISCA 2005.
35 |
36 | This code uses these ideas, but prefers simplicity over absolute accuracy (I
37 | wrote it in about an hour and later spent more time on this comment block than
38 | I did on the code). These papers and subsequent papers by Jiménez and other
39 | authors significantly improve the accuracy of perceptron-based predictors but
40 | involve tricks and analysis beyond the needs of a tool like ChampSim that
41 | targets cache optimizations. If you want accuracy at any cost, see the winners
42 | of the latest branch prediction contest, CBP 2016 as of this writing, but
43 | prepare to have your face melted off by the complexity of the code you find
44 | there. If you are a student being asked to code a good branch predictor for
45 | your computer architecture class, don't copy this code; there are much better
46 | sources for you to plagiarize.
47 |
48 | */
49 |
50 | #include
51 | #include
52 | #include
53 | #include
54 |
55 | #include "ooo_cpu.h"
56 |
57 | // this many tables
58 |
59 | #define NTABLES 16
60 |
61 | // maximum history length
62 |
63 | #define MAXHIST 232
64 |
65 | // minimum history length (for table 1; table 0 is biases)
66 |
67 | #define MINHIST 3
68 |
69 | // speed for dynamic threshold setting
70 |
71 | #define SPEED 18
72 |
73 | // geometric global history lengths
74 |
75 | int history_lengths[NTABLES] = { 0, 3, 4, 6, 8, 10, 14, 19, 26, 36, 49, 67, 91, 125, 170, MAXHIST };
76 |
77 | // 12-bit indices for the tables
78 |
79 | #define LOG_TABLE_SIZE 12
80 | #define TABLE_SIZE (1<<LOG_TABLE_SIZE)
173 | 	return yout[cpu] >= 1;
174 | }
175 |
176 | void O3_CPU::last_branch_result(uint64_t pc, uint8_t taken) {
177 |
178 | // was this prediction correct?
179 |
180 | bool correct = taken == (yout[cpu] >= 1);
181 |
182 | // insert this branch outcome into the global history
183 |
184 | bool b = taken;
185 | 	for (int i=0; i<NGHIST_WORDS; i++) {
216 | 			if (*c > -128) (*c)--;
217 | }
218 | }
219 |
220 | // dynamic threshold setting from Seznec's O-GEHL paper
221 |
222 | if (!correct) {
223 |
224 | // increase theta after enough mispredictions
225 |
226 | tc[cpu]++;
227 | if (tc[cpu] >= SPEED) {
228 | theta[cpu]++;
229 | tc[cpu] = 0;
230 | }
231 | } else if (a < theta[cpu]) {
232 |
233 | // decrease theta after enough weak but correct predictions
234 |
235 | tc[cpu]--;
236 | if (tc[cpu] <= -SPEED) {
237 | theta[cpu]--;
238 | tc[cpu] = 0;
239 | }
240 | }
241 | }
242 | }
243 |
--------------------------------------------------------------------------------
/branch/perceptron.bpred:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2001 University of Texas at Austin
3 | *
4 | * Daniel A. Jimenez
5 | * Calvin Lin
6 | *
7 | * Permission is hereby granted, free of charge, to any person
8 | * obtaining a copy of this software (the "Software"), to deal in
9 | * the Software without restriction, including without limitation
10 | * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 | * and/or sell copies of the Software, and to permit persons to whom the
12 | * Software is furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be
15 | * included in all copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT
21 | * AUSTIN BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
22 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
23 | * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | * THE SOFTWARE.
25 | *
26 | * This file implements the simulated perceptron branch predictor from:
27 | *
28 | * Jimenez, D. A. & Lin, C., Dynamic branch prediction with perceptrons,
29 | * Proceedings of the Seventh International Symposium on High Performance
30 | * Computer Architecture (HPCA), Monterrey, NL, Mexico 2001
31 | *
32 | * The #define's here specify a perceptron predictor with a history
33 | * length of 24, 163 perceptrons, and 8-bit weights. This represents
34 | * a hardware budget of (24+1)*8*163 = 32600 bits, or about 4K bytes,
35 | * which is comparable to the hardware budget of the Alpha 21264 hybrid
36 | * branch predictor.
37 | *
38 | * There are three important functions defined in this file:
39 | *
40 | * 1. void initialize_perceptron_predictor (void);
41 | * Initialize the perceptron predictor
42 | *
43 | * 2. perceptron_state *perceptron_dir_lookup (unsigned int);
44 | * Get a branch prediction, given a branch address. This function returns a
45 | * pointer to a 'perceptron_state' struct, which contains the prediction, the
46 | * perceptron output, and other information necessary for using and updating
47 | * the predictor. The first member of a 'perceptron_state' struct is a char
48 | * that is assigned 3 if the branch is predicted taken, 0 otherwise; this way,
49 | * a pointer to 'perceptron_state' can be cast to (char *) and passed around
50 | * SimpleScalar as though it were a pointer to a pattern history table entry.
51 | *
52 | * 3. void perceptron_update (perceptron_state *, int);
53 | * Update the branch predictor using the 'perceptron_state' pointer
54 | * returned by perceptron_dir_lookup() and an int that is 1 if the branch
55 | * was taken, 0 otherwise.
56 | */
57 |
58 | #include "ooo_cpu.h"
59 |
60 | /* history length for the global history shift register */
61 |
62 | #define PERCEPTRON_HISTORY 24
63 |
64 | /* number of perceptrons */
65 |
66 | #define NUM_PERCEPTRONS 163
67 |
68 | /* number of bits per weight */
69 |
70 | #define PERCEPTRON_BITS 8
71 |
72 | /* maximum and minimum weight values */
73 |
74 | #define MAX_WEIGHT ((1<<(PERCEPTRON_BITS-1))-1)
75 | #define MIN_WEIGHT (-(MAX_WEIGHT+1))
76 |
77 | /* threshold for training */
78 |
79 | #define THETA ((int) (1.93 * PERCEPTRON_HISTORY + 14))
80 |
81 | /* size of buffer for keeping 'perceptron_state' for update */
82 |
83 | #define NUM_UPDATE_ENTRIES 100
84 |
85 | /* perceptron data structure */
86 |
87 | typedef struct {
88 | int
89 | /* just a vector of integers */
90 |
91 | weights[PERCEPTRON_HISTORY+1];
92 | } perceptron;
93 |
94 | /* 'perceptron_state' - stores the branch prediction and keeps information
95 | * such as output and history needed for updating the perceptron predictor
96 | */
97 | typedef struct {
98 | char
99 | /* this char emulates a pattern history table entry
100 | * with a value of 0 for "predict not taken" or 3 for
101 | * "predict taken," so a perceptron_state pointer can
102 | * be passed around SimpleScalar's branch prediction
103 | * infrastructure without changing too much stuff.
104 | */
105 | dummy_counter;
106 |
107 | int
108 | /* prediction: 1 for taken, 0 for not taken */
109 |
110 | prediction,
111 |
112 | /* perceptron output */
113 |
114 | output;
115 |
116 | unsigned long long int
117 | /* value of the history register yielding this prediction */
118 |
119 | history;
120 |
121 | perceptron
122 | /* pointer to the perceptron yielding this prediction */
123 |
124 | *perc;
125 | } perceptron_state;
126 |
127 | perceptron
128 | /* table of perceptrons */
129 |
130 | perceptrons[NUM_CPUS][NUM_PERCEPTRONS];
131 |
132 | perceptron_state
133 | /* state for updating perceptron predictor */
134 |
135 | perceptron_state_buf[NUM_CPUS][NUM_UPDATE_ENTRIES];
136 |
137 | int
138 | /* index of the next "free" perceptron_state */
139 |
140 | perceptron_state_buf_ctr[NUM_CPUS];
141 |
142 | unsigned long long int
143 |
144 | /* speculative global history - updated by predictor */
145 |
146 | spec_global_history[NUM_CPUS],
147 |
148 | /* real global history - updated when the predictor is updated */
149 |
150 | global_history[NUM_CPUS];
151 |
152 | perceptron_state *u[NUM_CPUS];
153 |
154 | /* initialize a single perceptron */
155 | void initialize_perceptron (perceptron *p) {
156 | int i;
157 |
158 | for (i=0; i<=PERCEPTRON_HISTORY; i++) p->weights[i] = 0;
159 | }
160 |
161 | void O3_CPU::initialize_branch_predictor()
162 | {
163 | spec_global_history[cpu] = 0;
164 | global_history[cpu] = 0;
165 | perceptron_state_buf_ctr[cpu] = 0;
166 | 	for (int i=0; i<NUM_PERCEPTRONS; i++)
189 | 	if (perceptron_state_buf_ctr[cpu] >= NUM_UPDATE_ENTRIES)
190 | perceptron_state_buf_ctr[cpu] = 0;
191 |
192 | /* hash the address to get an index into the table of perceptrons */
193 |
194 | index = address % NUM_PERCEPTRONS;
195 |
196 | /* get pointers to that perceptron and its weights */
197 |
198 | p = &perceptrons[cpu][index];
199 | w = &p->weights[0];
200 |
201 | /* initialize the output to the bias weight, and bump the pointer
202 | * to the weights
203 | */
204 |
205 | output = *w++;
206 |
207 | /* find the (rest of the) dot product of the history register
208 | * and the perceptron weights. note that, instead of actually
209 | * doing the expensive multiplies, we simply add a weight when the
210 | * corresponding branch in the history register is taken, or
211 | * subtract a weight when the branch is not taken. this also lets
212 | * us use binary instead of bipolar logic to represent the history
213 | * register
214 | */
215 | 	for (mask=1,i=0; i<PERCEPTRON_HISTORY; i++,mask<<=1,w++) {
216 | 		if (spec_global_history[cpu] & mask)
217 | 			output += *w;
218 | 		else
219 | 			output += -*w;
220 | 	}
221 |
222 | 	/* record the various values needed to update the predictor */
223 |
224 | 	u[cpu]->output = output;
225 | u[cpu]->perc = p;
226 | u[cpu]->history = spec_global_history[cpu];
227 | u[cpu]->prediction = output >= 0;
228 | u[cpu]->dummy_counter = u[cpu]->prediction ? 3 : 0;
229 |
230 | /* update the speculative global history register */
231 |
232 | spec_global_history[cpu] <<= 1;
233 | spec_global_history[cpu] |= u[cpu]->prediction;
234 | return u[cpu]->prediction;
235 | }
236 |
237 | void O3_CPU::last_branch_result(uint64_t ip, uint8_t taken)
238 | {
239 | int
240 | i,
241 | y,
242 | *w;
243 |
244 | unsigned long long int
245 | mask,
246 | history;
247 |
248 | /* update the real global history shift register */
249 |
250 | global_history[cpu] <<= 1;
251 | global_history[cpu] |= taken;
252 |
253 | /* if this branch was mispredicted, restore the speculative
254 | * history to the last known real history
255 | */
256 |
257 | if (u[cpu]->prediction != taken) spec_global_history[cpu] = global_history[cpu];
258 |
259 | /* if the output of the perceptron predictor is outside of
260 | * the range [-THETA,THETA] *and* the prediction was correct,
261 | * then we don't need to adjust the weights
262 | */
263 |
264 | if (u[cpu]->output > THETA)
265 | y = 1;
266 | else if (u[cpu]->output < -THETA)
267 | y = 0;
268 | else
269 | y = 2;
270 | if (y == 1 && taken) return;
271 | if (y == 0 && !taken) return;
272 |
273 | /* w is a pointer to the first weight (the bias weight) */
274 |
275 | w = &u[cpu]->perc->weights[0];
276 |
277 | /* if the branch was taken, increment the bias weight,
278 | * else decrement it, with saturating arithmetic
279 | */
280 |
281 | if (taken)
282 | (*w)++;
283 | else
284 | (*w)--;
285 | if (*w > MAX_WEIGHT) *w = MAX_WEIGHT;
286 | if (*w < MIN_WEIGHT) *w = MIN_WEIGHT;
287 |
288 | /* now w points to the next weight */
289 |
290 | w++;
291 |
292 | /* get the history that led to this prediction */
293 |
294 | history = u[cpu]->history;
295 |
296 | /* for each weight and corresponding bit in the history register... */
297 |
298 | 	for (mask=1,i=0; i<PERCEPTRON_HISTORY; i++,mask<<=1,w++) {
299 |
300 | 		/* if the i'th bit in the history positively correlates
301 | 		 * with this branch outcome, increment the corresponding
302 | 		 * weight, else decrement it, with saturating arithmetic
303 | 		 */
304 |
305 | 		if (!!(history & mask) == taken) { // !!x is 1 iff x is not zero, in this case history is positively correlated with branch outcome
306 | (*w)++;
307 | if (*w > MAX_WEIGHT) *w = MAX_WEIGHT;
308 | } else {
309 | (*w)--;
310 | if (*w < MIN_WEIGHT) *w = MIN_WEIGHT;
311 | }
312 | }
313 | }
314 |
--------------------------------------------------------------------------------
/build_champsim.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ "$#" -ne 7 ]; then
4 | echo "Illegal number of parameters"
5 | echo "Usage: ./build_champsim.sh [branch_pred] [l1d_pref] [l2c_pref] [llc_pref] [llc_repl] [num_core]"
6 | exit 1
7 | fi
8 |
9 | # ChampSim configuration
10 | BRANCH=$1 # branch/*.bpred
11 | L1I_PREFETCHER=$2 # prefetcher/*.l1i_pref
12 | L1D_PREFETCHER=$3 # prefetcher/*.l1d_pref
13 | L2C_PREFETCHER=$4 # prefetcher/*.l2c_pref
14 | LLC_PREFETCHER=$5 # prefetcher/*.llc_pref
15 | LLC_REPLACEMENT=$6 # replacement/*.llc_repl
16 | NUM_CORE=$7 # tested up to 8-core system
17 |
18 | ############## Some useful macros ###############
19 | BOLD=$(tput bold)
20 | NORMAL=$(tput sgr0)
21 | #################################################
22 |
23 | # Sanity check
24 | if [ ! -f ./branch/${BRANCH}.bpred ]; then
25 | echo "[ERROR] Cannot find branch predictor"
26 | echo "[ERROR] Possible branch predictors from branch/*.bpred "
27 | find branch -name "*.bpred"
28 | exit 1
29 | fi
30 |
31 | if [ ! -f ./prefetcher/${L1I_PREFETCHER}.l1i_pref ]; then
32 | echo "[ERROR] Cannot find L1I prefetcher"
33 | echo "[ERROR] Possible L1I prefetchers from prefetcher/*.l1i_pref "
34 | find prefetcher -name "*.l1i_pref"
35 | exit 1
36 | fi
37 |
38 | if [ ! -f ./prefetcher/${L1D_PREFETCHER}.l1d_pref ]; then
39 | echo "[ERROR] Cannot find L1D prefetcher"
40 | echo "[ERROR] Possible L1D prefetchers from prefetcher/*.l1d_pref "
41 | find prefetcher -name "*.l1d_pref"
42 | exit 1
43 | fi
44 |
45 | if [ ! -f ./prefetcher/${L2C_PREFETCHER}.l2c_pref ]; then
46 | echo "[ERROR] Cannot find L2C prefetcher"
47 | echo "[ERROR] Possible L2C prefetchers from prefetcher/*.l2c_pref "
48 | find prefetcher -name "*.l2c_pref"
49 | exit 1
50 | fi
51 |
52 | if [ ! -f ./prefetcher/${LLC_PREFETCHER}.llc_pref ]; then
53 | echo "[ERROR] Cannot find LLC prefetcher"
54 | echo "[ERROR] Possible LLC prefetchers from prefetcher/*.llc_pref "
55 | find prefetcher -name "*.llc_pref"
56 | exit 1
57 | fi
58 |
59 | if [ ! -f ./replacement/${LLC_REPLACEMENT}.llc_repl ]; then
60 | echo "[ERROR] Cannot find LLC replacement policy"
61 | echo "[ERROR] Possible LLC replacement policy from replacement/*.llc_repl"
62 | find replacement -name "*.llc_repl"
63 | exit 1
64 | fi
65 |
66 | # Check num_core
67 | re='^[0-9]+$'
68 | if ! [[ $NUM_CORE =~ $re ]] ; then
69 | echo "[ERROR]: num_core is NOT a number" >&2;
70 | exit 1
71 | fi
72 |
73 | # Check for multi-core
74 | if [ "$NUM_CORE" -gt "1" ]; then
75 | echo "Building multi-core ChampSim..."
76 | sed -i.bak 's/\<NUM_CPUS 1\>/NUM_CPUS '${NUM_CORE}'/g' inc/champsim.h
77 | # sed -i.bak 's/\<DRAM_CHANNELS 1\>/DRAM_CHANNELS 2/g' inc/champsim.h
78 | # sed -i.bak 's/\<DRAM_CHANNELS_LOG2 0\>/DRAM_CHANNELS_LOG2 1/g' inc/champsim.h
79 | else
80 | if [ "$NUM_CORE" -lt "1" ]; then
81 | echo "Number of core: $NUM_CORE must be greater or equal than 1"
82 | exit 1
83 | else
84 | echo "Building single-core ChampSim..."
85 | fi
86 | fi
87 | echo
88 |
89 | # Change prefetchers and replacement policy
90 | cp branch/${BRANCH}.bpred branch/branch_predictor.cc
91 | cp prefetcher/${L1I_PREFETCHER}.l1i_pref prefetcher/l1i_prefetcher.cc
92 | cp prefetcher/${L1D_PREFETCHER}.l1d_pref prefetcher/l1d_prefetcher.cc
93 | cp prefetcher/${L2C_PREFETCHER}.l2c_pref prefetcher/l2c_prefetcher.cc
94 | cp prefetcher/${LLC_PREFETCHER}.llc_pref prefetcher/llc_prefetcher.cc
95 | cp replacement/${LLC_REPLACEMENT}.llc_repl replacement/llc_replacement.cc
96 |
97 | # Build
98 | mkdir -p bin
99 | rm -f bin/champsim
100 | make clean
101 | make
102 |
103 | # Sanity check
104 | echo ""
105 | if [ ! -f bin/champsim ]; then
106 | echo "${BOLD}ChampSim build FAILED!"
107 | echo ""
108 | exit 1
109 | fi
110 |
111 | echo "${BOLD}ChampSim is successfully built"
112 | echo "Branch Predictor: ${BRANCH}"
113 | echo "L1I Prefetcher: ${L1I_PREFETCHER}"
114 | echo "L1D Prefetcher: ${L1D_PREFETCHER}"
115 | echo "L2C Prefetcher: ${L2C_PREFETCHER}"
116 | echo "LLC Prefetcher: ${LLC_PREFETCHER}"
117 | echo "LLC Replacement: ${LLC_REPLACEMENT}"
118 | echo "Cores: ${NUM_CORE}"
119 | BINARY_NAME="${BRANCH}-${L1I_PREFETCHER}-${L1D_PREFETCHER}-${L2C_PREFETCHER}-${LLC_PREFETCHER}-${LLC_REPLACEMENT}-${NUM_CORE}core"
120 | echo "Binary: bin/${BINARY_NAME}"
121 | echo ""
122 | mv bin/champsim bin/${BINARY_NAME}
123 |
124 |
125 | # Restore to the default configuration
126 | sed -i.bak 's/\<NUM_CPUS '${NUM_CORE}'\>/NUM_CPUS 1/g' inc/champsim.h
127 | #sed -i.bak 's/\<DRAM_CHANNELS 2\>/DRAM_CHANNELS 1/g' inc/champsim.h
128 | #sed -i.bak 's/\<DRAM_CHANNELS_LOG2 1\>/DRAM_CHANNELS_LOG2 0/g' inc/champsim.h
129 |
130 | cp branch/bimodal.bpred branch/branch_predictor.cc
131 | cp prefetcher/no.l1i_pref prefetcher/l1i_prefetcher.cc
132 | cp prefetcher/no.l1d_pref prefetcher/l1d_prefetcher.cc
133 | cp prefetcher/no.l2c_pref prefetcher/l2c_prefetcher.cc
134 | cp prefetcher/no.llc_pref prefetcher/llc_prefetcher.cc
135 | cp replacement/lru.llc_repl replacement/llc_replacement.cc
136 |
--------------------------------------------------------------------------------
/cvp_tracer/README.md:
--------------------------------------------------------------------------------
1 | The cvp2champsim tracer comes as-is, with no guarantee that it covers every conversion case.
2 |
3 | The tracer is used to convert the traces from the 2nd Championship Value
4 | Prediction (CVP) to a ChampSim-friendly format.
5 |
6 | CVP-1 Site: https://www.microarch.org/cvp1/
7 | CVP-2 Site: https://www.microarch.org/cvp1/cvp2/rules.html
8 |
9 | To use the tracer first compile it using g++:
10 |
11 | g++ cvp2champsim.cc -o cvp_tracer
12 |
13 | To convert a trace execute:
14 |
15 | ./cvp_tracer TRACE_NAME.gz
16 |
17 | The ChampSim trace is written to standard output, so to keep and compress the
18 | output trace, run:
19 |
20 | ./cvp_tracer TRACE_NAME.gz | gzip > NEW_TRACE.champsim.gz
21 |
22 | Adding the "-v" flag will print the disassembly of the CVP trace to standard
23 | error as well as the ChampSim-format trace to standard output.
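
For example, assuming the flag is given before the trace name (check cvp2champsim.cc for the exact argument handling), the disassembly can be captured separately from the converted trace:

    ./cvp_tracer -v TRACE_NAME.gz 2> TRACE_NAME.dis | gzip > NEW_TRACE.champsim.gz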
24 |
--------------------------------------------------------------------------------
/download.sh:
--------------------------------------------------------------------------------
1 | while read -r line; do
2 | arr=($line)
3 | mkdir -p $(dirname ${arr[0]})
4 | done < download_links
5 |
6 | while read -r line; do
7 | arr=($line)
8 | echo Downloading ${arr[0]} from ${arr[1]}
9 | if ! [[ -e "${arr[0]}" ]]; then
10 | curl -L -o ${arr[0]} ${arr[1]}
11 | echo Downloading ${arr[0]} Done
12 | else
13 | echo ${arr[0]} File already exists
14 | fi
15 | done < download_links
16 |
--------------------------------------------------------------------------------
/get_stats.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 |
5 |
6 | def get_args():
7 | parser = argparse.ArgumentParser()
8 | parser.add_argument('results_file', help='Path to ChampSim results file')
9 | parser.add_argument('--cache-level', default='LLC', choices=('L2', 'LLC'), help='Cache level to compute stats for (default: %(default)s)')
10 | parser.add_argument('--base', default=None, help='Path to ChampSim base settings results file with no prefetcher for more accurate statistics')
11 |
12 | return parser.parse_args()
13 |
14 |
15 | def read_file(path, cache_level):
16 | if path is None:
17 | return None
18 |
19 | expected_keys = ('ipc', 'total_miss', 'useful', 'useless', 'load_miss', 'rfo_miss', 'kilo_inst')
20 | data = {}
21 | with open(path, 'r') as f:
22 | for line in f:
23 | if 'Finished CPU' in line:
24 | data['ipc'] = float(line.split()[9])
25 | data['kilo_inst'] = int(line.split()[4]) / 1000
26 | if cache_level not in line:
27 | continue
28 | line = line.strip()
29 | if 'LOAD' in line:
30 | data['load_miss'] = int(line.split()[-1])
31 | elif 'RFO' in line:
32 | data['rfo_miss'] = int(line.split()[-1])
33 | elif 'TOTAL' in line:
34 | data['total_miss'] = int(line.split()[-1])
35 | elif 'USEFUL' in line:
36 | data['useful'] = int(line.split()[-3])
37 | data['useless'] = int(line.split()[-1])
38 |
39 | if not all(key in data for key in expected_keys):
40 | return None
41 |
42 | return data
43 |
44 | def main(args=None):
45 | print(args)
46 | results = read_file(args.results_file, args.cache_level)
47 | useful, useless, ipc, load_miss, rfo_miss, kilo_inst = (
48 | results['useful'], results['useless'], results['ipc'], results['load_miss'], results['rfo_miss'], results['kilo_inst']
49 | )
50 | results_total_miss = load_miss + rfo_miss + useful
51 | total_miss = results_total_miss
52 |
53 | results_mpki = (load_miss + rfo_miss) / kilo_inst
54 |
55 | base = read_file(args.base, args.cache_level)
56 | if base is not None:
57 | base_total_miss, base_ipc = base['total_miss'], base['ipc']
58 | base_mpki = base_total_miss / kilo_inst
59 |
60 | if useful + useless == 0:
61 | print('Accuracy: N/A [All prefetches were merged and were not useful or useless]')
62 | else:
63 | print('Accuracy:', useful / (useful + useless) * 100, '%')
64 | if total_miss == 0:
65 | print('Coverage: N/A [No misses. Did you run this simulation for long enough?]')
66 | else:
67 | print('Coverage:', useful / total_miss * 100, '%')
68 | print('MPKI:', results_mpki)
69 | if base is not None:
70 | print('MPKI Improvement:', (base_mpki - results_mpki) / base_mpki * 100, '%')
71 | print('IPC:', ipc)
72 | if base is not None:
73 | print('IPC Improvement:', (ipc - base_ipc) / base_ipc * 100, '%')
74 |
75 | if __name__ == '__main__':
76 | main(args=get_args())
77 |
--------------------------------------------------------------------------------
/inc/block.h:
--------------------------------------------------------------------------------
1 | #ifndef BLOCK_H
2 | #define BLOCK_H
3 |
4 | #include "champsim.h"
5 | #include "instruction.h"
6 | #include "set.h"
7 |
8 | // CACHE BLOCK
9 | class BLOCK {
10 | public:
11 | uint8_t valid,
12 | prefetch,
13 | dirty,
14 | used;
15 |
16 | int delta,
17 | depth,
18 | signature,
19 | confidence;
20 |
21 | uint64_t address,
22 | full_addr,
23 | tag,
24 | data,
25 | ip,
26 | cpu,
27 | instr_id;
28 |
29 | // replacement state
30 | uint32_t lru;
31 |
32 | BLOCK() {
33 | valid = 0;
34 | prefetch = 0;
35 | dirty = 0;
36 | used = 0;
37 |
38 | delta = 0;
39 | depth = 0;
40 | signature = 0;
41 | confidence = 0;
42 |
43 | address = 0;
44 | full_addr = 0;
45 | tag = 0;
46 | data = 0;
47 | cpu = 0;
48 | instr_id = 0;
49 |
50 | lru = 0;
51 | };
52 | };
53 |
54 | // DRAM CACHE BLOCK
55 | class DRAM_ARRAY {
56 | public:
57 | BLOCK **block;
58 |
59 | DRAM_ARRAY() {
60 | block = NULL;
61 | };
62 | };
63 |
64 | // message packet
65 | class PACKET {
66 | public:
67 | uint8_t instruction,
68 | is_data,
69 | fill_l1i,
70 | fill_l1d,
71 | tlb_access,
72 | scheduled,
73 | translated,
74 | fetched,
75 | prefetched,
76 | drc_tag_read;
77 |
78 | int fill_level,
79 | pf_origin_level,
80 | rob_signal,
81 | rob_index,
82 | producer,
83 | delta,
84 | depth,
85 | signature,
86 | confidence;
87 |
88 | uint32_t pf_metadata;
89 |
90 | uint8_t is_producer,
91 | //rob_index_depend_on_me[ROB_SIZE],
92 | //lq_index_depend_on_me[ROB_SIZE],
93 | //sq_index_depend_on_me[ROB_SIZE],
94 | instr_merged,
95 | load_merged,
96 | store_merged,
97 | returned,
98 | asid[2],
99 | type;
100 |
101 | fastset
102 | rob_index_depend_on_me,
103 | lq_index_depend_on_me,
104 | sq_index_depend_on_me;
105 |
106 | uint32_t cpu, data_index, lq_index, sq_index;
107 |
108 | uint64_t address,
109 | full_addr,
110 | instruction_pa,
111 | data_pa,
112 | data,
113 | instr_id,
114 | ip,
115 | event_cycle,
116 | cycle_enqueued;
117 |
118 | PACKET() {
119 | instruction = 0;
120 | is_data = 1;
121 | fill_l1i = 0;
122 | fill_l1d = 0;
123 | tlb_access = 0;
124 | scheduled = 0;
125 | translated = 0;
126 | fetched = 0;
127 | prefetched = 0;
128 | drc_tag_read = 0;
129 |
130 | returned = 0;
131 | asid[0] = UINT8_MAX;
132 | asid[1] = UINT8_MAX;
133 | type = 0;
134 |
135 | fill_level = -1;
136 | rob_signal = -1;
137 | rob_index = -1;
138 | producer = -1;
139 | delta = 0;
140 | depth = 0;
141 | signature = 0;
142 | confidence = 0;
143 |
144 | #if 0
145 | for (uint32_t i=0; i<ROB_SIZE; i++) {
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 |
15 | #include
16 | #include
17 | #include