├── python-codes ├── example.py ├── bench_name.py ├── bench_graph_name.py ├── first_line_perf_rfile.py ├── first_line_toplev.py ├── mod_pc_dist_list.py ├── cal-avg-inner-iters.py ├── cal-avg-dist-outerloop.py ├── cal-avg-inner-iter-time.py ├── test-plot.py ├── sort-data.py ├── filter_samples.py ├── temp.py ├── find_dest_in_branches.py ├── find_src_in_branches.py ├── dist-between-2-occur-outerloop.py ├── read-func.py ├── toplev_rfile.py ├── llc_missed_pcs_rfile.py ├── inner-iters.py ├── inner-avg-iter-time.py ├── first_filter_samples.py ├── calculate-dist-crono.py ├── calculate-dist.py ├── perf_rfile_baseline.py ├── find-peaks.py ├── perf_rfile_pref.py └── plot-scatter.py ├── SWPrefetchingLLVMPass.so ├── SWPrefetchingLLVMPass ├── CMakeLists.txt └── SWPrefetchingLLVMPass.cpp ├── README.md └── scripts ├── capture_PCs_real.sh ├── capture_PCs_syn.sh └── run-CRONO-benchmarks.sh /python-codes/example.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /SWPrefetchingLLVMPass.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SabaJamilan/Profile-Guided-Software-Prefetching/HEAD/SWPrefetchingLLVMPass.so -------------------------------------------------------------------------------- /python-codes/bench_name.py: -------------------------------------------------------------------------------- 1 | import sys 2 | output_file = open(sys.argv[1], "a") 3 | app_name = sys.argv[2] 4 | output_file.write(app_name+"/") 5 | 6 | -------------------------------------------------------------------------------- /python-codes/bench_graph_name.py: -------------------------------------------------------------------------------- 1 | import sys 2 | output_file = open(sys.argv[1], "a") 3 | app_name = sys.argv[2] 4 | graph_name = sys.argv[3] 5 | output_file.write(app_name+"/"+graph_name+"/") 6 | 7 | 
-------------------------------------------------------------------------------- /SWPrefetchingLLVMPass/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | add_llvm_library(SWPrefetchingLLVMPass MODULE 3 | SWPrefetchingLLVMPass.cpp 4 | PLUGIN_TOOL 5 | opt 6 | ) 7 | add_dependencies(SWPrefetchingLLVMPass intrinsics_gen) 8 | -------------------------------------------------------------------------------- /python-codes/first_line_perf_rfile.py: -------------------------------------------------------------------------------- 1 | import sys 2 | output_file = open(sys.argv[1], "a") 3 | output_file.write("config/optional:(dist)/input-graph/exe_time/IPC/instructions/LLC-load-misses/SW_PREFETCH_ACCESS.T0/LOAD_HIT_PRE.SW_PF\n") 4 | 5 | -------------------------------------------------------------------------------- /python-codes/first_line_toplev.py: -------------------------------------------------------------------------------- 1 | import sys 2 | output_file = open(sys.argv[1], "a") 3 | output_file.write("benchmark/input-graph/Backend_Bound/Backend_Bound.Memory_Bound/Backend_Bound.Memory_Bound.L3_Bound/Backend_Bound.Memory_Bound.DRAM_Bound\n") 4 | 5 | -------------------------------------------------------------------------------- /python-codes/mod_pc_dist_list.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # opening the file in read mode 4 | file = open(sys.argv[1], "r") 5 | fout = open(sys.argv[2], "w") 6 | replacement = "" 7 | dist= sys.argv[3] 8 | 9 | 10 | # using the for loop 11 | for line in file: 12 | line = line.strip() 13 | changes = ","+str(dist)+",nta" 14 | replacement = line + changes + "\n" 15 | fout.write(replacement) 16 | 17 | file.close() 18 | fout.close() 19 | -------------------------------------------------------------------------------- /python-codes/cal-avg-inner-iters.py: 
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from collections import Counter 5 | import sys 6 | import glob 7 | import re 8 | import matplotlib.cm as cm 9 | import numpy as np 10 | import pprint 11 | import collections 12 | 13 | src=sys.argv[2] 14 | dst=sys.argv[3] 15 | pc=sys.argv[4] 16 | lines=[] 17 | 18 | with open(sys.argv[1]) as file_in: 19 | for line in file_in: 20 | if line !="\n": 21 | lines.append(line) 22 | 23 | sum=0 24 | total_avg=0 25 | 26 | output_file= str(src)+"-"+str(dst)+ "-avg-inner-iters-PC-"+str(pc)+".txt" 27 | with open (output_file, 'a') as out: 28 | for i in range(0, len(lines)-1): 29 | sum = sum + float(lines[i]) 30 | if len(lines) !=0: 31 | total_avg = sum/len(lines) 32 | out.write(str(total_avg)+ "\n") 33 | 34 | -------------------------------------------------------------------------------- /python-codes/cal-avg-dist-outerloop.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from collections import Counter 5 | import sys 6 | import glob 7 | import re 8 | import matplotlib.cm as cm 9 | import numpy as np 10 | import pprint 11 | import collections 12 | 13 | src=sys.argv[2] 14 | dst=sys.argv[3] 15 | pc=sys.argv[4] 16 | lines=[] 17 | 18 | with open(sys.argv[1]) as file_in: 19 | for line in file_in: 20 | if line !="\n": 21 | lines.append(line) 22 | 23 | sum=0 24 | total_avg=0 25 | 26 | output_file= str(src)+"-"+str(dst)+ "-avg-dist-outerloop-PC-"+str(pc)+".txt" 27 | with open (output_file, 'a') as out: 28 | for i in range(0, len(lines)-1): 29 | sum = sum + float(lines[i]) 30 | if len(lines) !=0: 31 | total_avg = sum/len(lines) 32 | out.write(str(total_avg)+ "\n") 33 | 34 | -------------------------------------------------------------------------------- /python-codes/cal-avg-inner-iter-time.py: 
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from collections import Counter 5 | import sys 6 | import glob 7 | import re 8 | import matplotlib.cm as cm 9 | import numpy as np 10 | import pprint 11 | import collections 12 | 13 | src=sys.argv[2] 14 | dst=sys.argv[3] 15 | pc=sys.argv[4] 16 | lines=[] 17 | 18 | with open(sys.argv[1]) as file_in: 19 | for line in file_in: 20 | if line !="\n": 21 | lines.append(line) 22 | 23 | sum=0 24 | total_avg=0 25 | 26 | output_file= str(src)+"-"+str(dst)+ "-avg-avg-inner-iter-time-PC-"+str(pc)+".txt" 27 | with open (output_file, 'a') as out: 28 | for i in range(0, len(lines)-1): 29 | sum = sum + float(lines[i]) 30 | if len(lines) !=0: 31 | total_avg = sum/len(lines) 32 | out.write(str(total_avg)+ "\n") 33 | 34 | -------------------------------------------------------------------------------- /python-codes/test-plot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from scipy.signal import find_peaks_cwt 5 | from collections import Counter 6 | import peakutils 7 | from scipy.signal import find_peaks 8 | import sys 9 | 10 | data = np.loadtxt(sys.argv[1], dtype=int) 11 | #data = pd.read_csv("0x4015ad-0x401560-cycles-PC-401599-new.csv") 12 | com_dist_value=[] 13 | com_dist_freq=[] 14 | distances=Counter(data) 15 | for key,value in distances.most_common(100000000): 16 | if (value > 0): 17 | com_dist_value.append(key) 18 | com_dist_freq.append(value) 19 | 20 | 21 | #print(len(com_dist_value)) 22 | 23 | output_file=str(sys.argv[2])+"-test-plot.csv" 24 | with open(output_file, 'wt') as out: 25 | # out.write('x,y\n') 26 | for i in range(0, len(com_dist_value)): 27 | out.write(str(com_dist_value[i])+","+str(com_dist_freq[i])+"\n") 28 | 29 | 30 | 
-------------------------------------------------------------------------------- /python-codes/sort-data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from scipy.signal import find_peaks_cwt 5 | from collections import Counter 6 | import peakutils 7 | from scipy.signal import find_peaks 8 | import matplotlib.pyplot as plt 9 | 10 | #import plotly.plotly as py 11 | import plotly.graph_objs as go 12 | #from plotly.tools import FigureFactory as FF 13 | 14 | import numpy as np 15 | import pandas as pd 16 | import scipy 17 | import peakutils 18 | import sys 19 | 20 | 21 | estimated_data = pd.read_csv(sys.argv[1], header=None) 22 | 23 | col1 = estimated_data[:][0] # First column data 24 | col2 = estimated_data[:][1] # Second column data 25 | 26 | #print("col1: ",col1) 27 | #print("col2: ", col2) 28 | 29 | 30 | output_file=str(sys.argv[2])+"-sorted-data.csv" 31 | with open(output_file, 'wt') as out: 32 | # out.write('x,y\n') 33 | for i in range(0, len(col1)): 34 | for x in range(0, len(col1)): 35 | if int(col1[x])==i: 36 | out.write(str(i)+","+str(col2[x])+"\n") 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /python-codes/filter_samples.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from collections import Counter 5 | import sys 6 | import glob 7 | import re 8 | import matplotlib.cm as cm 9 | import numpy as np 10 | import pprint 11 | import collections 12 | 13 | 14 | PC = sys.argv[2] 15 | if len(PC) <8: 16 | PC_str ='0000000000'+str(PC) 17 | else: 18 | PC_str =str(PC) 19 | lines=[] 20 | 21 | #print("PC in code: ",PC_str ) 22 | output_file= "filter-"+str(PC)+".txt" 23 | count =0 24 | prev_count =0 25 | prev='' 26 | 27 | with open(sys.argv[1]) as file_in: 28 | with 
open (output_file, 'wt') as out: 29 | for line in file_in: 30 | for branch_record in line.split(): 31 | if '/' in branch_record: 32 | prev=line 33 | prev_count=count 34 | #out.write(str(line)) 35 | #lines.append(line) 36 | count=count+1 37 | 38 | if PC_str in branch_record: 39 | if count == (prev_count+1): 40 | #print(prev.split()[0][0]) 41 | if prev.split()[0][0]!='f': 42 | if int(prev.split()[0][0])==4: 43 | out.write(prev) 44 | lines.append(line) 45 | count=count+1 46 | 47 | -------------------------------------------------------------------------------- /python-codes/temp.py: -------------------------------------------------------------------------------- 1 | #python3 max-branch-freq.py dump-brstack-bfs.txt 0x401406 0x4013a0 0x4013ef 0x4013d0 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from collections import Counter 6 | import sys 7 | import glob 8 | import re 9 | import matplotlib.cm as cm 10 | import numpy as np 11 | import pprint 12 | 13 | 14 | dict = {} 15 | seen = False 16 | 17 | outer_loop_counter = 0 18 | cycle_sum = 0 19 | PC=sys.argv[4] 20 | 21 | with open(sys.argv[1]) as file_in: 22 | 23 | src = sys.argv[2] 24 | dst = sys.argv[3] 25 | 26 | 27 | output_file= str(src) + "-" + str(dst) + "-cycles-PC-"+str(PC)+"-new.txt" 28 | with open (output_file, 'wt') as out: 29 | for line in file_in: 30 | for branch_record in line.split(): 31 | if '/' not in branch_record: 32 | continue 33 | branch_rec_parts = branch_record.split('/') 34 | cur_src = branch_rec_parts[0] 35 | cur_dst = branch_rec_parts[1] 36 | cycle = int(branch_rec_parts[-1]) 37 | 38 | if cur_src == src and cur_dst == dst: 39 | if seen == False: 40 | seen = True 41 | cycle_sum = cycle_sum + cycle 42 | else: 43 | #if(cycle_sum>2 ): 44 | out.write(str(cycle_sum) + "\n") 45 | cycle_sum = cycle 46 | 47 | elif seen == True: 48 | cycle_sum = cycle_sum + cycle 49 | 50 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Profile-Guided-Software-Prefetching 2 | ## Description 3 | APT-GET is a novel profile-guided technique that ensures prefetch timeliness by leveraging dynamic execution time information. On this page, we provide the profile-guided software prefetching LLVM pass code for indirect memory access patterns that is designed for APT-GET. You can also find the scripts and Python code that you need to run the experiments. 4 | ## Instructions 5 | How to run APT-GET (using the CRONO benchmarks as an example): 6 | 1) First, clone the APT-GET git repository. 7 | 2) Create a "results" folder beside the other folders. 8 | 3) Set the required PATHs in "run-CRONO-benchmarks.sh". 9 | 4) Get the CRONO benchmark suite from "https://github.com/masabahmad/CRONO" and set the path to its "app" folder. 10 | 5) Get the input graphs from the SNAP website: "http://snap.stanford.edu/data/web-Google.html". (You can get the other benchmarks in the paper from "https://github.com/SamAinsworth/reproduce-cgo2017-paper/tree/master/program".) 11 | 6) Run "./scripts/run-CRONO-benchmarks.sh". 
12 | ## APT-GET Paper link 13 | Here is the link to the paper: https://dl.acm.org/doi/abs/10.1145/3492321.3519583 14 | ## Please cite APT-GET Paper if you use our proposed technique: 15 | ``` 16 | @inproceedings{jamilan2022apt, 17 | title={APT-GET: profile-guided timely software prefetching}, 18 | author={Jamilan, Saba and Khan, Tanvir Ahmed and Ayers, Grant and Kasikci, Baris and Litz, Heiner}, 19 | booktitle={Proceedings of the Seventeenth European Conference on Computer Systems}, 20 | pages={747--764}, 21 | year={2022} 22 | } 23 | ``` 24 | -------------------------------------------------------------------------------- /python-codes/find_dest_in_branches.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from collections import Counter 5 | import sys 6 | import glob 7 | import re 8 | import matplotlib.cm as cm 9 | import numpy as np 10 | import pprint 11 | import collections 12 | 13 | pc=sys.argv[3] 14 | PC_src = sys.argv[2] 15 | #PC_str ='0000000000'+str(PC_src) 16 | if len(PC_src) <8: 17 | PC_str ='0000000000'+str(PC_src) 18 | else: 19 | PC_str =str(PC_src) 20 | #print("PC_str: ", PC_str) 21 | 22 | lines=[] 23 | 24 | with open(sys.argv[1]) as file_in: 25 | for line in file_in: 26 | if line !="\n": 27 | if "insn:" in line.split(): 28 | lines.append(line) 29 | 30 | branch_in_rec=[] 31 | output_file= "in-branches-dest-PC-"+str(pc)+".txt" 32 | with open (output_file, 'a') as out2: 33 | for i in range(0, len(lines)-1): 34 | for branch_record in lines[i].split(): 35 | if i+1 < len(lines): 36 | if PC_str in branch_record: 37 | if len(lines[i+1]) >0: 38 | branch_in_rec.append(lines[i+1].split()[0]) 39 | 40 | counter=collections.Counter(branch_in_rec) 41 | index=0 42 | #for key, value in counter.items(): 43 | for key, value in counter.most_common(1000): 44 | if index==0: 45 | if key[-7]=='0' and key[-8]=='0' : 46 | #print("key -7: ", key) 47 | 
out2.write(str(key[-6:])+ "\n") 48 | else: 49 | out2.write(str(key[-12:])+ "\n") 50 | #print("inner branch dest: ", key, " freq: ", value) 51 | index= index+1 52 | 53 | -------------------------------------------------------------------------------- /python-codes/find_src_in_branches.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from collections import Counter 5 | import sys 6 | import glob 7 | import re 8 | import matplotlib.cm as cm 9 | import numpy as np 10 | import pprint 11 | import collections 12 | 13 | 14 | PC = sys.argv[2] 15 | if len(PC) <8: 16 | PC_str ='0000000000'+str(PC) 17 | else: 18 | PC_str =str(PC) 19 | lines=[] 20 | 21 | #print(" PC in code: ",PC_str ) 22 | 23 | with open(sys.argv[1]) as file_in: 24 | output_file= "temp-"+str(PC)+".txt" 25 | with open (output_file, 'wt') as out: 26 | for line in file_in: 27 | for branch_record in line.split(): 28 | if '#' in branch_record: 29 | out.write(str(line) + "\n") 30 | lines.append(line) 31 | if PC_str in branch_record: 32 | out.write(str(line) + "\n") 33 | lines.append(line) 34 | 35 | branch_in_rec=[] 36 | branch_in_frequency=[] 37 | 38 | output_file= "in-branches-src-PC-"+str(PC)+".txt" 39 | #with open (output_file, 'wt') as out2: 40 | with open (output_file, 'a') as out2: 41 | for i in range(0, len(lines)): 42 | for branch_record in lines[i].split(): 43 | if PC_str in branch_record: 44 | branch_in_rec.append(lines[i-1].split()[0]) 45 | 46 | counter=collections.Counter(branch_in_rec) 47 | #print("counter: ",counter) 48 | index=0 49 | #for key, value in counter.items(): 50 | for key, value in counter.most_common(1000): 51 | if index==0 or index==1: 52 | if key[-7]=='0' and key[-8]=='0' : 53 | out2.write(str(key[-6:])+ "\n") 54 | else: 55 | out2.write(str(key[-12:])+ "\n") 56 | 57 | #print("inner branch src: ", key, " freq: ", value) 58 | index= index+1 59 | 60 | 
-------------------------------------------------------------------------------- /python-codes/dist-between-2-occur-outerloop.py: -------------------------------------------------------------------------------- 1 | #python3 max-branch-freq.py dump-brstack-bfs.txt 0x401406 0x4013a0 0x4013ef 0x4013d0 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from collections import Counter 6 | import sys 7 | import glob 8 | import re 9 | import matplotlib.cm as cm 10 | import numpy as np 11 | import pprint 12 | 13 | 14 | dict = {} 15 | seen = False 16 | 17 | inner_freq = 0 18 | inner_cycle = 0 19 | 20 | seen_inner = False 21 | 22 | with open(sys.argv[1]) as file_in: 23 | 24 | # src = sys.argv[2] 25 | # dst = sys.argv[3] 26 | 27 | inner_src = sys.argv[2] 28 | inner_dst = sys.argv[3] 29 | PC = sys.argv[4] 30 | 31 | output_file= str(inner_src) + "-" + str(inner_dst) + "-dist-between-2-occur-outerloop-PC-"+str(PC)+".txt" 32 | 33 | with open(output_file, 'wt') as out: 34 | for line in file_in: 35 | for branch_record in line.split(): 36 | if '/' not in branch_record: 37 | continue 38 | branch_rec_parts = branch_record.split('/') 39 | cur_src = branch_rec_parts[0] 40 | cur_dst = branch_rec_parts[1] 41 | cycle = int(branch_rec_parts[-1]) 42 | 43 | if cur_src != inner_src and cur_dst != inner_src: 44 | if seen_inner == True: 45 | if inner_freq > 1: 46 | out.write(str(inner_cycle) + "\n") 47 | inner_freq = 0 48 | inner_cycle = 0 49 | seen_inner = False 50 | 51 | if cur_src == inner_src and cur_dst == inner_dst: 52 | seen_inner = True 53 | inner_freq = inner_freq + 1 54 | inner_cycle = inner_cycle + cycle 55 | # print("cycle: ", cycle) 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /python-codes/read-func.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | 4 | pc=0 5 | percent=[] 6 | pc_list=[] 7 | max=0.00 8 | pos_in_line=0 
9 | 10 | funcNum =0 11 | percentSum=0 12 | funcList=[] 13 | percentList=[] 14 | bench=sys.argv[4] 15 | 16 | 17 | with open(sys.argv[1]) as file_in: 18 | lines = [] 19 | for line in file_in: 20 | if(funcNum<20): 21 | if line.split()[0]=="#": 22 | continue 23 | elif line.split()[-3]==bench and line.split()[-4]==bench: 24 | print(" ", line) 25 | funcList.append(line.split()[-1]) 26 | #percentList.append(line.split()[0]) 27 | #print(line.split()[0][:2]) 28 | if line.split()[0][1]!= "." : 29 | percentSum= percentSum + int(line.split()[0][:2]) 30 | percentList.append(line.split()[0][:2]) 31 | else: 32 | percentSum= percentSum + int(line.split()[0][:1]) 33 | percentList.append(line.split()[0][:1]) 34 | #print("sum:",percentSum) 35 | funcNum=funcNum+1 36 | 37 | 38 | output_file_func_percent_list = open(sys.argv[2], "a") 39 | output_file_func_list = open(sys.argv[3], "a") 40 | 41 | sum =0 42 | 43 | 44 | if percentList!=[]: 45 | if(int(percentList[0])<10): 46 | output_file_func_percent_list.write("percent: "+ str(percentList[0])+ " func: "+ str(funcList[0])+"\n") 47 | output_file_func_list.write(str(funcList[0])+"\n") 48 | else: 49 | for x in range(0,len(funcList)): 50 | #print("funcList[x]: ", funcList[x]) 51 | #while(sum< 60): 52 | #while(sum< 90): 53 | if sum< 60: 54 | output_file_func_percent_list.write("percent: "+ str(percentList[x])+ " func: "+ str(funcList[x])+"\n") 55 | output_file_func_list.write(str(funcList[x])+"\n") 56 | sum = sum + int(percentList[x]) 57 | # print("func name: ", funcList[x], " percent: ", percentList[x],"\n") 58 | 59 | 60 | -------------------------------------------------------------------------------- /python-codes/toplev_rfile.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | 4 | BackEnd_location =0 5 | BackEnd_MemEnd_location =0 6 | DRAM_location =0 7 | L3_location =0 8 | 9 | BackEnd_value=0 10 | BackEnd_MemEnd_value=0 11 | DRAM_value=0 12 | L3_value=0 13 | 14 | 15 | 16 | 
with open(sys.argv[1]) as file_in: 17 | lines = [] 18 | for line in file_in: 19 | lines.append(line.strip()) 20 | if 'Backend_Bound' and "BE" in line.strip(): 21 | for value in line.split(): 22 | if(BackEnd_location ==5): 23 | BackEnd_value=value 24 | BackEnd_location+=1 25 | if 'Backend_Bound.Memory_Bound' in line.strip(): 26 | if "S0-C0-" not in line.strip(): 27 | for value in line.split(): 28 | if(BackEnd_MemEnd_location ==5): 29 | BackEnd_MemEnd_value=value 30 | BackEnd_MemEnd_location+=1 31 | if 'Backend_Bound.Memory_Bound.L3_Bound' in line.strip(): 32 | if 'S0-C0-T0' in line.strip(): 33 | for value in line.split(): 34 | if(L3_location ==5): 35 | L3_value=value 36 | L3_location+=1 37 | if 'Backend_Bound.Memory_Bound.DRAM_Bound'in line.strip(): 38 | if 'S0-C0-T0' in line.strip(): 39 | for value in line.split(): 40 | if(DRAM_location ==5): 41 | DRAM_value=value 42 | DRAM_location+=1 43 | 44 | 45 | 46 | output_file = open(sys.argv[2], "a") 47 | output_file.write(str(BackEnd_value)+"/"+str(BackEnd_MemEnd_value)+"/"+str(L3_value)+"/"+str(DRAM_value)+"\n") 48 | 49 | 50 | #output_file.write("baseline/"+str(graph_value)+"/"+str(exe_value)+"/"+str(IPC_value)+"/"+str(inst_value)+"/"+str(llc_value)+"/"+str(swPref_value)+"/"+str(LoadHints_value)+"\n") 51 | 52 | 53 | 54 | 55 | 56 | 57 | #print("======================") 58 | 59 | #for x in range(0,len(lines)): 60 | #print(lines[x]) 61 | -------------------------------------------------------------------------------- /python-codes/llc_missed_pcs_rfile.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | 4 | pc=0 5 | percent=[] 6 | pc_list=[] 7 | max=0.00 8 | pos_in_line=0 9 | 10 | with open(sys.argv[1]) as file_in: 11 | lines = [] 12 | for line in file_in: 13 | lines.append(line.strip()) 14 | if '40' in line: 15 | #if '4' in line.split()[2]: 16 | if '.' 
in line: 17 | #print(line) 18 | #print(re.sub(r"\s+", "", line) 19 | #for value in line.split(): 20 | #print(line.split()[0]) 21 | if(int(float(line.split()[0]))> 0): 22 | percent_value =int(float(line.split()[0])) 23 | percent.append(percent_value) 24 | pc_value=line.split()[2] 25 | pc_list.append(pc_value[:-1]) 26 | if(max < percent_value): 27 | max = percent_value 28 | pc = line.split()[2][:-1] 29 | # print("max: ",max) 30 | # print("pc: ",pc) 31 | 32 | 33 | sum_percent =0 34 | 35 | output_file_pc_list = open(sys.argv[2], "a") 36 | output_file_most_missed_pc = open(sys.argv[3], "a") 37 | 38 | sorted_percent= sorted(percent,reverse=True) 39 | for x in range(0,len(sorted_percent)): 40 | #print("sorted_percent: ", sorted_percent[x]) 41 | for y in range(0,len(percent)): 42 | 43 | #if(percent[y]==sorted_percent[x] and sum_percent < 40): 44 | if(percent[y]==sorted_percent[x] and sum_percent < 70): 45 | print(" PC: ", pc_list[y], " percent: ", percent[y]) 46 | sum_percent = sum_percent + percent[y] 47 | output_file_pc_list.write("percent: "+ str(percent[y])+ " PC: "+ str(pc_list[y])+"\n") 48 | output_file_most_missed_pc.write(str(pc_list[y])+"\n") 49 | 50 | 51 | #for x in range(0,len(pc_list)): 52 | # print("percent: ", percent[x]) 53 | # output_file_pc_list.write("percent: "+ str(percent[x])+ " pc: "+ str(pc_list[x])+"\n") 54 | 55 | #output_file_most_missed_pc.write(pc) 56 | -------------------------------------------------------------------------------- /python-codes/inner-iters.py: -------------------------------------------------------------------------------- 1 | #python3 max-branch-freq.py dump-brstack-bfs.txt 0x401406 0x4013a0 0x4013ef 0x4013d0 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from collections import Counter 6 | import sys 7 | import glob 8 | import re 9 | import matplotlib.cm as cm 10 | import numpy as np 11 | import pprint 12 | 13 | 14 | dict = {} 15 | seen = False 16 | 17 | inner_freq = 0 18 | 
inner_cycle = 0 19 | 20 | seen_inner = False 21 | 22 | with open(sys.argv[1]) as file_in: 23 | 24 | # src = sys.argv[2] 25 | # dst = sys.argv[3] 26 | 27 | inner_src = sys.argv[2] 28 | inner_dst = sys.argv[3] 29 | PC=sys.argv[4] 30 | output_file= str(inner_src) + "-" + str(inner_dst) + "-innet-iters-PC-"+str(PC)+".txt" 31 | 32 | with open(output_file, 'wt') as out: 33 | for line in file_in: 34 | for branch_record in line.split(): 35 | if '/' not in branch_record: 36 | continue 37 | branch_rec_parts = branch_record.split('/') 38 | cur_src = branch_rec_parts[0] 39 | cur_dst = branch_rec_parts[1] 40 | cycle = int(branch_rec_parts[-1]) 41 | 42 | # if cur_src == src and cur_dst == dst: 43 | if cur_src != inner_src and cur_dst != inner_src: 44 | if seen == False: 45 | seen = True 46 | else: 47 | if seen_inner == True: 48 | #out.write(str(inner_avg/inner_freq) + "\n") 49 | out.write(str(inner_freq) + "\n") 50 | 51 | inner_freq = 0 52 | inner_cycle = 0 53 | seen_inner = False 54 | 55 | #elif seen == True: 56 | if cur_src == inner_src and cur_dst == inner_dst: 57 | seen_inner = True 58 | inner_freq = inner_freq + 1 59 | inner_cycle = inner_cycle + cycle 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /python-codes/inner-avg-iter-time.py: -------------------------------------------------------------------------------- 1 | #python3 max-branch-freq.py dump-brstack-bfs.txt 0x401406 0x4013a0 0x4013ef 0x4013d0 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from collections import Counter 6 | import sys 7 | import glob 8 | import re 9 | import matplotlib.cm as cm 10 | import numpy as np 11 | import pprint 12 | 13 | 14 | dict = {} 15 | seen = False 16 | 17 | inner_freq = 0 18 | inner_cycle = 0 19 | 20 | seen_inner = False 21 | 22 | with open(sys.argv[1]) as file_in: 23 | 24 | # src = sys.argv[2] 25 | # dst = sys.argv[3] 26 | 27 | inner_src = sys.argv[2] 28 | inner_dst = sys.argv[3] 
29 | PC = sys.argv[4] 30 | 31 | output_file= str(inner_src) + "-" + str(inner_dst) + "-avg-inner-iter-time-PC-"+str(PC)+".txt" 32 | 33 | with open(output_file, 'wt') as out: 34 | for line in file_in: 35 | for branch_record in line.split(): 36 | if '/' not in branch_record: 37 | continue 38 | branch_rec_parts = branch_record.split('/') 39 | cur_src = branch_rec_parts[0] 40 | cur_dst = branch_rec_parts[1] 41 | cycle = int(branch_rec_parts[-1]) 42 | 43 | # if cur_src == src and cur_dst == dst: 44 | if cur_src != inner_src and cur_dst != inner_src: 45 | if seen == False: 46 | seen = True 47 | else: 48 | if seen_inner == True: 49 | out.write(str(inner_cycle/inner_freq) + "\n") 50 | #out.write(str(inner_freq) + "\n") 51 | 52 | inner_freq = 0 53 | inner_cycle = 0 54 | seen_inner = False 55 | 56 | #elif seen == True: 57 | if cur_src == inner_src and cur_dst == inner_dst: 58 | seen_inner = True 59 | inner_freq = inner_freq + 1 60 | inner_cycle = inner_cycle + cycle 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /python-codes/first_filter_samples.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from collections import Counter 5 | import sys 6 | import glob 7 | import re 8 | import matplotlib.cm as cm 9 | import numpy as np 10 | import pprint 11 | import collections 12 | 13 | 14 | PC = sys.argv[2] 15 | if len(PC) <8: 16 | PC_str ='0000000000'+str(PC) 17 | else: 18 | PC_str =str(PC) 19 | 20 | 21 | lines=[] 22 | #print("PC in code: ",PC_str ) 23 | output_file= "first-filter-"+str(PC)+".txt" 24 | 25 | seen_1=False 26 | pc_seen =False 27 | 28 | count =0 29 | prev_count =0 30 | prev='' 31 | 32 | with open(sys.argv[1]) as file_in: 33 | with open (output_file, 'wt') as out: 34 | for line in file_in: 35 | #for branch_record in line.split(): 36 | #if '#' in branch_record and not pc_seen and not seen_1: 
37 | #if '#' in line.split() and not pc_seen and not seen_1: 38 | if '#' in line.split() and not pc_seen and not seen_1: 39 | #print(" 1") 40 | lines.append(line) 41 | seen_1=True 42 | #if seen_1 and '#' not in branch_record: 43 | if seen_1 and '#' not in line.split() and PC_str not in line.split(): 44 | #print(" 2") 45 | lines.append(line) 46 | #if PC_str in branch_record and seen_1: 47 | if PC_str in line.split() and seen_1: 48 | #print(" 3") 49 | pc_seen=True 50 | lines.append(line) 51 | #if'#' in branch_record and pc_seen and seen_1: 52 | if'#' in line.split() and pc_seen and seen_1: 53 | #print(" 4") 54 | for i in lines: 55 | out.write(i) 56 | pc_seen=False 57 | #seen_1=False 58 | lines=[] 59 | lines.append(line) 60 | #if'#' in branch_record and not pc_seen and seen_1: 61 | if'#' in line.split() and not pc_seen and seen_1: 62 | # print("line: ", line) 63 | # print(" 5") 64 | # pc_seen=False 65 | # seen_1=False 66 | lines=[] 67 | lines.append(line) 68 | 69 | -------------------------------------------------------------------------------- /python-codes/calculate-dist-crono.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from collections import Counter 5 | import sys 6 | import glob 7 | import re 8 | import matplotlib.cm as cm 9 | import numpy as np 10 | 11 | num_files=0 12 | names = ['dist'] 13 | #print("\n") 14 | PC = sys.argv[2] 15 | 16 | for fname in glob.glob(sys.argv[1]): 17 | #print("Database Name: "+fname+"\n") 18 | a=re.split("[-.]", fname)[-3] 19 | globals()[f"data_{a}"]= pd.read_csv(fname) 20 | globals()[f"data_{a}"].columns = names 21 | num_files+=1 22 | distances=Counter(globals()[f"data_{a}"]['dist']) 23 | globals()[f"com_dist_value_{a}"]=[] 24 | globals()[f"com_dist_freq_{a}"]=[] 25 | total=0 26 | for key,value in distances.most_common(100000000): 27 | if (value > 0): 28 | globals()[f"com_dist_value_{a}"].append(key) 29 | 
globals()[f"com_dist_freq_{a}"].append(value) 30 | total += value 31 | 32 | DataPoint1 =sys.argv[3] 33 | DataPoint2= sys.argv[4] 34 | # print("DataPoint 1 : ", DataPoint1) 35 | # print("DataPoint 2 : ", DataPoint2) 36 | total_freq_1=0 37 | total_sum_1=0 38 | total_avg_1=0 39 | 40 | for x in range(0, len( globals()[f"com_dist_value_{a}"])): 41 | if(globals()[f"com_dist_value_{a}"][x]< int(DataPoint1)): 42 | total_sum_1+=globals()[f"com_dist_value_{a}"][x]*globals()[f"com_dist_freq_{a}"][x] 43 | total_freq_1+=globals()[f"com_dist_freq_{a}"][x] 44 | total_avg_1= total_sum_1/total_freq_1 45 | #print("The average value before data point1: : ", total_avg_1) 46 | 47 | 48 | total_freq_2=0 49 | total_sum_2=0 50 | total_avg_2=0 51 | 52 | for x in range(0, len( globals()[f"com_dist_value_{a}"])): 53 | if(globals()[f"com_dist_value_{a}"][x]>= int(DataPoint2)): 54 | total_sum_2+=globals()[f"com_dist_value_{a}"][x]*globals()[f"com_dist_freq_{a}"][x] 55 | total_freq_2+=globals()[f"com_dist_freq_{a}"][x] 56 | total_avg_2= total_sum_2/total_freq_2 57 | #print("The average value after data point2: ", total_avg_2) 58 | 59 | a=0 60 | preftech_dist1=0 61 | preftech_dist2=0 62 | 63 | 64 | output_file=str(sys.argv[5])+"-ALL-dist1.csv" 65 | with open(output_file, 'a') as out: 66 | #print("---------------------") 67 | if total_avg_1 !=0: 68 | prefetch_dist1 = total_avg_2/total_avg_1 69 | # print(prefetch_dist1) 70 | out.write(str(PC)+","+str(round(prefetch_dist1))+",nta\n") 71 | 72 | 73 | 74 | 75 | 76 | #print("prefetch dist1 = ",prefetch_dist1 ) 77 | 78 | a=0 79 | output_file2=str(sys.argv[5])+"-ALL-dist2.csv" 80 | with open(output_file2, 'a') as out2: 81 | prefetch_dist2 = (int(DataPoint2)-int(DataPoint1))/int(DataPoint1) 82 | #print(round(preftech_dist2)) 83 | print(round(prefetch_dist2)) 84 | a=round(prefetch_dist2)*1000 85 | out2.write(str(PC)+","+str(a)+",nta\n") 86 | 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- 
/python-codes/calculate-dist.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from collections import Counter 5 | import sys 6 | import glob 7 | import re 8 | import matplotlib.cm as cm 9 | import numpy as np 10 | # Reads the one-column CSV file(s) matched by the glob in sys.argv[1]; sys.argv[2] is the PC label written to the output row. 11 | num_files=0 12 | names = ['dist'] 13 | #print("\n") 14 | PC = sys.argv[2] 15 | # For every matched file, count how often each 'dist' value occurs and keep the values/frequencies in module globals named after a token of the file name. 16 | for fname in glob.glob(sys.argv[1]): 17 | #print("Database Name: "+fname+"\n") 18 | a=re.split("[-.]", fname)[-3] 19 | globals()[f"data_{a}"]= pd.read_csv(fname) 20 | globals()[f"data_{a}"].columns = names 21 | num_files+=1 22 | distances=Counter(globals()[f"data_{a}"]['dist']) 23 | globals()[f"com_dist_value_{a}"]=[] 24 | globals()[f"com_dist_freq_{a}"]=[] 25 | total=0 26 | for key,value in distances.most_common(100000000): 27 | if (value > 0): 28 | globals()[f"com_dist_value_{a}"].append(key) 29 | globals()[f"com_dist_freq_{a}"].append(value) 30 | total += value 31 | # Thresholds come from the command line; the driver script passes the first and the fourth peak. 32 | DataPoint1 =sys.argv[3] 33 | DataPoint2= sys.argv[4] 34 | # print("DataPoint 1 : ", DataPoint1) 35 | # print("DataPoint 2 : ", DataPoint2) 36 | total_freq_1=0 37 | total_sum_1=0 38 | total_avg_1=0 39 | # Frequency-weighted mean of the distances below DataPoint1. 40 | for x in range(0, len( globals()[f"com_dist_value_{a}"])): 41 | if(globals()[f"com_dist_value_{a}"][x]< int(DataPoint1)): 42 | total_sum_1+=globals()[f"com_dist_value_{a}"][x]*globals()[f"com_dist_freq_{a}"][x] 43 | total_freq_1+=globals()[f"com_dist_freq_{a}"][x] 44 | total_avg_1= total_sum_1/total_freq_1 45 | #print("The average value before data point1: : ", total_avg_1) 46 | 47 | 48 | total_freq_2=0 49 | total_sum_2=0 50 | total_avg_2=0 51 | # Frequency-weighted mean of the distances at or above DataPoint2. 52 | for x in range(0, len( globals()[f"com_dist_value_{a}"])): 53 | if(globals()[f"com_dist_value_{a}"][x]>= int(DataPoint2)): 54 | total_sum_2+=globals()[f"com_dist_value_{a}"][x]*globals()[f"com_dist_freq_{a}"][x] 55 | total_freq_2+=globals()[f"com_dist_freq_{a}"][x] 56 | total_avg_2= total_sum_2/total_freq_2 57 | #print("The 
average value after data point2: ", total_avg_2) 58 | # NOTE(review): 'preftech_' below is presumably a misspelling of 'prefetch_'; neither of the two names is read anywhere in this script. 59 | a=0 60 | preftech_dist1=0 61 | preftech_dist2=0 62 | 63 | # The dist1 output is disabled in this variant; only dist2 is produced. 64 | # output_file=str(sys.argv[5])+"-ALL-dist1.csv" 65 | # with open(output_file, 'a') as out: 66 | #print("---------------------") 67 | # if total_avg_1 !=0: 68 | # prefetch_dist1 = total_avg_2/total_avg_1 69 | # print(prefetch_dist1) 70 | # out.write(str(PC)+","+str(round(prefetch_dist1))+",nta\n") 71 | 72 | 73 | 74 | 75 | 76 | #print("prefetch dist1 = ",prefetch_dist1 ) 77 | 78 | # dist2 is (DataPoint2 - DataPoint1) / DataPoint1, rounded; it is printed and appended to <argv[5]>-ALL-dist2.csv without scaling. 79 | output_file2=str(sys.argv[5])+"-ALL-dist2.csv" 80 | with open(output_file2, 'a') as out2: 81 | prefetch_dist2 = (int(DataPoint2)-int(DataPoint1))/int(DataPoint1) 82 | #prefetch_dist2 = ((int(DataPoint2)-int(DataPoint1))/int(DataPoint1))*1000 83 | #print(round(preftech_dist2)) 84 | print(round(prefetch_dist2)) 85 | out2.write(str(PC)+","+str(round(prefetch_dist2))+",nta\n") 86 | 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /python-codes/perf_rfile_baseline.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | # Token counters (*_location) and extracted fields (*_value) for each metric pulled out of the text report named by sys.argv[1]. 4 | IPC_location =0 5 | exe_location =0 6 | graph_location =0 7 | n_location =0 8 | d_location =0 9 | inst_location =0 10 | llc_loaction =0 11 | swPref_location =0 12 | LoadHints_location =0 13 | 14 | IPC_value=0 15 | exe_value=0 16 | graph_value=0 17 | n_value=0 18 | d_value=0 19 | inst_value =0 20 | llc_value =0 21 | swPref_value =0 22 | LoadHints_value =0 23 | 24 | # Scan the report line by line; for each marker string keep the whitespace-separated token at a fixed position. The counters are never reset. 25 | with open(sys.argv[1]) as file_in: 26 | lines = [] 27 | for line in file_in: 28 | lines.append(line.strip()) 29 | if 'insn per cycle' in line: 30 | for value in line.split(): 31 | if(IPC_location ==3): 32 | IPC_value=value 33 | IPC_location+=1 34 | if 'seconds user' in line: 35 | for value in line.split(): 36 | if(exe_location ==0): 37 | exe_value=value 38 | exe_location+=1 39 | if 'Input_graph' in line: 40 | for value in line.split(): 41 | if(graph_location ==2): 
42 | graph_value=value 43 | graph_location+=1 44 | if 'Nodes' in line: 45 | for value in line.split(): 46 | #print("v: ", value) 47 | if(n_location ==1): 48 | n_value=value 49 | #print("n_value: ", n_value) 50 | n_location+=1 51 | if 'Degree' in line: 52 | for value in line.split(): 53 | if(d_location ==1): 54 | d_value=value 55 | #print("d_value: ", d_value) 56 | d_location+=1 57 | 58 | 59 | # For the counter lines below the value kept is the token at position 0 of the matching line. 60 | if 'LLC-load-misses' in line: 61 | for value in line.split(): 62 | if(llc_loaction ==0): 63 | llc_value=value 64 | llc_loaction+=1 65 | if 'instructions' in line: 66 | for value in line.split(): 67 | if(inst_location ==0): 68 | inst_value=value 69 | inst_location+=1 70 | if 'SW_PREFETCH_ACCESS.T0' in line: 71 | for value in line.split(): 72 | if(swPref_location ==0): 73 | swPref_value=value 74 | swPref_location+=1 75 | if 'LOAD_HIT_PRE.SW_PF' in line: 76 | for value in line.split(): 77 | if(LoadHints_location ==0): 78 | LoadHints_value=value 79 | LoadHints_location+=1 80 | 81 | # Append one '/'-separated "baseline" row to sys.argv[2]: the graph name is used when no 'Degree' token was counted, the N<nodes>-D<degree> form otherwise. NOTE(review): output_file is never closed explicitly. 82 | output_file = open(sys.argv[2], "a") 83 | if d_location == 0: 84 | output_file.write("baseline/"+str(graph_value)+"/"+str(exe_value)+"/"+str(IPC_value)+"/"+str(inst_value)+"/"+str(llc_value)+"/"+str(swPref_value)+"/"+str(LoadHints_value)+"\n") 85 | 86 | else: 87 | output_file.write("baseline/N"+str(n_value)+"-D"+str(d_value)+"/"+str(exe_value)+"/"+str(IPC_value)+"/"+str(inst_value)+"/"+str(llc_value)+"/"+str(swPref_value)+"/"+str(LoadHints_value)+"\n") 88 | 89 | 90 | 91 | 92 | 93 | #print("======================") 94 | 95 | #for x in range(0,len(lines)): 96 | #print(lines[x]) 97 | -------------------------------------------------------------------------------- /python-codes/find-peaks.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from scipy.signal import find_peaks_cwt 5 | from collections import Counter 6 | import peakutils 7 | from scipy.signal import find_peaks 
8 | import matplotlib.pyplot as plt 9 | import scipy.signal as signal 10 | #import plotly.plotly as py 11 | import plotly.graph_objs as go 12 | #from plotly.tools import FigureFactory as FF 13 | 14 | import numpy as np 15 | import pandas as pd 16 | import scipy 17 | import peakutils 18 | import sys 19 | 20 | # Input CSV (sys.argv[1], read without a header row): column 0 is plotted on the x axis, column 1 on the y axis. 21 | estimated_data = pd.read_csv(sys.argv[1], header=None) 22 | 23 | col1 = estimated_data[:][0] # First column data 24 | col2 = estimated_data[:][1] # Second column data 25 | 26 | #print("col1: ",col1) 27 | #print("col2: ", col2) 28 | 29 | 30 | 31 | plt.ylim(0,5000) 32 | plt.xlim(0,2000) 33 | 34 | col3=[] 35 | col4=[] 36 | #for i in range(0,len(col2)-100): 37 | # if i==0 and col2[i]-col2[i+100]>col2[i]/2: 38 | # col3.append(col1[i]) 39 | # elif i>100: 40 | # if col2[i]-col2[i+100]<-col2[i]/2 and col2[i]-col2[i-100]>col2[i]/2: 41 | # col3.append(col1[i]) 42 | 43 | #for i in col3: 44 | # print(i) 45 | # for x in range(0, len(col1)): 46 | # if col1[x]==i: 47 | # col4.append(col2[x]) 48 | 49 | 50 | 51 | #for i, d in enumerate(data[0:]): 52 | # if abs(d_l - d) > dy_lim: 53 | # if in_lock: 54 | # targets.append(i_l) 55 | # targets.append(i + 1) 56 | # in_lock = False 57 | # i_l, d_l = i, d 58 | # else: 59 | #in_lock = True 60 | 61 | 62 | 63 | #for t in targets: 64 | # print(t) 65 | 66 | 67 | # NOTE(review): this result is overwritten by 'peaks=[]' further down before it is read. 68 | peaks, _ = find_peaks(col2, threshold=300) 69 | #print(peaks) 70 | 71 | # Candidate peaks from a wavelet-based search over col2 with the first 20 samples skipped. NOTE(review): the returned indices are presumably relative to the slice, yet they index col2/col1 unshifted below - confirm. 72 | #peakidx = signal.find_peaks_cwt(col2, np.arange(15,20), noise_perc=0.1) 73 | peakidx = signal.find_peaks_cwt(col2[20:], np.arange(5,15), noise_perc=0.1) 74 | #peakidx = signal.find_peaks_cwt(col2[5:], order=5, noise_perc=0.1)-1 75 | #print(peakidx) 76 | #print(col2[1]) 77 | 78 | 79 | plt.scatter(col1,col2, lw=0.4, alpha=0.4 ) 80 | #plt.scatter(peakidx,col2[peakidx], color='orange' ) 81 | #plt.scatter(col3, col4 ,color='orange') 82 | 83 | peaks=[] 84 | i=0 85 | 86 | # Merge neighbouring candidates: when two consecutive indices are closer than 200, keep only the one with the larger col2 value. 87 | while i< (len(peakidx)-1): 88 | if peakidx[i+1]-peakidx[i]< 200: 89 | if col2[peakidx[i]]> col2[peakidx[i+1]]: 90 | 
peaks.append(peakidx[i]) 91 | i=i+2 92 | else: 93 | peaks.append(peakidx[i+1]) 94 | i=i+2 95 | else: 96 | peaks.append(peakidx[i]) 97 | i=i+1 98 | # print(peaks) 99 | 100 | print(col1[peaks]) 101 | 102 | count=0 103 | sum=0 104 | new_peaks=[] 105 | for i in range(0, len(peaks)): 106 | if(peaks[i]< 250): 107 | sum = sum +peaks[i] 108 | count =count+1 109 | else: 110 | new_peaks.append(peaks[i]) 111 | 112 | if count!=0: 113 | new_peaks.append(round(sum/count)) 114 | #for i in new_peaks: 115 | # print("new_peaks: ",new_peaks) 116 | 117 | #plt.scatter(peaks,col2[peaks], color='red' ) 118 | #plt.scatter(col1[peaks],col2[peaks], color='red' ) 119 | plt.scatter(col1[new_peaks],col2[new_peaks], color='red' ) 120 | 121 | 122 | #if new_peaks!=[]: 123 | if len(new_peaks)>3: 124 | output_file=str(sys.argv[2])+"-peaks.csv" 125 | with open(output_file, 'wt') as out: 126 | for i in range(0, len(peaks)): 127 | out.write(str(col1[peaks[i]])+"\n") 128 | else: 129 | output_file=str(sys.argv[2])+"-peaks.csv" 130 | with open(output_file, 'wt') as out: 131 | out.write("32\n") 132 | out.write("32\n") 133 | out.write("1056\n") 134 | out.write("1056\n") 135 | 136 | 137 | 138 | plt.show() 139 | plt.savefig(str(sys.argv[2])+"-scatter-plot-with-peaks.png") 140 | 141 | 142 | -------------------------------------------------------------------------------- /python-codes/perf_rfile_pref.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | x=0 4 | IPC_location =0 5 | exe_location =0 6 | graph_location =0 7 | inst_location =0 8 | llc_loaction =0 9 | dist_location =0 10 | swPref_location =0 11 | LoadHints_location =0 12 | d_location=0 13 | d_value=0 14 | n_location=0 15 | n_value=0 16 | IPC_value=0 17 | exe_value=0 18 | graph_value=0 19 | inst_value =0 20 | llc_value =0 21 | dist_value =0 22 | swPref_value =0 23 | LoadHints_value =0 24 | 25 | 26 | with open(sys.argv[1]) as file_in: 27 | lines = [] 28 | for line in file_in: 29 | 
lines.append(line.strip()) 30 | if 'insn per cycle' in line: 31 | for value in line.split(): 32 | if(IPC_location ==3): 33 | IPC_value=value 34 | IPC_location+=1 35 | if 'seconds user' in line: 36 | for value in line.split(): 37 | if(exe_location ==0): 38 | exe_value=value 39 | exe_location+=1 40 | if 'Input_graph' in line: 41 | for value in line.split(): 42 | if(graph_location ==2): 43 | graph_value=value 44 | graph_location+=1 45 | if 'prefetch-distance' in line: 46 | for value in line.split(): 47 | if(dist_location ==2): 48 | dist_value=value 49 | #print(dist_value) 50 | dist_location+=1 51 | if 'Nodes' in line: 52 | for value in line.split(): 53 | #print("v: ", value) 54 | if(n_location ==1): 55 | n_value=value 56 | #print("n_value: ", n_value) 57 | n_location+=1 58 | if 'Degree' in line: 59 | for value in line.split(): 60 | if(d_location ==1): 61 | d_value=value 62 | #print("d_value: ", d_value) 63 | d_location+=1 64 | if 'LLC-load-misses' in line: 65 | for value in line.split(): 66 | if(llc_loaction ==0): 67 | llc_value=value 68 | llc_loaction+=1 69 | if 'instructions' in line: 70 | for value in line.split(): 71 | if(inst_location ==0): 72 | inst_value=value 73 | inst_location+=1 74 | if 'SW_PREFETCH_ACCESS.T0' in line: 75 | for value in line.split(): 76 | if(swPref_location ==0): 77 | swPref_value=value 78 | swPref_location+=1 79 | if 'LOAD_HIT_PRE.SW_PF' in line: 80 | for value in line.split(): 81 | if(LoadHints_location ==0): 82 | LoadHints_value=value 83 | LoadHints_location+=1 84 | 85 | # Append one '/'-separated "prefetch" row to sys.argv[2]; the graph name is used when no 'Degree' token was counted, the N<nodes>-D<degree> form otherwise. 86 | output_file = open(sys.argv[2], "a") 87 | if int(dist_value)< 1500: 88 | if d_location == 0: 89 | output_file.write("prefetch/"+str(dist_value)+"/"+str(graph_value)+"/"+str(exe_value)+"/"+str(IPC_value)+"/"+str(inst_value)+"/"+str(llc_value)+"/"+str(swPref_value)+"/"+str(LoadHints_value)+"\n") 90 | else: 91 | 
output_file.write("prefetch/"+str(dist_value)+"/N"+str(n_value)+"-D"+str(d_value)+"/"+str(exe_value)+"/"+str(IPC_value)+"/"+str(inst_value)+"/"+str(llc_value)+"/"+str(swPref_value)+"/"+str(LoadHints_value)+"\n") 92 | # Distances of 1500 or more are divided by 1000 before being written (calculate-dist-crono.py stores its dist2 value multiplied by 1000). 93 | else: 94 | x=int(dist_value)/1000 95 | if d_location == 0: 96 | output_file.write("prefetch/"+str(x)+"/"+str(graph_value)+"/"+str(exe_value)+"/"+str(IPC_value)+"/"+str(inst_value)+"/"+str(llc_value)+"/"+str(swPref_value)+"/"+str(LoadHints_value)+"\n") 97 | else: 98 | output_file.write("prefetch/"+str(x)+"/N"+str(n_value)+"-D"+str(d_value)+"/"+str(exe_value)+"/"+str(IPC_value)+"/"+str(inst_value)+"/"+str(llc_value)+"/"+str(swPref_value)+"/"+str(LoadHints_value)+"\n") 99 | # NOTE(review): '/' is true division, so x is a float and is written with a decimal point; output_file is never closed explicitly. 100 | 101 | 102 | 103 | 104 | 105 | 106 | #print("======================") 107 | 108 | #for x in range(0,len(lines)): 109 | #print(lines[x]) 110 | -------------------------------------------------------------------------------- /python-codes/plot-scatter.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from collections import Counter 5 | import sys 6 | import glob 7 | import re 8 | import matplotlib.cm as cm 9 | import numpy as np 10 | from matplotlib import pyplot as plt 11 | from scipy.signal import find_peaks_cwt 12 | 13 | # Scatter-plots how often each 'dist' value occurs in the CSV file(s) matched by the glob in sys.argv[1]; the figure is saved as sys.argv[2] + ".png". 14 | 15 | num_files=0 16 | names = ['dist'] 17 | # The per-file value/frequency lists are stored in module globals named after the second-to-last '-'/'.' token of the file name. 18 | #for fname in glob.glob("401e49v1-B512MW32-baseline-awk.csv"): 19 | for fname in glob.glob(sys.argv[1]): 20 | print(fname+"\n") 21 | a=re.split("[-.]", fname)[-2] 22 | print("a: ", a) 23 | globals()[f"data_{a}"]= pd.read_csv(fname) 24 | globals()[f"data_{a}"].columns = names 25 | num_files+=1 26 | distances=Counter(globals()[f"data_{a}"]['dist']) 27 | globals()[f"com_dist_value_{a}"]=[] 28 | globals()[f"com_dist_freq_{a}"]=[] 29 | 30 | for key,value in distances.most_common(100000000): 31 | if (value > 0): 32 | globals()[f"com_dist_value_{a}"].append(key) 33 | 
globals()[f"com_dist_freq_{a}"].append(value) 34 | 35 | 36 | #print(globals()[f"com_dist_value_{a}"]) 37 | #print(globals()[f"com_dist_freq_{a}"]) 38 | # Figure size, axis limits, colour palette and tick font size are fixed here. 39 | plt.rcParams.update({'figure.figsize':(10,8), 'figure.dpi':100}) 40 | 41 | #plt.ylim(0,500) 42 | plt.ylim(0,5000) 43 | plt.xlim(0,2000) 44 | #plt.xlim(0,250) 45 | #plt.xlim(250,500) 46 | #colors = cm.rainbow(np.linspace(0, 1, 20)) 47 | colors = ["red", "gold", "darkorchid", "royalblue", "chartreuse", "darkorange","dimgray" , "lightgreen", "rosybrown", "deepskyblue"] 48 | #plt.yticks(np.arange(0, 500, 4000), fontsize=14 ) 49 | plt.yticks(fontsize=14) 50 | plt.xticks(fontsize=14) 51 | # NOTE(review): the active scatter call reads com_dist_value_new / com_dist_freq_new, which only exist when the file-name token computed above is "new"; any other input name raises NameError - confirm callers always pass a "*-new.<ext>" file. 52 | #plt.scatter(com_dist_value_step3, com_dist_freq_step3 ,s=12,color=colors[0],label ='bfs-N80K-D8') 53 | #plt.scatter(com_dist_value_step2, com_dist_freq_step2 ,s=12,color=colors[0],label ='bfs-N80K-D8') 54 | #peaks = find_peaks_cwt(com_dist_value_new, com_dist_freq_new) 55 | plt.scatter(com_dist_value_new, com_dist_freq_new ,s=12,color=colors[0],label ='pr-com-liveJouranl') 56 | #plt.scatter(peaks,com_dist_freq_new[peaks] ,"x") 57 | #peaks = find_peaks_cwt(com_dist_value_new, com_dist_freq_new, widths=np.ones(data.shape)*2)-1 58 | 59 | 60 | #plt.scatter(com_dist_value_w4, com_dist_freq_w4 ,s=10,color=colors[0],label ='base_arr_size:64M , work_arr_size:4') 61 | #plt.scatter(com_dist_value_w8, com_dist_freq_w8 ,s=10,color=colors[6],label ='base_arr_size:64M , work_arr_size:8') 62 | #plt.scatter(com_dist_value_w16, com_dist_freq_w16 ,s=10,color=colors[2],label ='base_arr_size:64M , work_arr_size:16') 63 | #plt.scatter(com_dist_value_w32, com_dist_freq_w32 ,s=10,color=colors[3],label ='base_arr_size:64M , work_arr_size:32') 64 | #plt.scatter(com_dist_value_w64, com_dist_freq_w64 ,s=10,color=colors[4],label ='base_arr_size:64M , work_arr_size:64') 65 | #plt.scatter(com_dist_value_w128, com_dist_freq_w128 ,s=10,color=colors[5],label ='base_arr_size:64M , work_arr_size:128') 66 | #plt.scatter(com_dist_value_w256, com_dist_freq_w256 
,s=10,color=colors[6],label ='base_arr_size:64M , work_arr_size:256') 67 | #plt.scatter(com_dist_value_w512, com_dist_freq_w512 ,s=10,color=colors[7],label ='base_arr_size:64M , work_arr_size:512') 68 | #plt.scatter(com_dist_value_w1024, com_dist_freq_w1024 ,s=10,color=colors[2],label ='base_arr_size:64M , work_arr_size:1024') 69 | #plt.scatter(com_dist_value_w2048, com_dist_freq_w2048 ,s=10,color=colors[2],label ='base_arr_size:64M , work_arr_size:2048') 70 | #plt.scatter(com_dist_value_B1GW64, com_dist_freq_B1GW64 ,s=10,color=colors[3],label ='base_arr_size:1G , work_arr_size:64') 71 | #plt.scatter(com_dist_value_B1GW128, com_dist_freq_B1GW128 ,s=10,color=colors[5],label ='base_arr_size:1G , work_arr_size:128') 72 | #plt.scatter(com_dist_value_B1GW256, com_dist_freq_B1GW256 ,s=10,color=colors[6],label ='base_arr_size:1G , work_arr_size:256') 73 | 74 | #plt.scatter(com_dist_value_B512MW32, com_dist_freq_B512MW32 ,s=10,color=colors[1],label ='base_arr_size:512M , work_arr_size:32') 75 | #plt.scatter(com_dist_value_B512MW64, com_dist_freq_B512MW64 ,s=10,color=colors[2],label ='base_arr_size:512M , work_arr_size:64') 76 | #plt.scatter(com_dist_value_B512MW128, com_dist_freq_B512MW128 ,s=10,color=colors[0],label ='base_arr_size:512M , work_arr_size:128') 77 | #plt.scatter(com_dist_value_B512MW256, com_dist_freq_B512MW256 ,s=10,color=colors[7],label ='base_arr_size:512M , work_arr_size:256') 78 | 79 | 80 | #plt.scatter(com_dist_value_B2MW32, com_dist_freq_B2MW32 ,s=10,color=colors[1],label ='base_arr_size:2M , work_arr_size:32') 81 | #plt.scatter(com_dist_value_B2MW64, com_dist_freq_B2MW64 ,s=10,color=colors[2],label ='base_arr_size:2M , work_arr_size:64') 82 | #plt.scatter(com_dist_value_B2MW128, com_dist_freq_B2MW128 ,s=10,color=colors[0],label ='base_arr_size:2M , work_arr_size:128') 83 | #plt.scatter(com_dist_value_B2MW256, com_dist_freq_B2MW256 ,s=10,color=colors[7],label ='base_arr_size:2M , work_arr_size:256') 84 | 85 | #plt.scatter(com_dist_value_baseline, 
com_dist_freq_baseline ,s=10,color=colors[0],label ='baseline') 86 | #plt.scatter(com_dist_value_llc, com_dist_freq_llc ,s=10,color=colors[3],label ='llc') 87 | 88 | 89 | 90 | #plt.scatter(com_dist_value_4k, com_dist_freq_4k ,s=10,color=colors[1],label ='4k') 91 | #plt.scatter(com_dist_value_64k, com_dist_freq_64k ,s=10,color=colors[3],label ='64k') 92 | #plt.scatter(com_dist_value_512k, com_dist_freq_512k ,s=10,color=colors[8],label ='512k') 93 | #plt.scatter(com_dist_value_1M, com_dist_freq_1M ,s=10,color=colors[9],label ='1M') 94 | #plt.scatter(com_dist_value_4M, com_dist_freq_4M ,s=10,color=colors[4],label ='4M') 95 | #plt.scatter(com_dist_value_8M, com_dist_freq_8M ,s=10,color=colors[7],label ='8M') 96 | #plt.scatter(com_dist_value_64M, com_dist_freq_64M ,s=10,color=colors[0],label ='64M') 97 | #plt.scatter(com_dist_value_256M, com_dist_freq_256M ,s=10,color=colors[2],label ='256M') 98 | #plt.scatter(com_dist_value_512M, com_dist_freq_512M ,s=10,color=colors[5],label ='512M') 99 | #plt.scatter(com_dist_value_1G, com_dist_freq_1G ,s=10,color=colors[6],label ='1G') 100 | 101 | # Axis labels, grid, legend and title, then the figure is written to sys.argv[2] + ".png". 102 | plt.xlabel('Cycles',weight='bold',fontsize=18) 103 | plt.ylabel('Number of Occurrences', weight='bold', fontsize=18) 104 | 105 | plt.grid (True) 106 | plt.legend (bbox_to_anchor = (1, 1), fontsize =18) # Display labels representing data groups outside the graph 107 | 108 | plt.title('The basic block execution time in terms of cycle from LBR samples', weight='bold', fontsize=18) 109 | plt.savefig(sys.argv[2]+".png") 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /scripts/capture_PCs_real.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | benchmark_name=$1 3 | g=$2 4 | prefetch_distance=(4 8 16 32 64) 5 | ####################PATHs 6 | benchmark_path="" 7 | results_path="" 8 | input_graphs_path="" 9 | python_codes_path="" 10 | LLVM10_buildMyPasses="" 11 | # The path variables above are left empty, presumably to be filled in before use. NOTE(review): AutoFDO10 is referenced later in this script but is not assigned in this section. 12 | 13 | echo 
"" 14 | echo "benchmark_name: "$benchmark_name 15 | echo "input-graph: "$g 16 | echo "################################## LLC misses" 17 | echo "Capture deliquent load PCs ... " 18 | # gn is the graph file name with its last four characters removed (presumably the extension); it is used in every output name below. 19 | gn=${g::-4} 20 | ####################LLC misses 21 | LLC_DIR="LLC-misses-"$benchmark_name"-INPUT-"$gn 22 | mkdir $LLC_DIR 23 | cd $LLC_DIR 24 | echo "" 25 | echo " 1) perf record LLC misses ...." 26 | perf record -e cpu/event=0xd1,umask=0x20,name=MEM_LOAD_RETIRED.L3_MISS/ppp -- ./../../$benchmark_name 1 1 $input_graphs_path/$g 27 | echo "" 28 | echo " 2) perf report --stdio ...." 29 | perf report --stdio > $benchmark_name"-INPUT-"$gn"-perfReport.txt" 30 | echo "" 31 | echo " 3) Capture Functions that cause most LLC misses ..." 32 | python3 $python_codes_path/read-func.py $benchmark_name"-INPUT-"$gn"-perfReport.txt" $benchmark_name"-INPUT-"$gn"-FuncPercentList.txt" $benchmark_name"-INPUT-"$gn"-FuncList.txt" $benchmark_name 33 | echo "" 34 | echo " 4) perf annotate --stdio & Capturing all deliquent load PCs for each function ...." 35 | while read FUNC; do 36 | echo " Function Name: $FUNC" 37 | perf annotate --stdio -M intel "$FUNC" > $benchmark_name"-INPUT-"$gn"-"$FUNC".txt" 38 | sed -i '1d' $benchmark_name"-INPUT-"$gn"-"$FUNC".txt" 39 | python3 $python_codes_path/llc_missed_pcs_rfile.py $benchmark_name"-INPUT-"$gn"-"$FUNC".txt" $benchmark_name"-INPUT-"$gn"-"$FUNC"-PCPersentList.txt" $benchmark_name"-INPUT-"$gn"-"$FUNC"-PCList.txt" 40 | cat $benchmark_name"-INPUT-"$gn"-"$FUNC"-PCList.txt" >> $benchmark_name"-INPUT-"$gn"-ALL-PCList.txt" 41 | #echo " done for Function: $FUNC" 42 | done < $benchmark_name"-INPUT-"$gn"-FuncList.txt" 43 | #echo "done!" 
44 | echo "" 45 | 46 | 47 | echo "############################# LBR sampling" 48 | 49 | 50 | if [[ "$benchmark_name" == "bc" ]] 51 | then 52 | timeout 10s perf record -e cycles:u -j any,u -o perf.data -- ./../../$benchmark_name 1 1 $input_graphs_path/$g 53 | else 54 | timeout 10s perf record -e cycles:u -j any,u -o perf.data -- ./../../$benchmark_name 1 1 $input_graphs_path/$g 55 | fi 56 | time perf script -F ip,brstack -i perf.data > "dump-"$benchmark_name"-INPUT-"$gn"-whole-app-LBRsamples-brstack.txt" 57 | perf script -F ip,brstack,brstackinsn -i perf.data > "dump-"$benchmark_name"-INPUT-"$gn"-whole-app-LBRsamples-brstackinsn.txt" 58 | 59 | while read PC; do 60 | python3 $python_codes_path/first_filter_samples.py "dump-"$benchmark_name"-INPUT-"$gn"-whole-app-LBRsamples-brstackinsn.txt" $PC 61 | done < $benchmark_name"-INPUT-"$gn"-ALL-PCList.txt" 62 | 63 | while read PC; do 64 | python3 $python_codes_path/find_src_in_branches.py "first-filter-"$PC".txt" $PC 65 | while read PC_src; do 66 | python3 $python_codes_path/find_dest_in_branches.py "first-filter-"$PC".txt" $PC_src $PC 67 | done < "in-branches-src-PC-"$PC".txt" 68 | done < $benchmark_name"-INPUT-"$gn"-ALL-PCList.txt" 69 | ###### 70 | 71 | 72 | 73 | 74 | while read PC; do 75 | src="$(sed "1q;d" "in-branches-src-PC-"$PC".txt")" 76 | dst="$(sed "1q;d" "in-branches-dest-PC-"$PC".txt")" 77 | echo "PC: " $PC 78 | echo " src: " $src 79 | echo " dst: " $dst 80 | 81 | python3 $python_codes_path/filter_samples.py "dump-"$benchmark_name"-INPUT-"$gn"-whole-app-LBRsamples-brstackinsn.txt" $PC 82 | 83 | python3 $python_codes_path/temp.py "dump-"$benchmark_name"-INPUT-"$gn"-whole-app-LBRsamples-brstack.txt" "0x"$src "0x"$dst $PC 84 | 85 | python3 $python_codes_path/dist-between-2-occur-outerloop.py "dump-"$benchmark_name"-INPUT-"$gn"-whole-app-LBRsamples-brstack.txt" "0x"$src "0x"$dst $PC python3 $python_codes_path/dist-between-2-occur-outerloop.py "filter-"$PC".txt" "0x"$src "0x"$dst $PC 86 | tail -n 5000 
"0x"$src"-0x"$dst"-dist-between-2-occur-outerloop-PC-"$PC".txt" > "x.txt" 87 | 88 | python3 $python_codes_path/cal-avg-dist-outerloop.py "0x"$src"-0x"$dst"-dist-between-2-occur-outerloop-PC-"$PC".txt" "0x"$src "0x"$dst $PC 89 | python3 $python_codes_path/cal-avg-dist-outerloop.py "x.txt" "0x"$src "0x"$dst $PC 90 | 91 | val="$(sed "2q;d" "0x"$src"-0x"$dst"-avg-dist-outerloop-PC-"$PC".txt")" 92 | python3 $python_codes_path/inner-iters.py "dump-"$benchmark_name"-INPUT-"$gn"-whole-app-LBRsamples-brstack.txt" "0x"$src "0x"$dst $PC 93 | python3 $python_codes_path/cal-avg-inner-iters.py "0x"$src"-0x"$dst"-innet-iters-PC-"$PC".txt" "0x"$src "0x"$dst $PC 94 | python3 $python_codes_path/inner-avg-iter-time.py "dump-"$benchmark_name"-INPUT-"$gn"-whole-app-LBRsamples-brstack.txt" "0x"$src "0x"$dst $PC 95 | python3 $python_codes_path/cal-avg-inner-iter-time.py "0x"$src"-0x"$dst"-avg-inner-iter-time-PC-"$PC".txt" "0x"$src "0x"$dst $PC 96 | python3 $python_codes_path/plot-scatter.py "0x"$src"-0x"$dst"-cycles-PC-"$PC"-new.txt" "0x"$src"-0x"$dst"-cycles-PC-"$PC"-new-plot" 97 | python3 $python_codes_path/test-plot.py "0x"$src"-0x"$dst"-cycles-PC-"$PC"-new.txt" "0x"$src"-0x"$dst"-cycles-PC-"$PC 98 | python3 $python_codes_path/sort-data.py "0x"$src"-0x"$dst"-cycles-PC-"$PC"-test-plot.csv" "0x"$src"-0x"$dst"-cycles-PC-"$PC 99 | python3 $python_codes_path/find-peaks.py "0x"$src"-0x"$dst"-cycles-PC-"$PC"-sorted-data.csv" "0x"$src"-0x"$dst"-cycles-PC-"$PC 100 | first_peak="$(sed "1q;d" "0x"$src"-0x"$dst"-cycles-PC-"$PC"-peaks.csv")" 101 | sec_peak="$(sed "2q;d" "0x"$src"-0x"$dst"-cycles-PC-"$PC"-peaks.csv")" 102 | three_peak="$(sed "3q;d" "0x"$src"-0x"$dst"-cycles-PC-"$PC"-peaks.csv")" 103 | four_peak="$(sed "4q;d" "0x"$src"-0x"$dst"-cycles-PC-"$PC"-peaks.csv")" 104 | val_r=`echo $val | awk '{print int($1)}'` 105 | INT=200 106 | if (( $val_r > $INT ));then 107 | dist=$(python3 $python_codes_path/calculate-dist.py "0x"$src"-0x"$dst"-cycles-PC-"$PC"-new.txt" $PC $first_peak $four_peak 
$benchmark_name"-INPUT-"$gn) 108 | else 109 | dist=$(python3 $python_codes_path/calculate-dist-crono.py "0x"$src"-0x"$dst"-cycles-PC-"$PC"-new.txt" $PC $first_peak $four_peak $benchmark_name"-INPUT-"$gn) 110 | fi 111 | done < $benchmark_name"-INPUT-"$gn"-ALL-PCList.txt" 112 | # Copy both distance lists one level up and leave the LLC-misses directory. 113 | cp $benchmark_name"-INPUT-"$gn"-ALL-dist2.csv" ../ 114 | cp $benchmark_name"-INPUT-"$gn"-ALL-dist1.csv" ../ 115 | cd ../ 116 | 117 | # Baseline run: collect perf counters, append a small config footer to the stats file, then let perf_rfile_baseline.py add one row to the shared results file. 118 | echo "#############################Perf Stats for Baseline config" 119 | STATS_DIR="Perf-Stats-"$benchmark_name"-INPUT-"$gn 120 | mkdir $STATS_DIR 121 | cd $STATS_DIR 122 | 123 | python3 $python_codes_path/bench_name.py ../../../../"CRONO-benchmarks-perf-stats-output.txt" $benchmark_name 124 | perf stat -o $benchmark_name"-INPUT-"$gn"-perf-stats.out" -e L1-dcache-loads -e L1-dcache-load-misses -e L2-loads -e L2-load-misses -e LLC-loads -e LLC-load-misses -e cycles -e instructions -e SW_PREFETCH_ACCESS.T1_T2 -e SW_PREFETCH_ACCESS.T0 -e SW_PREFETCH_ACCESS.NTA -e LOAD_HIT_PRE.SW_PF -e cache-misses ./../../$benchmark_name 1 1 $input_graphs_path/$g 125 | 126 | echo "" >> $benchmark_name"-INPUT-"$gn"-perf-stats.out" 127 | echo "Config: Baseline">> $benchmark_name"-INPUT-"$gn"-perf-stats.out" 128 | echo " Input_graph = " $g>> $benchmark_name"-INPUT-"$gn"-perf-stats.out" 129 | echo "---------------------------------" >> $benchmark_name"-INPUT-"$gn"-perf-stats.out" 130 | echo "" 131 | python3 $python_codes_path/perf_rfile_baseline.py $benchmark_name"-INPUT-"$gn"-perf-stats.out" ../../../../"CRONO-benchmarks-perf-stats-output.txt" 132 | cp ../../$benchmark_name . 133 | cp ../../$benchmark_name".ll" . 
134 | echo "" 135 | echo "" 136 | # NOTE(review): the text from here to the line holding only 'com' appears to be a disabled (here-document) sweep over fixed prefetch distances; its opening lines are damaged in this copy. 137 | 138 | < $benchmark_name"-pref-INPUT-"$gn"-dist"$dist".ll" 153 | $LLVM10_buildMyPasses/bin/clang++ -gmlt -std=c++11 -O3 -fdebug-info-for-profiling -Wall $benchmark_name"-pref-INPUT-"$gn"-dist"$dist".ll" -c 154 | $LLVM10_buildMyPasses/bin/clang++ -gmlt -std=c++11 -O3 -fdebug-info-for-profiling -Wall $benchmark_name"-pref-INPUT-"$gn"-dist"$dist".o" -o $benchmark_name"-pref-INPUT-"$gn"-dist"$dist -lpthread -lrt 155 | python3 $python_codes_path/bench_name.py ../../../../"CRONO-benchmarks-perf-stats-output.txt" $benchmark_name 156 | perf stat -o $benchmark_name"-pref-INPUT-"$gn"-dist"$dist"-perf-stats.out" -e L1-dcache-loads -e L1-dcache-load-misses -e L2-loads -e L2-load-misses -e LLC-loads -e LLC-load-misses -e cycles -e instructions -e SW_PREFETCH_ACCESS.T1_T2 -e SW_PREFETCH_ACCESS.T0 -e SW_PREFETCH_ACCESS.NTA -e LOAD_HIT_PRE.SW_PF -e cache-misses ./$benchmark_name"-pref-INPUT-"$gn"-dist"$dist 1 1 $input_graphs_path/$g 157 | echo "" >> $benchmark_name"-pref-INPUT-"$gn"-dist"$dist"-perf-stats.out" 158 | echo "Config: Prefetching">> $benchmark_name"-pref-INPUT-"$gn"-dist"$dist"-perf-stats.out" 159 | echo " Input_graph = " $g>> $benchmark_name"-pref-INPUT-"$gn"-dist"$dist"-perf-stats.out" 160 | echo " prefetch-distance = " $dist>> $benchmark_name"-pref-INPUT-"$gn"-dist"$dist"-perf-stats.out" 161 | echo "---------------------------------" >> $benchmark_name"-pref-INPUT-"$gn"-dist"$dist"-perf-stats.out" 162 | echo "" 163 | python3 $python_codes_path/perf_rfile_pref.py $benchmark_name"-pref-INPUT-"$gn"-dist"$dist"-perf-stats.out" ../../../../"CRONO-benchmarks-perf-stats-output.txt" 164 | done 165 | 166 | 167 | com 168 | echo "#############################Perf stats for Prefetching with LBR prefetch distance" 169 | cp ../$benchmark_name"-INPUT-"$gn"-ALL-dist2.csv" . 
170 | echo "prefetch-distance: "$dist 171 | $AutoFDO10//create_llvm_prof --binary=$benchmark_name --profile=$benchmark_name"-INPUT-"$gn"-ALL-dist2.csv" --profiler="prefetch" --format=text --out=$benchmark_name"-INPUT-"$gn"-prefetch.afdo" 172 | $LLVM10_buildMyPasses/bin/opt -load /soe/sjamilan/LLVM10/llvm-project-10.0.0/build_mypasses/lib/SWPrefetchingLLVMPass.so -S -SWPrefetchingLLVMPass -input-file $benchmark_name"-INPUT-"$gn"-prefetch.afdo" -dist $dist <$benchmark_name".ll"> $benchmark_name"-pref-INPUT-"$gn".ll" 173 | $LLVM10_buildMyPasses/bin/clang --std=c++0x -O3 -fdebug-info-for-profiling -Wall -Werror $benchmark_name"-pref-INPUT-"$gn".ll" -c 174 | $LLVM10_buildMyPasses/bin/clang -g --std=c++0x -O3 -fdebug-info-for-profiling -Wall -Werror $benchmark_name"-pref-INPUT-"$gn".o" -o $benchmark_name"-pref-INPUT-"$gn -lpthread -lrt 175 | python3 $python_codes_path/bench_name.py ../../../../"CGO17-benchmarks-perf-stats-output.txt" $benchmark_name 176 | 177 | if [[ "$benchmark_name" == "bc" ]] 178 | then 179 | perf stat -o $benchmark_name"-pref-INPUT-"$gn"-perf-stats.out" -e L1-dcache-loads -e L1-dcache-load-misses -e L2-loads -e L2-load-misses -e LLC-loads -e LLC-load-misses -e cycles -e instructions -e SW_PREFETCH_ACCESS.T1_T2 -e SW_PREFETCH_ACCESS.T0 -e SW_PREFETCH_ACCESS.NTA -e LOAD_HIT_PRE.SW_PF -e cache-misses ./$benchmark_name"-pref-INPUT-"$gn 1 1 $input_graphs_path/$g 180 | else 181 | perf stat -o $benchmark_name"-pref-INPUT-"$gn"-perf-stats.out" -e L1-dcache-loads -e L1-dcache-load-misses -e L2-loads -e L2-load-misses -e LLC-loads -e LLC-load-misses -e cycles -e instructions -e SW_PREFETCH_ACCESS.T1_T2 -e SW_PREFETCH_ACCESS.T0 -e SW_PREFETCH_ACCESS.NTA -e LOAD_HIT_PRE.SW_PF -e cache-misses ./$benchmark_name"-pref-INPUT-"$gn 1 1 $input_graphs_path/$g 182 | 183 | fi 184 | echo "" >> $benchmark_name"-pref-INPUT-"$gn"-perf-stats.out" 185 | echo "Config: Prefetching">> $benchmark_name"-pref-INPUT-"$gn"-perf-stats.out" 186 | echo " Input_graph = " $g>> 
$benchmark_name"-pref-INPUT-"$gn"-perf-stats.out" 187 | echo " prefetch-distance = "$dist>> $benchmark_name"-pref-INPUT-"$gn"-perf-stats.out" 188 | echo "---------------------------------" >> $benchmark_name"-pref-INPUT-"$gn"-perf-stats.out" 189 | echo "" 190 | python3 $python_codes_path/perf_rfile_pref.py $benchmark_name"-pref-INPUT-"$gn"-perf-stats.out" ../../../../"CRONO-benchmarks-perf-stats-output.txt" 191 | 192 | 193 | 194 | 195 | 196 | -------------------------------------------------------------------------------- /scripts/capture_PCs_syn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | benchmark_name=$1 3 | n=$2 4 | d=$3 5 | prefetch_distance=(4 8 16 32 64) 6 | ####################PATH 7 | benchmark_path="" 8 | results_path="" 9 | input_graphs_path="" 10 | python_codes_path="" 11 | LLVM10_buildMyPasses='' 12 | 13 | echo "" 14 | echo "benchmark_name: "$benchmark_name 15 | echo "Nodes: "$n 16 | echo "Degree: "$d 17 | echo "################################## LLC misses" 18 | echo "#Capture deliquent load PCs ... " 19 | 20 | gn=${g::-4} 21 | LLC_DIR="LLC-misses-"$benchmark_name"-INPUT-N"$n"-D"$d 22 | mkdir $LLC_DIR 23 | cd $LLC_DIR 24 | echo "" 25 | echo " 1) perf record LLC misses ...." 26 | if [[ "$benchmark_name" == "bc" ]] 27 | then 28 | perf record -e cpu/event=0xd1,umask=0x20,name=MEM_LOAD_RETIRED.L3_MISS/ppp -- ./../../$benchmark_name 1 $n $d 29 | else 30 | perf record -e cpu/event=0xd1,umask=0x20,name=MEM_LOAD_RETIRED.L3_MISS/ppp -- ./../../$benchmark_name 0 1 $n $d 31 | fi 32 | echo "" 33 | echo " 2) perf report --stdio ...." 34 | perf report --stdio > $benchmark_name"-INPUT-N"$n"-D"$d"-perfReport.txt" 35 | echo "" 36 | echo " 3) Capture Functions that cause most LLC misses ..." 
# Step 3 (continued): turn the perf report written above into a list of the
# functions that account for most LLC misses.
# Every artefact of this stage shares the same "<bench>-INPUT-N<n>-D<d>" prefix.
run_prefix="${benchmark_name}-INPUT-N${n}-D${d}"

python3 "$python_codes_path/read-func.py" "${run_prefix}-perfReport.txt" "${run_prefix}-FuncPercentList.txt" "${run_prefix}-FuncList.txt" "$benchmark_name"
echo ""
echo " 4) perf annotate --stdio & Capturing all deliquent load PCs for each function ...."
# One function name per line. `read -r` keeps backslashes literal, and every
# use of $FUNC is quoted so that a name containing whitespace neither splits
# into several arguments nor triggers an "ambiguous redirect".
while read -r FUNC; do
    echo " Function Name: $FUNC"
    annotate_out="${run_prefix}-${FUNC}.txt"
    perf annotate --stdio -M intel "$FUNC" > "$annotate_out"
    # Drop the first line of the annotate output before it is parsed.
    sed -i '1d' "$annotate_out"
    python3 "$python_codes_path/llc_missed_pcs_rfile.py" "$annotate_out" "${run_prefix}-${FUNC}-PCPersentList.txt" "${run_prefix}-${FUNC}-PCList.txt"
    # Accumulate the per-function PC lists into one list for the whole run.
    cat "${run_prefix}-${FUNC}-PCList.txt" >> "${run_prefix}-ALL-PCList.txt"
    #echo " done for Function: $FUNC"
done < "${run_prefix}-FuncList.txt"
#echo "done!"
echo ""

echo "############################# LBR sampling"

# Record user-space branch stacks (-j any,u) for at most 20 seconds.
# bc takes one fewer leading argument than the other benchmarks.
if [[ "$benchmark_name" == "bc" ]]
then
    timeout 20s perf record -e cycles:u -j any,u -o perf.data -- "./../../$benchmark_name" 1 "$n" "$d"
else
    timeout 20s perf record -e cycles:u -j any,u -o perf.data -- "./../../$benchmark_name" 0 1 "$n" "$d"
fi
# NOTE: from here on file names use "<n>-D<d>" (no "N" prefix) as the input tag.
g="${n}-D${d}"
time perf script -F ip,brstack -i perf.data > "dump-${benchmark_name}-INPUT-${g}-whole-app-LBRsamples-brstack.txt"
perf script -F ip,brstack,brstackinsn -i perf.data > "dump-${benchmark_name}-INPUT-${g}-whole-app-LBRsamples-brstackinsn.txt"

###### we should filter samples first!!!!!!
# Inputs shared by the three passes over the delinquent-load PC list.
pc_list="${benchmark_name}-INPUT-N${n}-D${d}-ALL-PCList.txt"
brstack_dump="dump-${benchmark_name}-INPUT-${g}-whole-app-LBRsamples-brstack.txt"
brstackinsn_dump="dump-${benchmark_name}-INPUT-${g}-whole-app-LBRsamples-brstackinsn.txt"

# Pass 1: pre-filter the LBR samples once per PC.
while read -r PC; do
    #echo " PC: $PC"
    python3 "$python_codes_path/first_filter_samples.py" "$brstackinsn_dump" "$PC"
done < "$pc_list"

# Pass 2: for each PC, find the source addresses of the enclosing branches and
# then, for every source, the matching destination.
while read -r PC; do
    #echo " PC: $PC"
    python3 "$python_codes_path/find_src_in_branches.py" "first-filter-${PC}.txt" "$PC"
    while read -r PC_src; do
        #echo " src: " $PC_src
        python3 "$python_codes_path/find_dest_in_branches.py" "first-filter-${PC}.txt" "$PC_src" "$PC"
    done < "in-branches-src-PC-${PC}.txt"
done < "$pc_list"
######

# Threshold on the average outer-loop distance; it selects which distance
# calculator is used below. Loop-invariant, so it is set once.
INT=200

# Pass 3: per PC, measure loop timing from the LBR samples and derive the
# prefetch distance. $dist keeps the value computed for the last PC.
while read -r PC; do
    # Only the first src/dest candidate found in pass 2 is used.
    src="$(sed "1q;d" "in-branches-src-PC-${PC}.txt")"
    dst="$(sed "1q;d" "in-branches-dest-PC-${PC}.txt")"
    echo " PC: $PC"
    echo " src: " "$src"
    echo " dst: " "$dst"

    # File-name stems used by the helper scripts for this (src, dst, PC) triple.
    loop_tag="0x${src}-0x${dst}"
    cycles_tag="${loop_tag}-cycles-PC-${PC}"
    outer_dist_file="${loop_tag}-dist-between-2-occur-outerloop-PC-${PC}.txt"

    python3 "$python_codes_path/filter_samples.py" "$brstackinsn_dump" "$PC"

    python3 "$python_codes_path/temp.py" "$brstack_dump" "0x${src}" "0x${dst}" "$PC"

    python3 "$python_codes_path/dist-between-2-occur-outerloop.py" "$brstack_dump" "0x${src}" "0x${dst}" "$PC"
    #python3 $python_codes_path/dist-between-2-occur-outerloop.py "filter-"$PC".txt" "0x"$src "0x"$dst $PC

    # The average is computed twice: on the full file and then on its last
    # 5000 lines (x.txt).
    tail -n 5000 "$outer_dist_file" > "x.txt"
    python3 "$python_codes_path/cal-avg-dist-outerloop.py" "$outer_dist_file" "0x${src}" "0x${dst}" "$PC"
    python3 "$python_codes_path/cal-avg-dist-outerloop.py" "x.txt" "0x${src}" "0x${dst}" "$PC"

    # Second line of the averages file is the value compared against INT.
    val="$(sed "2q;d" "${loop_tag}-avg-dist-outerloop-PC-${PC}.txt")"
    python3 "$python_codes_path/inner-iters.py" "filter-${PC}.txt" "0x${src}" "0x${dst}" "$PC"
    # "innet" is the spelling used in the file name passed by the original script.
    python3 "$python_codes_path/cal-avg-inner-iters.py" "${loop_tag}-innet-iters-PC-${PC}.txt" "0x${src}" "0x${dst}" "$PC"

    python3 "$python_codes_path/inner-avg-iter-time.py" "filter-${PC}.txt" "0x${src}" "0x${dst}" "$PC"
    python3 "$python_codes_path/cal-avg-inner-iter-time.py" "${loop_tag}-avg-inner-iter-time-PC-${PC}.txt" "0x${src}" "0x${dst}" "$PC"

    python3 "$python_codes_path/plot-scatter.py" "${cycles_tag}-new.txt" "${cycles_tag}-new-plot"
    python3 "$python_codes_path/test-plot.py" "${cycles_tag}-new.txt" "$cycles_tag"
    python3 "$python_codes_path/sort-data.py" "${cycles_tag}-test-plot.csv" "$cycles_tag"
    python3 "$python_codes_path/find-peaks.py" "${cycles_tag}-sorted-data.csv" "$cycles_tag"
    first_peak="$(sed "1q;d" "${cycles_tag}-peaks.csv")"
    sec_peak="$(sed "2q;d" "${cycles_tag}-peaks.csv")"
    three_peak="$(sed "3q;d" "${cycles_tag}-peaks.csv")"
    four_peak="$(sed "4q;d" "${cycles_tag}-peaks.csv")"
    # Truncate the average to an integer so it can be used in (( )).
    val_r=$(echo "$val" | awk '{print int($1)}')
    # NOTE(review): $first_peak and $four_peak are left unquoted on purpose;
    # the layout of a peaks.csv row is not visible here, so the original
    # word-splitting of those rows is preserved - confirm against find-peaks.py.
    if (( val_r > INT )); then
        dist=$(python3 "$python_codes_path/calculate-dist.py" "${cycles_tag}-new.txt" "$PC" $first_peak $four_peak "${benchmark_name}-INPUT-${g}")
    else
        dist=$(python3 "$python_codes_path/calculate-dist-crono.py" "${cycles_tag}-new.txt" "$PC" $first_peak $four_peak "${benchmark_name}-INPUT-${g}")
    fi
done < "$pc_list"



# Hand the per-PC distance table to the parent directory, then leave the
# LLC-misses working directory.
cp "${benchmark_name}-INPUT-${g}-ALL-dist2.csv" ../
cd ../


############################# END LBR sampling
#############################Perf stats for Baseline config
echo "#############################Perf stats for Baseline config"
# Baseline measurement: run the unmodified benchmark under `perf stat` inside
# its own directory and append the parsed counters to the shared results file.
STATS_DIR="Perf-Stats-${benchmark_name}-INPUT-N${n}-D${d}"
# -p makes a re-run tolerate an existing directory. Stop if the directory cannot
# be entered: everything below uses paths relative to it and would otherwise
# read and write in the wrong place.
mkdir -p "$STATS_DIR"
cd "$STATS_DIR" || exit 1
python3 "$python_codes_path/bench_name.py" ../../../../"CRONO-benchmarks-perf-stats-output.txt" "$benchmark_name"

stats_out="${benchmark_name}-INPUT-N${n}-D${d}-perf-stats.out"
# Counter set, written once instead of once per branch.
perf_events=(
    -e L1-dcache-loads -e L1-dcache-load-misses
    -e L2-loads -e L2-load-misses
    -e LLC-loads -e LLC-load-misses
    -e cycles -e instructions
    -e SW_PREFETCH_ACCESS.T1_T2 -e SW_PREFETCH_ACCESS.T0 -e SW_PREFETCH_ACCESS.NTA
    -e LOAD_HIT_PRE.SW_PF
    -e cache-misses
)
# bc takes one fewer leading argument than the other benchmarks.
if [[ "$benchmark_name" == "bc" ]]
then
    bench_args=(1 "$n" "$d")
else
    bench_args=(0 1 "$n" "$d")
fi
perf stat -o "$stats_out" "${perf_events[@]}" "./../../$benchmark_name" "${bench_args[@]}"

# Trailer that describes the configuration of this run.
{
    echo ""
    echo "Config: Baseline"
    echo " Nodes: ${n}"
    echo " Degree: ${d}"
    echo "---------------------------------"
} >> "$stats_out"
echo ""
python3 "$python_codes_path/perf_rfile_baseline.py" "$stats_out" ../../../../"CRONO-benchmarks-perf-stats-output.txt"
#############################Perf stats for Prefetching with different prefetch distances
cp "../../$benchmark_name" .
cp "../../${benchmark_name}.ll" .
echo ""
echo ""


# Disabled sweep over the fixed prefetch distances, kept for reference.
# The delimiter is quoted so that Bash treats the text as inert: with an
# unquoted delimiter the here-document is still expanded, which would execute
# the $( expr ... ) command substitution below even though the code is "off".
: <<'com'


    echo "Getting perf stats for different Prefetching distances ...."
    for dist in "${prefetch_distance[@]}"
    do
    echo "prefetch-distance: "$dist
    if (( $val_r < $INT ));then
    dist=$( expr 1000 '*' "$dist")
    else
    echo ""
    fi
    python3 $python_codes_path/mod_pc_dist_list.py ../$LLC_DIR/$benchmark_name"-INPUT-N"$n"-D"$d"-ALL-PCList.txt" $benchmark_name"-INPUT-N"$n"-D"$d"-ALL-PCList-Plus-dist"$dist".csv" $dist
    $AutoFDO10//create_llvm_prof --binary=$benchmark_name --profile=$benchmark_name"-INPUT-N"$n"-D"$d"-ALL-PCList-Plus-dist"$dist".csv" --profiler="prefetch" --format=text --out=$benchmark_name"-INPUT-N"$n"-D"$d"-dist"$dist"-prefetch.afdo"
    $LLVM10_buildMyPasses/bin/opt -load /soe/sjamilan/LLVM10/llvm-project-10.0.0/build_mypasses/lib/SWPrefetchingLLVMPass.so -S -SWPrefetchingLLVMPass -input-file $benchmark_name"-INPUT-N"$n"-D"$d"-dist"$dist"-prefetch.afdo" -dist $dist <$benchmark_name".ll"> $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist".ll"
    $LLVM10_buildMyPasses/bin/clang -g --std=c++0x -O3 -fdebug-info-for-profiling -Wall -Werror $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist".ll" -c
    $LLVM10_buildMyPasses/bin/clang -g --std=c++0x -O3 -fdebug-info-for-profiling -Wall -Werror $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist".o" -o $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist -lpthread -lrt
    python3 $python_codes_path/bench_name.py ../../../../"CRONO-benchmarks-perf-stats-output.txt" $benchmark_name
    if [[ "$benchmark_name" == "bc" ]]
    then
    perf stat -o $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist"-perf-stats.out" -e L1-dcache-loads -e L1-dcache-load-misses -e L2-loads -e L2-load-misses -e LLC-loads -e LLC-load-misses -e cycles -e instructions -e SW_PREFETCH_ACCESS.T1_T2 -e SW_PREFETCH_ACCESS.T0 -e SW_PREFETCH_ACCESS.NTA -e LOAD_HIT_PRE.SW_PF -e cache-misses ./$benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist 1 $n $d
    else
    perf stat -o $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist"-perf-stats.out" -e L1-dcache-loads -e L1-dcache-load-misses -e L2-loads -e L2-load-misses -e LLC-loads -e LLC-load-misses -e cycles -e instructions -e SW_PREFETCH_ACCESS.T1_T2 -e SW_PREFETCH_ACCESS.T0 -e SW_PREFETCH_ACCESS.NTA -e LOAD_HIT_PRE.SW_PF -e cache-misses ./$benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist 0 1 $n $d
    fi
    echo "" >> $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist"-perf-stats.out"
    echo "Config: Prefetching">> $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist"-perf-stats.out"
    echo " Nodes: " $n>> $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist"-perf-stats.out"
    echo " Degree: " $d>> $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist"-perf-stats.out"
    echo " prefetch-distance = " $dist>> $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist"-perf-stats.out"
    echo "---------------------------------" >> $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist"-perf-stats.out"
    echo ""
    python3 $python_codes_path/perf_rfile_pref.py $benchmark_name"-pref-INPUT-N"$n"-D"$d"-dist"$dist"-perf-stats.out" ../../../../"CRONO-benchmarks-perf-stats-output.txt"
    done

com



echo "#############################Perf stats for Prefetching with LBR prefetch distance"
# Fetch the distance table that the LBR stage copied into the parent directory.
cp "../${benchmark_name}-INPUT-${g}-ALL-dist2.csv" .
197 | echo "prefetch-distance: "$dist 198 | $AutoFDO10//create_llvm_prof --binary=$benchmark_name --profile=$benchmark_name"-INPUT-"$g"-ALL-dist2.csv" --profiler="prefetch" --format=text --out=$benchmark_name"-INPUT-"$g"-prefetch.afdo" 199 | $LLVM10_buildMyPasses/bin/opt -load /soe/sjamilan/LLVM10/llvm-project-10.0.0/build_mypasses/lib/SWPrefetchingLLVMPass.so -S -SWPrefetchingLLVMPass -input-file $benchmark_name"-INPUT-"$g"-prefetch.afdo" -dist $dist <$benchmark_name".ll"> $benchmark_name"-pref-INPUT-"$g".ll" 200 | $LLVM10_buildMyPasses/bin/clang --std=c++0x -O3 -fdebug-info-for-profiling -Wall -Werror $benchmark_name"-pref-INPUT-"$g".ll" -c 201 | $LLVM10_buildMyPasses/bin/clang -g --std=c++0x -O3 -fdebug-info-for-profiling -Wall -Werror $benchmark_name"-pref-INPUT-"$g".o" -o $benchmark_name"-pref-INPUT-"$g -lpthread -lrt 202 | python3 $python_codes_path/bench_name.py ../../../../"CGO17-benchmarks-perf-stats-output.txt" $benchmark_name 203 | 204 | if [[ "$benchmark_name" == "bc" ]] 205 | then 206 | perf stat -o $benchmark_name"-pref-INPUT-"$g"-perf-stats.out" -e L1-dcache-loads -e L1-dcache-load-misses -e L2-loads -e L2-load-misses -e LLC-loads -e LLC-load-misses -e cycles -e instructions -e SW_PREFETCH_ACCESS.T1_T2 -e SW_PREFETCH_ACCESS.T0 -e SW_PREFETCH_ACCESS.NTA -e LOAD_HIT_PRE.SW_PF -e cache-misses ./$benchmark_name"-pref-INPUT-"$g 1 $n $d 207 | else 208 | perf stat -o $benchmark_name"-pref-INPUT-"$g"-perf-stats.out" -e L1-dcache-loads -e L1-dcache-load-misses -e L2-loads -e L2-load-misses -e LLC-loads -e LLC-load-misses -e cycles -e instructions -e SW_PREFETCH_ACCESS.T1_T2 -e SW_PREFETCH_ACCESS.T0 -e SW_PREFETCH_ACCESS.NTA -e LOAD_HIT_PRE.SW_PF -e cache-misses ./$benchmark_name"-pref-INPUT-"$g 0 1 $n $d 209 | 210 | fi 211 | echo "" >> $benchmark_name"-pref-INPUT-"$g"-perf-stats.out" 212 | echo "Config: Prefetching">> $benchmark_name"-pref-INPUT-"$g"-perf-stats.out" 213 | echo " Input_graph = " $g>> $benchmark_name"-pref-INPUT-"$g"-perf-stats.out" 214 | echo 
" prefetch-distance = "$dist>> $benchmark_name"-pref-INPUT-"$g"-perf-stats.out" 215 | echo "---------------------------------" >> $benchmark_name"-pref-INPUT-"$g"-perf-stats.out" 216 | echo "" 217 | python3 $python_codes_path/perf_rfile_pref.py $benchmark_name"-pref-INPUT-"$g"-perf-stats.out" ../../../../"CRONO-benchmarks-perf-stats-output.txt" 218 | -------------------------------------------------------------------------------- /scripts/run-CRONO-benchmarks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ###How to run: 4 | ### create a "results" folder. 5 | ### ./scripts/run-CRONO-benchmarks.sh 6 | ### You need to fist set the PATHs. 7 | ### You can get CRONO benchmark suite fro "https://github.com/masabahmad/CRONO" and set the path to its "app" folder 8 | ### You can get the input graphs from SNAP "http://snap.stanford.edu/data/web-Google.html" 9 | 10 | ####################PATH 11 | benchmark_path="" 12 | results_path="" 13 | input_graphs_path="" 14 | python_codes_path="" 15 | scripts_path="" 16 | LLVM10_buildMyPasses="" 17 | 18 | ####################INPUT 19 | benchmarks=(bfs dfs bc pagerank sssp) 20 | 21 | bfs_input_graphs=(p2p-Gnutella31.txt p2p-Gnutella30.txt loc-brightkite_edges.txt) 22 | bfs_N=(80000 100000 90000) 23 | bfs_DEG=(8 16 10) 24 | 25 | pagerank_input_graphs=(web-Google.txt web-BerkStan.txt web-Stanford.txt web-NotreDame.txt roadNet-PA.txt roadNet-CA.txt) 26 | pagerank_N=(100000) 27 | pagerank_DEG=(100) 28 | 29 | bc_N=(56384 40000 10000) 30 | bc_DEG=(8 10 1000) 31 | 32 | sssp_N=(100000 80000) 33 | sssp_DEG=(5 4) 34 | 35 | dfs_N=(800000 900000) 36 | dfs_DEG=(800 400) 37 | ####################RUN 38 | 39 | python3 $python_codes_path/first_line_perf_rfile.py "CRONO-benchmarks-perf-stats-output.txt" 40 | #python3 $python_codes_path/first_line_toplev.py "CRONO-benchmarks-baseline-toplev-l3-output.txt" 41 | 42 | 43 | cd $results_path 44 | for benchmark_name in "${benchmarks[@]}" 45 | do 46 | 
if [[ "$benchmark_name" == "bfs" ]] 47 | then 48 | echo "" 49 | echo "Compile bfs becnhmark ..." 50 | bench_dir=$benchmark_name 51 | mkdir $bench_dir 52 | cd $bench_dir 53 | $LLVM10_buildMyPasses/bin/clang++ -gmlt -std=c++11 -O3 -fdebug-info-for-profiling -Wall $benchmark_path/$benchmark_name/$benchmark_name"_atomic.cc" -S -emit-llvm -o $benchmark_name".ll" 54 | $LLVM10_buildMyPasses/bin/clang++ -gmlt -std=c++11 -O3 -fdebug-info-for-profiling -Wall $benchmark_path/$benchmark_name/$benchmark_name"_atomic.cc" -o $benchmark_name -lpthread -lrt 55 | cp $scripts_path/capture_PCs_real.sh . 56 | cp $scripts_path/capture_PCs_syn.sh . 57 | echo "done!" 58 | echo"" 59 | for g in "${bfs_input_graphs[@]}" 60 | do 61 | gn=${g::-4} 62 | res_DIR=$benchmark_name"-INPUT-"$gn 63 | mkdir $res_DIR 64 | cd $res_DIR 65 | ./../capture_PCs_real.sh $benchmark_name $g 66 | cd .. 67 | ####Top_DOWN 68 | #TOP_DIR="Toplev-"$benchmark_name"-INPUT-"$gn 69 | #mkdir $TOP_DIR 70 | #cd $TOP_DIR 71 | #echo "" 72 | #echo "" 73 | #echo "Toplevel analysis for bfs becnhmark ..." 74 | #python3 $python_codes_path/bench_graph_name.py ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" $benchmark_name $gn 75 | #~/pmu-tools/toplev.py --core S0-C0 -l3 -v --no-desc taskset -c 0 -o $benchmark_name"-INPUT-"$gn"-toplev-l3.txt" -- ./../$benchmark_name 1 1 $input_graphs_path/$g 76 | #echo "-------" 77 | #python3 $python_codes_path/toplev_rfile.py $benchmark_name"-INPUT-"$gn"-toplev-l3.txt" ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" 78 | #cd .. 79 | 80 | 81 | done 82 | len=${#bfs_N[@]} 83 | for (( i=0; i<$len; i++ )) 84 | do 85 | res_DIR=$benchmark_name"-INPUT-N"${bfs_N[$i]}"-DEG"${bfs_DEG[$i]} 86 | mkdir $res_DIR 87 | cd $res_DIR 88 | ./../capture_PCs_syn.sh $benchmark_name ${bfs_N[$i]} ${bfs_DEG[$i]} 89 | cd .. 
90 | ###TOP-DOWN 91 | #TOP_DIR="Toplev-"$benchmark_name"-INPUT-N"${bfs_N[$i]}"-DEG"${bfs_DEG[$i]} 92 | #mkdir $TOP_DIR 93 | #cd $TOP_DIR 94 | #echo "" 95 | #echo "" 96 | #echo "Toplevel analysis for bfs becnhmark ..." 97 | #python3 $python_codes_path/bench_graph_name.py ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" $benchmark_name "N"${bfs_N[$i]}"-D"${bfs_DEG[$i]} 98 | #~/pmu-tools/toplev.py --core S0-C0 -l3 -v --no-desc taskset -c 0 -o $benchmark_name"-INPUT-N"${bfs_N[$i]}"-DEG"${bfs_DEG[$i]}"-toplev-l3.txt" -- ./../$benchmark_name 0 1 ${bfs_N[$i]} ${bfs_DEG[$i]} 99 | #echo "-------" 100 | #python3 $python_codes_path/toplev_rfile.py $benchmark_name"-INPUT-N"${bfs_N[$i]}"-DEG"${bfs_DEG[$i]}"-toplev-l3.txt" ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" 101 | #cd .. 102 | 103 | done 104 | cd .. 105 | fi 106 | 107 | 108 | if [[ "$benchmark_name" == "bc" ]] 109 | then 110 | echo "" 111 | echo "Compile bc becnhmark ..." 112 | bench_dir=$benchmark_name 113 | mkdir $bench_dir 114 | cd $bench_dir 115 | $LLVM10_buildMyPasses/bin/clang++ -gmlt -std=c++11 -O3 -fdebug-info-for-profiling -Wall $benchmark_path/$benchmark_name/$benchmark_name".cc" -S -emit-llvm -o $benchmark_name".ll" 116 | $LLVM10_buildMyPasses/bin/clang++ -gmlt -std=c++11 -O3 -fdebug-info-for-profiling -Wall $benchmark_path/$benchmark_name/$benchmark_name".cc" -o $benchmark_name -lpthread -lrt 117 | cp $scripts_path/capture_PCs_real.sh . 118 | cp $scripts_path/capture_PCs_syn.sh . 119 | 120 | echo "done!" 121 | echo"" 122 | for g in "${bc_input_graphs[@]}" 123 | do 124 | gn=${g::-4} 125 | res_DIR=$benchmark_name"-INPUT-"$gn 126 | mkdir $res_DIR 127 | cd $res_DIR 128 | ./../capture_PCs_real.sh $benchmark_name $g 129 | cd .. 130 | ####TOP-DOWN 131 | #TOP_DIR="Toplev-"$benchmark_name"-INPUT-"$gn 132 | #mkdir $TOP_DIR 133 | #cd $TOP_DIR 134 | #echo "" 135 | #echo "" 136 | #echo "Toplevel analysis bc becnhmark ..." 
137 | #python3 $python_codes_path/bench_graph_name.py ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" $benchmark_name $gn 138 | #~/pmu-tools/toplev.py --core S0-C0 -l3 -v --no-desc taskset -c 0 -o $benchmark_name"-INPUT-"$gn"-toplev-l3.txt" -- ./../$benchmark_name 1 1 $input_graphs_path/$g 139 | #echo "-------" 140 | #python3 $python_codes_path/toplev_rfile.py $benchmark_name"-INPUT-"$gn"-toplev-l3.txt" ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" 141 | #cd .. 142 | done 143 | len=${#bc_N[@]} 144 | for (( i=0; i<$len; i++ )) 145 | do 146 | res_DIR=$benchmark_name"-INPUT-N"${bc_N[$i]}"-DEG"${bc_DEG[$i]} 147 | mkdir $res_DIR 148 | cd $res_DIR 149 | ./../capture_PCs_syn.sh $benchmark_name ${bc_N[$i]} ${bc_DEG[$i]} 150 | cd .. 151 | ###TOP-DOWN 152 | #TOP_DIR="Toplev-"$benchmark_name"-INPUT-N"${bc_N[$i]}"-DEG"${bc_DEG[$i]} 153 | #mkdir $TOP_DIR 154 | #cd $TOP_DIR 155 | #echo "" 156 | #echo "" 157 | #echo "Toplevel analysis for bc becnhmark ..." 158 | #python3 $python_codes_path/bench_graph_name.py ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" $benchmark_name "N"${bc_N[$i]}"-D"${bc_DEG[$i]} 159 | #~/pmu-tools/toplev.py --core S0-C0 -l3 -v --no-desc taskset -c 0 -o $benchmark_name"-INPUT-N"${bc_N[$i]}"-DEG"${bc_DEG[$i]}"-toplev-l3.txt" -- ./../$benchmark_name 1 ${bc_N[$i]} ${bc_DEG[$i]} 160 | #echo "-------" 161 | #python3 $python_codes_path/toplev_rfile.py $benchmark_name"-INPUT-N"${bc_N[$i]}"-DEG"${bc_DEG[$i]}"-toplev-l3.txt" ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" 162 | #cd .. 163 | done 164 | cd .. 165 | fi 166 | 167 | 168 | if [[ "$benchmark_name" == "pagerank" ]] 169 | then 170 | echo "" 171 | echo "Compile pagerank becnhmark ..." 
172 | bench_dir=$benchmark_name 173 | mkdir $bench_dir 174 | cd $bench_dir 175 | $LLVM10_buildMyPasses/bin/clang++ -gmlt -std=c++11 -O3 -fdebug-info-for-profiling -Wall $benchmark_path/$benchmark_name/$benchmark_name"_lock.cc" -S -emit-llvm -o $benchmark_name".ll" 176 | $LLVM10_buildMyPasses/bin/clang++ -gmlt -std=c++11 -O3 -fdebug-info-for-profiling -Wall $benchmark_path/$benchmark_name/$benchmark_name"_lock.cc" -o $benchmark_name -lpthread -lrt 177 | cp $scripts_path/capture_PCs_real.sh . 178 | #cp $scripts_path/capture_PCs_real_prev.sh . 179 | cp $scripts_path/capture_PCs_syn.sh . 180 | #cp $scripts_path/capture_PCs_syn_prev.sh . 181 | echo "done!" 182 | echo"" 183 | for g in "${pagerank_input_graphs[@]}" 184 | do 185 | gn=${g::-4} 186 | res_DIR=$benchmark_name"-INPUT-"$gn 187 | mkdir $res_DIR 188 | cd $res_DIR 189 | ./../capture_PCs_real.sh $benchmark_name $g 190 | cd .. 191 | ######TOP-DOWN 192 | #TOP_DIR="Toplev-"$benchmark_name"-INPUT-"$gn 193 | #mkdir $TOP_DIR 194 | #cd $TOP_DIR 195 | #echo "" 196 | #echo "" 197 | #echo "Toplevel analysis pagerank becnhmark ..." 198 | #python3 $python_codes_path/bench_graph_name.py ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" $benchmark_name $gn 199 | #~/pmu-tools/toplev.py --core S0-C0 -l3 -v --no-desc taskset -c 0 -o $benchmark_name"-INPUT-"$gn"-toplev-l3.txt" -- ./../$benchmark_name 1 1 $input_graphs_path/$g 200 | #echo "-------" 201 | #python3 $python_codes_path/toplev_rfile.py $benchmark_name"-INPUT-"$gn"-toplev-l3.txt" ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" 202 | #cd .. 203 | 204 | done 205 | len=${#pagerank_N[@]} 206 | for (( i=0; i<$len; i++ )) 207 | do 208 | res_DIR=$benchmark_name"-INPUT-N"${pagerank_N[$i]}"-DEG"${pagerank_DEG[$i]} 209 | mkdir $res_DIR 210 | cd $res_DIR 211 | ./../capture_PCs_syn.sh $benchmark_name ${pagerank_N[$i]} ${pagerank_DEG[$i]} 212 | cd .. 
213 | ######TOP-DOWN 214 | #TOP_DIR="Toplev-"$benchmark_name"-INPUT-N"${pagerank_N[$i]}"-DEG"${pagerank_DEG[$i]} 215 | #mkdir $TOP_DIR 216 | #cd $TOP_DIR 217 | #echo "" 218 | #echo "" 219 | #echo "Toplevel analysis for pagerank becnhmark ..." 220 | #python3 $python_codes_path/bench_graph_name.py ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" $benchmark_name "N"${pagerank_N[$i]}"-D"${pagerank_DEG[$i]} 221 | #~/pmu-tools/toplev.py --core S0-C0 -l3 -v --no-desc taskset -c 0 -o $benchmark_name"-INPUT-N"${bc_N[$i]}"-DEG"${bc_DEG[$i]}"-toplev-l3.txt" -- ./../$benchmark_name 0 1 ${pagerank_N[$i]} ${pagerank_DEG[$i]} 222 | #echo "-------" 223 | #python3 $python_codes_path/toplev_rfile.py $benchmark_name"-INPUT-N"${pagerank_N[$i]}"-DEG"${pagerank_DEG[$i]}"-toplev-l3.txt" ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" 224 | #cd .. 225 | 226 | 227 | done 228 | cd .. 229 | fi 230 | 231 | if [[ "$benchmark_name" == "sssp" ]] 232 | then 233 | echo "sssp" 234 | echo "" 235 | echo "Compile sssp becnhmark ..." 236 | bench_dir=$benchmark_name 237 | mkdir $bench_dir 238 | cd $bench_dir 239 | $LLVM10_buildMyPasses/bin/clang -g --std=c++0x -O3 -fdebug-info-for-profiling -Wall -Werror $benchmark_path/$benchmark_name/$benchmark_name"_outer_atomic.cc" -S -emit-llvm -o $benchmark_name".ll" 240 | $LLVM10_buildMyPasses/bin/clang -g --std=c++0x -O3 -fdebug-info-for-profiling -Wall -Werror $benchmark_name".ll" -c 241 | $LLVM10_buildMyPasses/bin/clang -g --std=c++0x -O3 -fdebug-info-for-profiling -Wall -Werror $benchmark_name".o" -o $benchmark_name -lpthread -lrt 242 | cp $scripts_path/capture_PCs_real.sh . 243 | cp $scripts_path/capture_PCs_syn.sh . 244 | #cp $scripts_path/capture_PCs_real_prev.sh . 245 | #cp $scripts_path/capture_PCs_syn_prev.sh . 246 | echo "done!" 
247 | echo"" 248 | for g in "${sssp_input_graphs[@]}" 249 | do 250 | gn=${g::-4} 251 | res_DIR=$benchmark_name"-INPUT-"$gn 252 | mkdir $res_DIR 253 | cd $res_DIR 254 | ./../capture_PCs_real.sh $benchmark_name $g 255 | cd .. 256 | #####TOP-down 257 | #TOP_DIR="Toplev-"$benchmark_name"-INPUT-"$gn 258 | #mkdir $TOP_DIR 259 | #cd $TOP_DIR 260 | #echo "" 261 | #echo "" 262 | #echo "Toplevel analysis sssp becnhmark ..." 263 | #python3 $python_codes_path/bench_graph_name.py ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" $benchmark_name $gn 264 | #~/pmu-tools/toplev.py --core S0-C0 -l3 -v --no-desc taskset -c 0 -o $benchmark_name"-INPUT-"$gn"-toplev-l3.txt" -- ./../$benchmark_name 1 1 $input_graphs_path/$g 265 | #echo "-------" 266 | #python3 $python_codes_path/toplev_rfile.py $benchmark_name"-INPUT-"$gn"-toplev-l3.txt" ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" 267 | #cd .. 268 | 269 | done 270 | len=${#sssp_N[@]} 271 | for (( i=0; i<$len; i++ )) 272 | do 273 | res_DIR=$benchmark_name"-INPUT-N"${sssp_N[$i]}"-DEG"${sssp_DEG[$i]} 274 | mkdir $res_DIR 275 | cd $res_DIR 276 | ./../capture_PCs_syn.sh $benchmark_name ${sssp_N[$i]} ${sssp_DEG[$i]} 277 | cd .. 278 | ####TOP-DOWN 279 | #TOP_DIR="Toplev-"$benchmark_name"-INPUT-N"${sssp_N[$i]}"-DEG"${sssp_DEG[$i]} 280 | #mkdir $TOP_DIR 281 | #cd $TOP_DIR 282 | #echo "" 283 | #echo "" 284 | #echo "Toplevel analysis for bfs becnhmark ..." 
285 | #python3 $python_codes_path/bench_graph_name.py ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" $benchmark_name "N"${sssp_N[$i]}"-D"${sssp_DEG[$i]} 286 | #~/pmu-tools/toplev.py --core S0-C0 -l3 -v --no-desc taskset -c 0 -o $benchmark_name"-INPUT-N"${sssp_N[$i]}"-DEG"${sssp_DEG[$i]}"-toplev-l3.txt" -- ./../$benchmark_name 0 1 ${sssp_N[$i]} ${sssp_DEG[$i]} 287 | #echo "-------" 288 | #python3 $python_codes_path/toplev_rfile.py $benchmark_name"-INPUT-N"${sssp_N[$i]}"-DEG"${sssp_DEG[$i]}"-toplev-l3.txt" ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" 289 | #cd .. 290 | 291 | 292 | done 293 | cd .. 294 | 295 | fi 296 | 297 | if [[ "$benchmark_name" == "dfs" ]] 298 | then 299 | echo "" 300 | echo "Compile dfs becnhmark ..." 301 | bench_dir=$benchmark_name 302 | mkdir $bench_dir 303 | cd $bench_dir 304 | $LLVM10_buildMyPasses/bin/clang++ -gmlt -std=c++11 -O3 -fdebug-info-for-profiling -Wall $benchmark_path/$benchmark_name/$benchmark_name".cc" -S -emit-llvm -o $benchmark_name".ll" 305 | $LLVM10_buildMyPasses/bin/clang++ -gmlt -std=c++11 -O3 -fdebug-info-for-profiling -Wall $benchmark_path/$benchmark_name/$benchmark_name".cc" -o $benchmark_name -lpthread -lrt 306 | cp $scripts_path/capture_PCs_real.sh . 307 | cp $scripts_path/capture_PCs_syn.sh . 308 | echo "done!" 309 | echo"" 310 | for g in "${dfs_input_graphs[@]}" 311 | do 312 | gn=${g::-4} 313 | res_DIR=$benchmark_name"-INPUT-"$gn 314 | mkdir $res_DIR 315 | cd $res_DIR 316 | ./../capture_PCs_real.sh $benchmark_name $g 317 | cd .. 318 | ######TOP-DOWN 319 | #TOP_DIR="Toplev-"$benchmark_name"-INPUT-"$gn 320 | #mkdir $TOP_DIR 321 | #cd $TOP_DIR 322 | #echo "" 323 | #echo "" 324 | #echo "Toplevel analysis dfs becnhmark ..." 
325 | #python3 $python_codes_path/bench_graph_name.py ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" $benchmark_name $gn 326 | #~/pmu-tools/toplev.py --core S0-C0 -l3 -v --no-desc taskset -c 0 -o $benchmark_name"-INPUT-"$gn"-toplev-l3.txt" -- ./../$benchmark_name 1 1 $input_graphs_path/$g 327 | #echo "-------" 328 | #python3 $python_codes_path/toplev_rfile.py $benchmark_name"-INPUT-"$gn"-toplev-l3.txt" ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" 329 | #cd .. 330 | 331 | done 332 | len=${#dfs_N[@]} 333 | for (( i=0; i<$len; i++ )) 334 | do 335 | res_DIR=$benchmark_name"-INPUT-N"${dfs_N[$i]}"-DEG"${dfs_DEG[$i]} 336 | mkdir $res_DIR 337 | cd $res_DIR 338 | ./../capture_PCs_syn.sh $benchmark_name ${dfs_N[$i]} ${dfs_DEG[$i]} 339 | cd .. 340 | ######TOP-DOWN 341 | #TOP_DIR="Toplev-"$benchmark_name"-INPUT-N"${dfs_N[$i]}"-DEG"${dfs_DEG[$i]} 342 | #mkdir $TOP_DIR 343 | #cd $TOP_DIR 344 | #echo "" 345 | #echo "" 346 | #echo "Toplevel analysis for bfs becnhmark ..." 347 | #python3 $python_codes_path/bench_graph_name.py ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" $benchmark_name "N"${dfs_N[$i]}"-D"${dfs_DEG[$i]} 348 | #~/pmu-tools/toplev.py --core S0-C0 -l3 -v --no-desc taskset -c 0 -o $benchmark_name"-INPUT-N"${dfs_N[$i]}"-DEG"${dfs_DEG[$i]}"-toplev-l3.txt" -- ./../$benchmark_name 0 1 ${dfs_N[$i]} ${dfs_DEG[$i]} 349 | #echo "-------" 350 | #python3 $python_codes_path/toplev_rfile.py $benchmark_name"-INPUT-N"${dfs_N[$i]}"-DEG"${dfs_DEG[$i]}"-toplev-l3.txt" ../../../"CRONO-benchmarks-baseline-toplev-l3-output.txt" 351 | #cd .. 352 | done 353 | cd .. 
354 | fi 355 | 356 | done 357 | 358 | 359 | -------------------------------------------------------------------------------- /SWPrefetchingLLVMPass/SWPrefetchingLLVMPass.cpp: -------------------------------------------------------------------------------- 1 | #include "llvm/CodeGen/MachineModuleInfo.h" 2 | #include "llvm/IR/DebugInfoMetadata.h" 3 | #include "llvm/ProfileData/SampleProf.h" 4 | #include "llvm/ProfileData/SampleProfReader.h" 5 | #include "llvm/Transforms/IPO/SampleProfile.h" 6 | #include "llvm/Support/CommandLine.h" 7 | #include "llvm/IR/IRBuilder.h" 8 | #include "llvm/IR/InstVisitor.h" 9 | #include "llvm/IR/LegacyPassManager.h" 10 | #include "llvm/IR/ValueMap.h" 11 | #include "llvm/Transforms/IPO/PassManagerBuilder.h" 12 | #include "llvm/Analysis/ScalarEvolution.h" 13 | #include "llvm/Analysis/LoopInfo.h" 14 | #include "llvm/Transforms/Utils/ValueMapper.h" 15 | 16 | using namespace llvm; 17 | using namespace sampleprof; 18 | 19 | bool AutoFDOMapping; 20 | 21 | static cl::opt PrefetchFile("input-file", cl::desc("Specify input filename for mypass"), cl::value_desc("filename")); 22 | 23 | cl::list LBR_dist("dist", cl::desc("Specify offset value from LBR"), cl::OneOrMore); 24 | 25 | SmallVector IndirectLoads; 26 | SmallVector IndirectInstrs; 27 | SmallVector IndirectPhis; 28 | Instruction* IndirectLoad; 29 | int64_t IndirectPrefetchDist; 30 | 31 | namespace { 32 | struct SWPrefetchingLLVMPass : public FunctionPass { 33 | bool doInitialization(Module &M) override; 34 | bool runOnFunction(Function &F) override; 35 | void getAnalysisUsage(AnalysisUsage &AU) const override { 36 | AU.addRequired(); 37 | AU.addPreserved(); 38 | AU.addRequired(); 39 | } 40 | 41 | bool SearchAlgorithm(Instruction* I ,LoopInfo &LI, Instruction* &Phi, SmallVector &Loads, SmallVector &Instrs, SmallVector &Phis); 42 | bool InjectPrefeches(Instruction* curLoad, LoopInfo &LI, SmallVector &CapturedPhis, SmallVector &CapturedLoads, SmallVector &CapturedInstrs, int64_t prefetchDist, bool 
ItIsIndirectLoad); 43 | bool InjectPrefechesOnePhiPartOne(Instruction* curLoad, LoopInfo &LI, SmallVector &CapturedPhis, SmallVector &CapturedLoads, SmallVector &CapturedInstrs, int64_t prefetchDist, bool ItIsIndirectLoad); 44 | bool InjectPrefechesOnePhiPartTwo(Instruction* I, LoopInfo &LI,Instruction* Phi, SmallVector &DepInstrs, int64_t prefetchDist); 45 | CmpInst* getCompareInstrADD(Loop* L, Instruction* nextInd); 46 | CmpInst* getCompareInstrGetElememntPtr(Loop* L, Instruction* nextInd); 47 | PHINode* getCanonicalishInductionVariable(Loop* L); 48 | bool CheckLoopCond(Loop* L); 49 | Instruction* GetIncomingValue(Loop* L, llvm::Instruction* curPN); 50 | ConstantInt* getValueAddedToIndVar(Loop* L, Instruction* nextInd); 51 | ConstantInt* getValueAddedToIndVarInLoopIterxxx(Loop* L); 52 | Value* getLoopEndCondxxx(Loop* L); 53 | bool IsDep(Instruction* I ,LoopInfo &LI, Instruction* &Phi,SmallVector &DependentLoads,SmallVector &DependentInstrs, SmallVector &DPhis); 54 | public: 55 | static char ID; 56 | SWPrefetchingLLVMPass() : FunctionPass(ID) {} 57 | Module *M = 0; 58 | private: 59 | std::unique_ptr Reader; 60 | 61 | };//struct 62 | 63 | using Hints = SampleRecord::CallTargetMap; 64 | ErrorOr getHints(const llvm::Instruction &Inst, const llvm::sampleprof::FunctionSamples *TopSamples) { 65 | if (const auto &Loc = Inst.getDebugLoc()){ 66 | if (const auto *Samples = TopSamples->findFunctionSamples(Loc)){ 67 | return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc), 68 | Loc->getBaseDiscriminator()); 69 | } 70 | } 71 | return std::error_code(); 72 | } 73 | }//namespace 74 | 75 | char SWPrefetchingLLVMPass::ID = 0; 76 | 77 | 78 | 79 | 80 | 81 | bool SWPrefetchingLLVMPass::doInitialization(Module &M) { 82 | if (PrefetchFile.empty()){ 83 | errs()<<"PrefetchFile is Empty!\n"; 84 | return false; 85 | } 86 | 87 | LLVMContext &Ctx = M.getContext(); 88 | ErrorOr> ReaderOrErr = SampleProfileReader::create(PrefetchFile, Ctx); 89 | if (std::error_code EC = 
ReaderOrErr.getError()) { 90 | std::string Msg = "Could not open profile: " + EC.message(); 91 | Ctx.diagnose(DiagnosticInfoSampleProfile(PrefetchFile, Msg, DiagnosticSeverity::DS_Warning)); 92 | return false; 93 | } 94 | 95 | Reader = std::move(ReaderOrErr.get()); 96 | /* NOTE(review): the value returned by read() is not checked, so a profile that fails to parse is not reported here. */ Reader->read(); 97 | 98 | for(auto &F : M) { 99 | const llvm::sampleprof::FunctionSamples* SamplesReaded = Reader->getSamplesFor(F); 100 | if(SamplesReaded){ 101 | AutoFDOMapping=true; 102 | } 103 | } 104 | 105 | return true; 106 | } 107 | 108 | 109 | 110 | /* Recursive walk over the operands of I. A PHI operand is stored in Phi and appended to Phis if not already present. A load operand is appended to Loads if not already present and is always queued. Any other instruction operand is queued only when LI maps its block to the same loop as I. Every queued instruction is appended to Instrs (no duplicate check) and then searched recursively. Returns true when I has a PHI operand or when anything was queued. */ bool SWPrefetchingLLVMPass::SearchAlgorithm(Instruction* I ,LoopInfo &LI, Instruction* &Phi, SmallVector &Loads ,SmallVector &Instrs, SmallVector &Phis){ 111 | bool PhiFound = false; 112 | Use* OperandList = I->getOperandList(); 113 | Use* NumOfOperands = OperandList + I->getNumOperands(); /* one-past-the-end pointer of the operand array, not a count */ 114 | Loop* curInstrLoop= LI.getLoopFor(I->getParent()); 115 | SmallVector NeedToSearch; 116 | 117 | for(Use* op = OperandList; op < NumOfOperands; op++) { 118 | if(PHINode* CurOpIsPhiNode = dyn_cast(op->get())){ 119 | Phi = CurOpIsPhiNode; 120 | if(!(std::find(Phis.begin(), Phis.end(), CurOpIsPhiNode) != Phis.end())){ 121 | Phis.push_back(CurOpIsPhiNode); 122 | } 123 | PhiFound=true; 124 | } 125 | else if(LoadInst * curOperandIsLoad = dyn_cast(op->get())){ 126 | if(!(std::find(Loads.begin(), Loads.end(), curOperandIsLoad) != Loads.end())){ 127 | Loads.push_back(curOperandIsLoad); 128 | } 129 | NeedToSearch.push_back(curOperandIsLoad); 130 | 131 | } 132 | else if(Instruction* OtherTypeInstr = dyn_cast(op->get())){ 133 | Loop* OtherTypeInstrLoop= LI.getLoopFor(OtherTypeInstr->getParent()); 134 | if(OtherTypeInstrLoop == curInstrLoop){ 135 | NeedToSearch.push_back(OtherTypeInstr); 136 | } 137 | } 138 | } 139 | for(size_t index=0 ; index< NeedToSearch.size(); index++){ 140 | Instrs.push_back(NeedToSearch[index]); 141 | bool temp = SearchAlgorithm(NeedToSearch[index],LI,Phi,Loads,Instrs,Phis); 142 | /* NOTE(review): temp is never used and PhiFound is set regardless of what the recursive call returned, whereas IsDep only sets it when the recursive call returns true - confirm this is intended. */ PhiFound=true; 143 | } 144 | return PhiFound; 145 | } 146 | 147 | 
148 | /* Recursive operand walk that only follows operands whose block LI maps to the same loop as I. A same-loop PHI is stored in Phi and appended to DependentInstrsToCurLoad and Phis. A same-loop load is appended to DependentLoadsToCurLoad and DependentInstrsToCurLoad and searched recursively. Any other same-loop instruction is appended to DependentInstrsToCurLoad and searched recursively. Nothing is checked for duplicates. Returns true when a same-loop PHI was reached directly or through recursion. */ bool SWPrefetchingLLVMPass::IsDep(Instruction* I ,LoopInfo &LI, Instruction* &Phi,SmallVector &DependentLoadsToCurLoad,SmallVector &DependentInstrsToCurLoad, SmallVector &Phis){ 149 | bool PhiFound = false; 150 | Use* OperandList = I->getOperandList(); 151 | Use* NumOfOperands = OperandList + I->getNumOperands(); /* one-past-the-end pointer of the operand array, not a count */ 152 | Loop* curInstrLoop= LI.getLoopFor(I->getParent()); 153 | 154 | for(Use* op = OperandList; op < NumOfOperands; op++) { 155 | if(PHINode* CurOpIsPhiNode = dyn_cast(op->get())){ 156 | Loop* PhiNodeLoop = LI.getLoopFor(CurOpIsPhiNode->getParent()); 157 | if(PhiNodeLoop == curInstrLoop){ 158 | Phi = CurOpIsPhiNode; 159 | DependentInstrsToCurLoad.push_back(CurOpIsPhiNode); 160 | Phis.push_back(CurOpIsPhiNode); 161 | PhiFound=true; 162 | } 163 | } 164 | else if(LoadInst * curOperandIsLoad = dyn_cast(op->get())){ 165 | Loop* LoadInstrLoop = LI.getLoopFor(curOperandIsLoad->getParent()); 166 | if(LoadInstrLoop == curInstrLoop){ 167 | DependentLoadsToCurLoad.push_back(curOperandIsLoad); 168 | DependentInstrsToCurLoad.push_back(curOperandIsLoad); 169 | if(IsDep(curOperandIsLoad,LI,Phi,DependentLoadsToCurLoad,DependentInstrsToCurLoad,Phis)){ 170 | PhiFound=true; 171 | } 172 | } 173 | } 174 | else if(Instruction* OtherTypeInstr = dyn_cast(op->get())){ 175 | Loop* OtherTypeInstrLoop= LI.getLoopFor(OtherTypeInstr->getParent()); 176 | if(OtherTypeInstrLoop == curInstrLoop){ 177 | DependentInstrsToCurLoad.push_back(OtherTypeInstr); 178 | if(IsDep(OtherTypeInstr,LI,Phi,DependentLoadsToCurLoad,DependentInstrsToCurLoad,Phis)){ 179 | PhiFound=true; 180 | } 181 | } 182 | } 183 | } 184 | return PhiFound; 185 | } 186 | 187 | 188 | /* Collects the instructions of L's exiting block (nullptr is returned when getExitingBlock gives no block), scans them from last to first, and for each compare inspects the instruction placed directly before it: when that is an Add with an operand that makeLoopInvariant accepts, operand 1 is returned if the cast to a constant succeeds. Returns nullptr when nothing matches. count is written but never read. */ ConstantInt* SWPrefetchingLLVMPass::getValueAddedToIndVarInLoopIterxxx(Loop* L){ 189 | SetVector BBInsts; 190 | auto B = L->getExitingBlock(); 191 | int count=0; 192 | if(!B) return nullptr; 193 | for(Instruction &J : *B) { 194 | Instruction* I = &J; 195 | BBInsts.insert(I); 196 | count++; 197 | } 198 | bool Changed = false; 199 | for(int i= 
BBInsts.size()-1;i>=0;i--){ 200 | CmpInst *CI = dyn_cast(BBInsts[i]); 201 | if(CI){ 202 | /* NOTE(review): when the compare is the first instruction of the block, i is 0 and BBInsts[i-1] indexes before the start; AddI is also dereferenced on the next statement without a null check on the dyn_cast result. */ Instruction *AddI = dyn_cast(BBInsts[i-1]); 203 | if(AddI->getOpcode()==Instruction::Add){ 204 | if(L->makeLoopInvariant(AddI->getOperand(1),Changed)) { 205 | if(ConstantInt *constInt = dyn_cast(AddI->getOperand(1))){ 206 | return constInt; 207 | } 208 | 209 | } 210 | /* NOTE(review): this branch tests operand 0 for invariance but still casts operand 1 - looks like a copy of the branch above; confirm which operand was meant. */ if(L->makeLoopInvariant(AddI->getOperand(0),Changed)) { 211 | if(ConstantInt *constInt = dyn_cast(AddI->getOperand(1))){ 212 | return constInt; 213 | } 214 | } 215 | } 216 | } 217 | } 218 | return nullptr; 219 | } 220 | 221 | 222 | /* Returns the first PHI in L's header whose value coming from the backedge block is an Add with the PHI itself as operand 0 and an operand 1 that passes the dyn_cast (target type not visible in this copy - presumably a constant). Requires the header to have exactly two predecessors, exactly one of them inside L; returns nullptr otherwise or when no PHI matches. */ PHINode* SWPrefetchingLLVMPass::getCanonicalishInductionVariable(Loop* L) { 223 | BasicBlock *H = L->getHeader(); 224 | BasicBlock *Incoming = nullptr, *Backedge = nullptr; 225 | pred_iterator PI = pred_begin(H); 226 | assert(PI != pred_end(H) && "Loop must have at least one backedge!"); 227 | Backedge = *PI++; 228 | if (PI == pred_end(H)) { 229 | return nullptr; // dead loop 230 | } 231 | Incoming = *PI++; 232 | if (PI != pred_end(H)){ 233 | return nullptr; // multiple backedges? 
234 | } 235 | /* The two predecessors were read in iteration order; swap them if the one taken as Incoming is actually the block inside the loop. */ if (L->contains(Incoming)) { 236 | if (L->contains(Backedge)) 237 | return nullptr; 238 | std::swap(Incoming, Backedge); 239 | }else if (!L->contains(Backedge)){ 240 | return nullptr; 241 | } 242 | for (BasicBlock::iterator I = H->begin(); isa(I); ++I) { 243 | PHINode *PN = cast(I); 244 | if (Instruction *Inc = dyn_cast(PN->getIncomingValueForBlock(Backedge))){ 245 | if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN){ 246 | if (dyn_cast(Inc->getOperand(1))){ 247 | return PN; 248 | } 249 | } 250 | } 251 | } 252 | return nullptr; 253 | } 254 | 255 | 256 | /* Scans L's exiting block from last instruction to first and, for the first compare on which makeLoopInvariant succeeds for an operand (operand 1 is tried before operand 0), returns that operand. Returns nullptr when getExitingBlock gives no block or no compare qualifies. count is written but never read. */ Value* SWPrefetchingLLVMPass::getLoopEndCondxxx(Loop* L){ 257 | SetVector BBInsts; 258 | auto B = L->getExitingBlock(); 259 | int count=0; 260 | if(!B) return nullptr; 261 | for(Instruction &J : *B) { 262 | Instruction* I = &J; 263 | BBInsts.insert(I); 264 | count++; 265 | } 266 | bool Changed = false; 267 | for(int i= BBInsts.size()-1;i>=0;i--){ 268 | CmpInst *CI = dyn_cast(BBInsts[i]); 269 | if(CI){ 270 | if(L->makeLoopInvariant(CI->getOperand(1),Changed)) { 271 | return CI->getOperand(1); 272 | } 273 | if(L->makeLoopInvariant(CI->getOperand(0),Changed)) { 274 | return CI->getOperand(0); 275 | } 276 | } 277 | } 278 | return nullptr; 279 | } 280 | 281 | 282 | /* Returns the last compare in L's exiting block that has nextInd as operand 0 or 1, provided nextInd is an Add; nullptr when there is no such compare or getExitingBlock gives no block. count is written but never read. */ CmpInst* SWPrefetchingLLVMPass::getCompareInstrADD(Loop* L, Instruction* nextInd){ 283 | SetVector BBInsts; 284 | auto B = L->getExitingBlock(); 285 | int count=0; 286 | 287 | if(!B) return nullptr; 288 | for(Instruction &J : *B){ 289 | Instruction* I = &J; 290 | BBInsts.insert(I); 291 | count++; 292 | } 293 | for(int i= BBInsts.size()-1;i>=0;i--){ 294 | CmpInst *CI = dyn_cast(BBInsts[i]); 295 | if(CI&& (CI->getOperand(0)==nextInd || CI->getOperand(1)==nextInd )&& nextInd->getOpcode()==Instruction::Add){ 296 | return CI; 297 | } 298 | } 299 | 300 | return nullptr; 301 | } 302 | 303 | 304 | /* Returns the last compare in L's exiting block that has nextInd as operand 0 or 1, provided nextInd is a GetElementPtr; nullptr when there is no such compare or getExitingBlock gives no block. */ CmpInst* SWPrefetchingLLVMPass::getCompareInstrGetElememntPtr(Loop* L, Instruction* nextInd){ 305 | SetVector BBInsts; 306 | auto B = 
L->getExitingBlock(); 307 | int count=0; /* written below but never read */ 308 | 309 | if(!B) return nullptr; 310 | for(Instruction &J : *B){ 311 | Instruction* I = &J; 312 | BBInsts.insert(I); 313 | count++; 314 | } 315 | for(int i= BBInsts.size()-1;i>=0;i--){ 316 | CmpInst *CI = dyn_cast(BBInsts[i]); 317 | if(CI&& (CI->getOperand(0)==nextInd || CI->getOperand(1)==nextInd )&& nextInd->getOpcode()==Instruction::GetElementPtr){ 318 | return CI; 319 | } 320 | } 321 | 322 | return nullptr; 323 | } 324 | 325 | 326 | /* Returns true only when L's header has exactly two predecessors and exactly one of them is inside L; every other shape returns false. The local swap of Incoming and Backedge has no effect on the result. */ bool SWPrefetchingLLVMPass::CheckLoopCond(Loop* L) { 327 | bool OKtoPrefetch =false; 328 | BasicBlock *H = L->getHeader(); 329 | BasicBlock *Incoming = nullptr, *Backedge = nullptr; 330 | pred_iterator PI = pred_begin(H); 331 | assert(PI != pred_end(H) && "Loop must have at least one backedge!"); 332 | Backedge = *PI++; 333 | if (PI == pred_end(H)) { 334 | return OKtoPrefetch; // dead loop 335 | } 336 | Incoming = *PI++; 337 | if (PI != pred_end(H)){ 338 | return OKtoPrefetch; // multiple backedges? 339 | } 340 | 341 | if (L->contains(Incoming)) { 342 | if (L->contains(Backedge)){ 343 | return OKtoPrefetch; 344 | } 345 | std::swap(Incoming, Backedge); 346 | }else if (!L->contains(Backedge)){ 347 | return OKtoPrefetch; 348 | } 349 | OKtoPrefetch =true; 350 | return OKtoPrefetch; 351 | } 352 | 353 | 354 | /* When curPN is one of the PHIs at the top of L's header, returns the value it receives from the header's first predecessor if that value is an Instruction; returns nullptr otherwise. NOTE(review): the first predecessor is taken to be the backedge without the L->contains test and swap that CheckLoopCond and getCanonicalishInductionVariable perform, so if the first predecessor lies outside L this yields the PHI's entry value instead of its update - confirm. */ Instruction* SWPrefetchingLLVMPass::GetIncomingValue(Loop* L, llvm::Instruction* curPN) { 355 | BasicBlock *H = L->getHeader(); 356 | BasicBlock *Backedge = nullptr; 357 | pred_iterator PI = pred_begin(H); 358 | Backedge = *PI++; 359 | 360 | for (BasicBlock::iterator I = H->begin(); isa(I); ++I) { 361 | PHINode *PN = cast(I); 362 | if(PN==curPN){ 363 | if (Instruction *IncomingInstr = dyn_cast(PN->getIncomingValueForBlock(Backedge))){ 364 | return IncomingInstr; 365 | } 366 | } 367 | } 368 | return nullptr; 369 | } 370 | 371 | 372 | /* Returns operand 1 of nextInd as a ConstantInt when makeLoopInvariant succeeds on operand 1 or on operand 0 and the cast of operand 1 succeeds; nullptr otherwise. NOTE(review): the operand-0 branch also casts operand 1 - confirm which operand was meant. */ ConstantInt* SWPrefetchingLLVMPass::getValueAddedToIndVar(Loop* L, Instruction* nextInd){ 373 | bool Changed = false; 374 | 
if(L->makeLoopInvariant(nextInd->getOperand(1),Changed)) { 375 | if(ConstantInt *constInt = dyn_cast(nextInd->getOperand(1))){ 376 | return constInt; 377 | } 378 | } 379 | if(L->makeLoopInvariant(nextInd->getOperand(0),Changed)) { 380 | if(ConstantInt *constInt = dyn_cast(nextInd->getOperand(1))){ 381 | return constInt; 382 | } 383 | } 384 | return nullptr; 385 | } 386 | 387 | 388 | /* Inserts prefetch code for curLoad and returns done (true once a prefetch was emitted on the paths that set it). Overview of the body that follows: (1) With ItIsIndirectLoad set, the arguments are first saved into the file-scope Indirect* variables. (2) When CapturedPhis holds exactly one PHI in curLoad's loop and CheckLoopCond accepts that loop, the PHI is advanced by prefetchDist: an Add increment becomes phi + prefetchDist (negated when the step constant is negative) and is combined with the loop end condition through icmp and select; a GetElementPtr increment becomes an inbounds GEP by prefetchDist. The non-PHI entries of CapturedInstrs are then cloned before curLoad, walking the vector from last to first and remapping operands through Transforms, and a call to the llvm.prefetch intrinsic with trailing arguments 0, 3, 1 is inserted before curLoad. Afterwards each entry of IndirectLoads in IndirectLoadLoop whose SearchAlgorithm results are all contained in IndirectPhis and IndirectInstrs is prefetched recursively with distance IndirectPrefetchDist*(index+2). (3) Otherwise, with prefetchDist above 1000, the distance is divided by 1000 and the code works on a captured PHI whose loop differs from curLoad's loop, inserting clones after that PHI's block prologue. (4) Otherwise the function recurses once per captured PHI that lives in curLoad's loop, with ItIsIndirectLoad set to true. NOTE(review): IndirectLoadLoop is left uninitialised when ItIsIndirectLoad is false but is compared against later in the stride-load loop. NOTE(review): in the GetElementPtr path one condition reads getOperand(0) != IncInstr && getOperand(0) == IncInstr, which can never hold, and EndCond there has no initial value. NOTE(review): the negative-step path stores the unclamped NewInstr in Transforms while the positive-step path stores the select result. NOTE(review): the prefetchDist above 1000 path does not appear to set done. Several loop headers in this copy of the body are truncated, presumably by extraction. */ bool SWPrefetchingLLVMPass::InjectPrefeches(Instruction* curLoad, LoopInfo &LI, SmallVector &CapturedPhis, SmallVector &CapturedLoads, SmallVector &CapturedInstrs, int64_t prefetchDist, bool ItIsIndirectLoad){ 389 | 390 | Loop* IndirectLoadLoop; 391 | if(ItIsIndirectLoad){ 392 | IndirectLoad = curLoad; 393 | IndirectLoads = CapturedLoads; 394 | IndirectInstrs = CapturedInstrs; 395 | IndirectPhis = CapturedPhis; 396 | IndirectPrefetchDist = prefetchDist; 397 | IndirectLoadLoop = LI.getLoopFor(IndirectLoad->getParent()); 398 | } 399 | 400 | bool done=false; 401 | bool PrefetchGetElem=false; 402 | Instruction* phi =nullptr; 403 | Loop* curLoadLoop = LI.getLoopFor(curLoad->getParent()); 404 | bool donePrefetchingForPhi =false; 405 | 406 | if(CapturedPhis.size()==1){ 407 | phi=CapturedPhis[0]; 408 | ValueMap Transforms; 409 | IRBuilder<> Builder(curLoad); 410 | Loop* PhiLoop = LI.getLoopFor(phi->getParent()); 411 | 412 | for(auto &curDep : CapturedInstrs){ 413 | if(Transforms.count(curDep)){ 414 | continue; 415 | } 416 | if(curDep == phi){ 417 | if(PhiLoop == curLoadLoop){ 418 | //1) figure out (ADD, MUL, GETELEMPTR) 419 | //2) capture all exit conditions of BB 420 | //3) figure out how to prefetch 421 | if(CheckLoopCond(PhiLoop)){ 422 | if(GetIncomingValue(PhiLoop, phi)){ 423 | Instruction* IncInstr =GetIncomingValue(PhiLoop, phi); 424 | if(IncInstr->getOpcode() == Instruction::Add && IncInstr->getOperand(0) == phi){ 425 | errs()<<"ADD\n"; 426 | Instruction* NewInstr; 427 | Instruction* mod; 428 | if(dyn_cast(IncInstr->getOperand(1))){ 429 | if(getCompareInstrADD(PhiLoop, IncInstr)){ 430 | 
Value* EndCond =nullptr; 431 | bool Changed = false; 432 | CmpInst* compareInstr = getCompareInstrADD(PhiLoop, IncInstr); 433 | if(PhiLoop->makeLoopInvariant(compareInstr->getOperand(0),Changed)) { 434 | EndCond = compareInstr->getOperand(0); 435 | }//makeLoopInvariant(0) 436 | if(PhiLoop->makeLoopInvariant(compareInstr->getOperand(1),Changed)) { 437 | EndCond = compareInstr->getOperand(1); 438 | }//makeLoopInvariant(1) 439 | ConstantInt* UpdateInd = getValueAddedToIndVar(PhiLoop, IncInstr); 440 | if(UpdateInd){ 441 | if(UpdateInd->isNegative()){ 442 | int64_t curPrefetchDist = 0-prefetchDist; 443 | NewInstr = dyn_cast(Builder.CreateAdd(curDep,curDep->getType()->isIntegerTy(64) ? ConstantInt::get(Type::getInt64Ty((curDep->getParent())->getContext()),curPrefetchDist) : ConstantInt::get(Type::getInt32Ty((curDep->getParent())->getContext()),curPrefetchDist))); 444 | 445 | if(EndCond!=nullptr){ 446 | if(EndCond->getType() != NewInstr->getType()) { 447 | Instruction* cast = CastInst::CreateIntegerCast(EndCond,NewInstr->getType(),true); 448 | Builder.Insert(cast); 449 | Value* cmp = Builder.CreateICmp(CmpInst::ICMP_SGT,cast,NewInstr); 450 | mod = dyn_cast(Builder.CreateSelect(cmp,cast,NewInstr)); 451 | }//if(EndCond->getType() != NewInstr->getType()) 452 | else{ 453 | Value* cmp = Builder.CreateICmp(CmpInst::ICMP_SGT,EndCond,NewInstr); 454 | mod = dyn_cast(Builder.CreateSelect(cmp,EndCond,NewInstr)); 455 | }//else(EndCond->getType() == NewInstr->getType()) 456 | }//if(EndCond!=nullptr) 457 | Transforms.insert(std::pair(curDep,NewInstr)); 458 | donePrefetchingForPhi =true; 459 | }//isNegative() 460 | else{ 461 | NewInstr = dyn_cast(Builder.CreateAdd(curDep,curDep->getType()->isIntegerTy(64) ? 
ConstantInt::get(Type::getInt64Ty((curDep->getParent())->getContext()),prefetchDist) : ConstantInt::get(Type::getInt32Ty((curDep->getParent())->getContext()),prefetchDist))); 462 | if(EndCond!=nullptr){ 463 | if(EndCond->getType() != NewInstr->getType()){ 464 | Instruction* cast = CastInst::CreateIntegerCast(EndCond,NewInstr->getType(),true); 465 | Builder.Insert(cast); 466 | Value* cmp = Builder.CreateICmp(CmpInst::ICMP_SLT,cast,NewInstr); 467 | mod = dyn_cast(Builder.CreateSelect(cmp,cast,NewInstr)); 468 | }//if(EndCond->getType() != NewInstr->getType()) 469 | else{ 470 | Value* cmp = Builder.CreateICmp(CmpInst::ICMP_SLT,EndCond,NewInstr); 471 | mod = dyn_cast(Builder.CreateSelect(cmp,EndCond,NewInstr)); 472 | }//else(EndCond->getType() == NewInstr->getType()) 473 | Transforms.insert(std::pair(curDep,mod)); 474 | }///if(EndCond!=nullptr) 475 | else{ 476 | Transforms.insert(std::pair(curDep,NewInstr)); 477 | } 478 | donePrefetchingForPhi =true; 479 | }//else(Positive) 480 | } 481 | }//getCompareInstrADD 482 | }//getOperand(1) 483 | }//ADD 484 | if(IncInstr->getOpcode() == Instruction::Mul && IncInstr->getOperand(0) == phi){ 485 | errs()<<"Mul\n"; 486 | if(dyn_cast(IncInstr->getOperand(1))){ 487 | //errs()<< " Operand#1: "<< *(dyn_cast(IncInstr->getOperand(1))) << "\n"; 488 | }//getOperand(1) 489 | }//MUL 490 | 491 | if(IncInstr->getOpcode() == Instruction::GetElementPtr && IncInstr->getOperand(0) == phi){ 492 | GetElementPtrInst* NewInstr; 493 | Instruction* mod; 494 | SmallVector NextPhiDependencies; 495 | NextPhiDependencies.push_back(IncInstr); 496 | if(dyn_cast(IncInstr->getOperand(1))){ 497 | if(getCompareInstrGetElememntPtr(PhiLoop, IncInstr)){ 498 | Value* EndCond; 499 | bool Changed = false; 500 | CmpInst* compareInstr = getCompareInstrGetElememntPtr(PhiLoop, IncInstr); 501 | if(PhiLoop->makeLoopInvariant(compareInstr->getOperand(0),Changed)) { 502 | EndCond = compareInstr->getOperand(0); 503 | NextPhiDependencies.push_back( 
dyn_cast(compareInstr->getOperand(0))); 504 | }//makeLoopInvariant(0) 505 | else if(PhiLoop->makeLoopInvariant(compareInstr->getOperand(1),Changed)) { 506 | EndCond = compareInstr->getOperand(1); 507 | NextPhiDependencies.push_back( dyn_cast(compareInstr->getOperand(1))); 508 | }//makeLoopInvariant(1) 509 | else if(compareInstr->getOperand(1) != IncInstr && compareInstr->getOperand(0) == IncInstr){ 510 | EndCond = compareInstr->getOperand(1); 511 | NextPhiDependencies.push_back( dyn_cast(compareInstr->getOperand(1))); 512 | }// else if 513 | else if(compareInstr->getOperand(0) != IncInstr && compareInstr->getOperand(0) == IncInstr){ 514 | EndCond = compareInstr->getOperand(0); 515 | NextPhiDependencies.push_back( dyn_cast(compareInstr->getOperand(0))); 516 | }// else if 517 | 518 | NextPhiDependencies.push_back(compareInstr); 519 | ConstantInt* UpdateInd = getValueAddedToIndVar(PhiLoop, IncInstr); 520 | //Instruction* tempInst; 521 | Value* cmp; 522 | for(size_t index=0 ; indexgetOpcode()==Instruction::GetElementPtr){ 524 | if((NextPhiDependencies[index]->getOperand(0)==curDep || NextPhiDependencies[index]->getOperand(1)==curDep )){ 525 | NewInstr =dyn_cast(Builder.CreateInBoundsGEP(curDep,curDep->getType()->isIntegerTy(64) ? 
ConstantInt::get(Type::getInt64Ty((curDep->getParent())->getContext()),prefetchDist): ConstantInt::get(Type::getInt32Ty((curDep->getParent())->getContext()),prefetchDist))); 526 | Transforms.insert(std::pair(curDep,NewInstr)); 527 | donePrefetchingForPhi =true; 528 | PrefetchGetElem =true; 529 | } 530 | } 531 | } 532 | }//getCompareInstrGetElememntPtr(PhiLoop, IncInstr) 533 | else{ 534 | NewInstr =dyn_cast(Builder.CreateInBoundsGEP(phi,ConstantInt::get(Type::getInt64Ty((curDep->getParent())->getContext()),prefetchDist))); 535 | Transforms.insert(std::pair(phi,NewInstr)); 536 | donePrefetchingForPhi =true; 537 | PrefetchGetElem=true; 538 | 539 | } 540 | }//getOperand(1) 541 | }//Getelementptr 542 | }//if(GetIncomingValue(PhiLoop, phi)) 543 | }//if(CheckLoopCond(PhiLoop)) 544 | 545 | else{ 546 | return done; 547 | } 548 | }//if(PhiLoop == curLoadLoop) 549 | }//if(curDep == phi) 550 | }//for(auto &curDep : CapturedInstrs) 551 | if(donePrefetchingForPhi){ 552 | for(int index=CapturedInstrs.size()-1 ; index>=0; index--){ 553 | auto &curDep = CapturedInstrs[index]; 554 | if( !dyn_cast(curDep)){ 555 | Instruction* NewInstr = curDep->clone(); 556 | Use* OpListNewInstr = NewInstr->getOperandList(); 557 | int64_t NewInstrsNumOp = NewInstr ->getNumOperands(); 558 | for(int64_t index = 0; index(op)) { 561 | GetElementPtrInst *opIsInstr =dyn_cast(op); 562 | if(Transforms.count(opIsInstr)) { 563 | NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 564 | } 565 | } 566 | else if(Instruction* opIsInstr = dyn_cast(op)) { 567 | if(Transforms.count(opIsInstr)) { 568 | NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 569 | } 570 | } 571 | } 572 | NewInstr->insertBefore(curLoad); 573 | Transforms.insert(std::pair(curDep,NewInstr)); 574 | }//if(phi!=curDep) 575 | }//for(int index=CapturedInstrs.size()-1 ; index>=0; index--) 576 | if(!PrefetchGetElem){ 577 | Type *I32 = Type::getInt32Ty((curLoad->getParent())->getContext()); 578 | Type *I8 = 
Type::getInt8PtrTy(((curLoad->getFunction())->getParent())->getContext()); 579 | Function *PrefetchFunc = Intrinsic::getDeclaration((curLoad->getFunction())->getParent(), Intrinsic::prefetch, I8); 580 | Instruction* oldGep = dyn_cast(curLoad->getOperand(0)); 581 | Instruction* gep = dyn_cast(Transforms.lookup(oldGep)); 582 | Instruction* cast = dyn_cast(Builder.CreateBitCast (gep, Type::getInt8PtrTy(((curLoad->getFunction())->getParent())->getContext()))); 583 | Value* ar[] = { 584 | cast, 585 | ConstantInt::get(I32 ,0), 586 | ConstantInt::get(I32 ,3), 587 | ConstantInt::get(I32 ,1) 588 | }; 589 | CallInst* call = CallInst::Create(PrefetchFunc,ar); 590 | call->insertBefore(curLoad); 591 | } 592 | else{ 593 | Type *I32 = Type::getInt32Ty((curLoad->getParent())->getContext()); 594 | Function *PrefetchFunc = Intrinsic::getDeclaration((curLoad->getFunction())->getParent(), Intrinsic::prefetch, (curLoad->getOperand(0))->getType()); 595 | Instruction* oldGep = dyn_cast(curLoad->getOperand(0)); 596 | Instruction* gep = dyn_cast(Transforms.lookup(oldGep)); 597 | Value* ar[] = { 598 | gep, 599 | ConstantInt::get(I32 ,0), 600 | ConstantInt::get(I32 ,3), 601 | ConstantInt::get(I32 ,1) 602 | }; 603 | CallInst* call = CallInst::Create(PrefetchFunc,ar); 604 | call->insertBefore(curLoad); 605 | 606 | } 607 | if(IndirectLoads.size()>0){ 608 | for(size_t index=0 ; index< IndirectLoads.size(); index++){ 609 | auto &curStrideLoad = IndirectLoads[index]; 610 | Loop* curStrideLoadLoop = LI.getLoopFor(curStrideLoad->getParent()); 611 | if(curStrideLoadLoop == IndirectLoadLoop){ 612 | bool ItIsStrideLoad = false; 613 | Instruction * StridePhi = nullptr; 614 | SmallVector StrideLoads; 615 | SmallVector StrideInstrs; 616 | SmallVector StridePhis; 617 | int64_t StridePrefetchDist; 618 | if(SearchAlgorithm(curStrideLoad,LI,StridePhi,StrideLoads,StrideInstrs,StridePhis)){ 619 | for(size_t index=0; index< StridePhis.size(); index++){ 620 | StrideInstrs.push_back(StridePhis[StridePhis.size()-1 
-index]); 621 | } 622 | bool NotFoundAPhi = false; 623 | for(long unsigned int j=0; j< StridePhis.size(); j++){ 624 | if(!(std::find(IndirectPhis.begin(), IndirectPhis.end(), StridePhis[j]) != IndirectPhis.end())){ 625 | NotFoundAPhi =true; 626 | }//if(!(std::find(CapturedPhis.begin(),..))) 627 | }//for(long unsigned int j=0; j< StridePhis.size(); j++) 628 | bool NotFoundAnInstr = false; 629 | for(long unsigned int j=0; j< StrideInstrs.size(); j++){ 630 | if(!(std::find(IndirectInstrs.begin(), IndirectInstrs.end(), StrideInstrs[j]) != IndirectInstrs.end())){ 631 | NotFoundAnInstr =true; 632 | }//if(!(std::find(CapturedPhis.begin(),..))) 633 | }//for(long unsigned int j=0; j< StridePhis.size(); j++) 634 | if(!NotFoundAnInstr && !NotFoundAPhi){ 635 | ItIsStrideLoad=true; 636 | StridePrefetchDist = IndirectPrefetchDist*(index+2); 637 | }//if(!NotFoundAnInstr && !NotFoundAPhi) 638 | if(ItIsStrideLoad){ 639 | if(InjectPrefeches(curStrideLoad,LI,StridePhis, StrideLoads, StrideInstrs, StridePrefetchDist,false)){ 640 | done=true; 641 | }//if(InjectPrefeches(...)) 642 | }//if(ItIsStrideLoad) 643 | }//if(SearchAlgorithm) 644 | }//if(curStrideLoadLoop == curLoadLoop) 645 | }//for(int index=CapturedLoads.size()-1 ; index>=0; index--) 646 | }//if(IndirectLoads.size()>0) 647 | 648 | done =true; 649 | }//if(donePrefetchingForPhi) 650 | }//if(Phi.size()==1) 651 | else{ 652 | if(prefetchDist > 1000){ 653 | prefetchDist = prefetchDist/1000; 654 | Loop* curPLoop; 655 | Loop* curLoop; 656 | std::vector trans_new_instructions; 657 | std::vector old_trans_new_instructions; 658 | std::vector new_instructions; 659 | llvm::ValueToValueMapTy vmap; 660 | ValueMap Transforms; 661 | Instruction* last; 662 | //Instruction* InsideLoopPhi; 663 | Instruction* cmp; 664 | Instruction* x; 665 | 666 | 667 | for(auto p: CapturedPhis){ 668 | curPLoop = LI.getLoopFor(p->getParent()); 669 | curLoop = LI.getLoopFor(curLoad->getParent()); 670 | //if(curPLoop ==curLoop) 671 | //InsideLoopPhi=p; 672 | 
if(curPLoop !=curLoop){ 673 | phi=p; 674 | curPLoop = LI.getLoopFor(p->getParent()); 675 | auto PEB = curPLoop->getExitingBlock(); 676 | Value * EndCond; 677 | if(PEB ){ 678 | SmallVector DepPhiInsts; 679 | SetVector PEBInsts; 680 | if(PEB){ 681 | for(Instruction &J : *PEB) { 682 | Instruction* I = &J; 683 | PEBInsts.insert(I); 684 | } 685 | } 686 | bool CIexist=false; 687 | CmpInst *CI; 688 | for(int i= PEBInsts.size()-1;i>=0;i--){ 689 | //errs()<<"inst "<(PEBInsts[i]); 691 | if(CI){ 692 | CIexist=true; 693 | } 694 | } 695 | if(CIexist){ 696 | for(int i= PEBInsts.size()-1;i>=0;i--){ 697 | if(!dyn_cast(PEBInsts[i]) && !dyn_cast(PEBInsts[i]) && !dyn_cast(PEBInsts[i])) 698 | DepPhiInsts.push_back(PEBInsts[i]); 699 | } 700 | } 701 | 702 | if(DepPhiInsts.size() > 0) { 703 | llvm::Instruction * insertPt = phi->getParent()->getFirstNonPHIOrDbg(); 704 | for (int i= DepPhiInsts.size()-1; i>=0;i--) { 705 | auto *inst= DepPhiInsts[i]; 706 | auto *new_inst = inst->clone(); 707 | if(new_inst->getOpcode() == Instruction::Add){ Value* val; 708 | if(new_inst->getOperand(0)->getType()->isIntegerTy(64)){ 709 | val = ConstantInt::get(Type::getInt64Ty((curLoad->getParent())->getContext()),prefetchDist); 710 | } 711 | else{ 712 | val = ConstantInt::get(Type::getInt32Ty((curLoad->getParent())->getContext()),prefetchDist); 713 | } 714 | new_inst->setOperand(1, val); 715 | } 716 | new_inst->insertBefore(insertPt); 717 | new_instructions.push_back(new_inst); 718 | vmap[inst] = new_inst; 719 | last =new_inst; 720 | insertPt = new_inst->getNextNode(); 721 | } 722 | 723 | for (auto * i : new_instructions) { 724 | llvm::RemapInstruction(i, vmap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 725 | if(dyn_cast(i)) 726 | cmp=dyn_cast(i); 727 | } 728 | } 729 | 730 | 731 | IRBuilder<> Builder(last->getNextNode()); 732 | Instruction* NewInstr; 733 | NewInstr = dyn_cast(Builder.CreateAdd(phi,phi->getType()->isIntegerTy(64) ? 
ConstantInt::get(Type::getInt64Ty((curLoad->getParent())->getContext()),prefetchDist) : ConstantInt::get(Type::getInt32Ty((curLoad->getParent())->getContext()),prefetchDist))); 734 | Transforms.insert(std::pair(phi,NewInstr)); 735 | x = NewInstr; 736 | SmallVector SDepInstrs_insideLoop; 737 | for(int index=CapturedInstrs.size()-1 ; index>=0; index--){ 738 | if(LI.getLoopFor(curLoad->getParent()) == LI.getLoopFor(CapturedInstrs[index]->getParent()) ){ 739 | SDepInstrs_insideLoop.push_back(CapturedInstrs[index]); 740 | } 741 | } 742 | bool theSLoad =false; 743 | Instruction * SLoad; 744 | for(auto &t: SDepInstrs_insideLoop){ 745 | if(dyn_cast(t)){ 746 | theSLoad=true; 747 | SLoad =t; 748 | } 749 | } 750 | Instruction * Sphi = nullptr; 751 | SmallVector SLoads; 752 | SmallVector SInstrs; 753 | SmallVector SPhis; 754 | 755 | if(theSLoad){ 756 | if(SearchAlgorithm(SLoad,LI,Sphi,SLoads,SInstrs,SPhis)){ 757 | for(size_t index=0; index< SPhis.size(); index++){ 758 | SInstrs.push_back(SPhis[SPhis.size()-1 -index]); 759 | } 760 | for(int index=SInstrs.size()-1 ; index>=0; index--){ 761 | auto &curDep = SInstrs[index]; 762 | } 763 | } 764 | }//if(theSLoad) 765 | 766 | SmallVector InstrsToInsert; 767 | bool phiFound=false; 768 | int start_index; 769 | if(theSLoad){ 770 | InstrsToInsert =SInstrs; 771 | } 772 | else{ 773 | InstrsToInsert =CapturedInstrs; 774 | } 775 | for(int index=InstrsToInsert.size()-1 ; index>=0; index--){ 776 | if(!phiFound){ 777 | auto &curDep = InstrsToInsert[index]; 778 | Use* OpListNewInstr = curDep->getOperandList(); 779 | int64_t NewInstrsNumOp= curDep ->getNumOperands(); 780 | for(int64_t i = 0; i(curDep)) { 792 | errs()<<"\n"; 793 | } 794 | else{ 795 | Instruction* NewInstr = curDep->clone(); 796 | Use* OpListNewInstr = NewInstr->getOperandList(); 797 | int64_t NewInstrsNumOp= NewInstr ->getNumOperands(); 798 | for(int64_t index = 0; index(op)) { 801 | GetElementPtrInst *opIsInstr =dyn_cast(op); 802 | if(Transforms.count(opIsInstr)) { 803 | 
NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 804 | } 805 | } 806 | else if(Instruction* opIsInstr = dyn_cast(op)) { 807 | if(Transforms.count(opIsInstr)) { 808 | NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 809 | } 810 | } 811 | } 812 | NewInstr->insertAfter(x); 813 | last_gap =NewInstr; 814 | Transforms.insert(std::pair(curDep,NewInstr)); 815 | trans_new_instructions.push_back(NewInstr); 816 | old_trans_new_instructions.push_back(curDep); 817 | x=NewInstr; 818 | } 819 | bool insert =false; 820 | for(int index=start_index-1 ; index>= 0; index--){ 821 | insert =false; 822 | auto &curDep = InstrsToInsert[index]; 823 | if(PHINode * pNode = dyn_cast(curDep)) { 824 | errs()<<"\n"; 825 | } 826 | else{ 827 | if(dyn_cast(curDep)){ 828 | Instruction *temp =dyn_cast(curDep); 829 | last_gap=temp; 830 | if((std::find(CapturedPhis.begin(), CapturedPhis.end(), temp->getOperand(1)) != CapturedPhis.end())){ 831 | Value *val = ConstantInt::get(Type::getInt64Ty(((curLoad->getFunction())->getParent())->getContext()),0); 832 | Instruction* NewInstr = curDep->clone(); 833 | NewInstr->setOperand(1, val); 834 | Use* OpListNewInstr = NewInstr->getOperandList(); 835 | int64_t NewInstrsNumOp= NewInstr ->getNumOperands(); 836 | for(int64_t index = 0; index(op)) { 839 | GetElementPtrInst *opIsInstr =dyn_cast(op); 840 | if(Transforms.count(opIsInstr)) { 841 | NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 842 | insert=true; 843 | } 844 | } 845 | else if(Instruction* opIsInstr = dyn_cast(op)) { 846 | if(Transforms.count(opIsInstr)) { 847 | NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 848 | insert=true; 849 | } 850 | } 851 | } 852 | NewInstr->insertAfter(x); 853 | Transforms.insert(std::pair(curDep,NewInstr)); 854 | trans_new_instructions.push_back(NewInstr); 855 | old_trans_new_instructions.push_back(curDep); 856 | x=NewInstr; 857 | } 858 | else{ 859 | Instruction* NewInstr = curDep->clone(); 860 | Use* OpListNewInstr = 
NewInstr->getOperandList(); 861 | int64_t NewInstrsNumOp= NewInstr ->getNumOperands(); 862 | for(int64_t index = 0; index(op)) { 865 | GetElementPtrInst *opIsInstr =dyn_cast(op); 866 | if(Transforms.count(opIsInstr)) { 867 | NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 868 | insert=true; 869 | } 870 | } 871 | else if(Instruction* opIsInstr = dyn_cast(op)) { 872 | if(Transforms.count(opIsInstr)) { 873 | NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 874 | insert=true; 875 | } 876 | } 877 | } 878 | NewInstr->insertAfter(x); 879 | Transforms.insert(std::pair(curDep,NewInstr)); 880 | trans_new_instructions.push_back(NewInstr); 881 | old_trans_new_instructions.push_back(curDep); 882 | x=NewInstr; 883 | } 884 | } 885 | else{ 886 | Instruction* NewInstr = curDep->clone(); 887 | Use* OpListNewInstr = NewInstr->getOperandList(); 888 | int64_t NewInstrsNumOp= NewInstr ->getNumOperands(); 889 | for(int64_t index = 0; index(op)) { 892 | GetElementPtrInst *opIsInstr =dyn_cast(op); 893 | if(Transforms.count(opIsInstr)) { 894 | NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 895 | insert =true; 896 | } 897 | } 898 | else if(Instruction* opIsInstr = dyn_cast(op)) { 899 | if(Transforms.count(opIsInstr)) { 900 | NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 901 | insert=true; 902 | } 903 | } 904 | } 905 | NewInstr->insertAfter(x); 906 | Transforms.insert(std::pair(curDep,NewInstr)); 907 | trans_new_instructions.push_back(NewInstr); 908 | old_trans_new_instructions.push_back(curDep); 909 | x=NewInstr; 910 | } 911 | } 912 | } 913 | Type *I8 = Type::getInt8PtrTy(((curLoad->getFunction())->getParent())->getContext()); 914 | Type *I32 = Type::getInt32Ty((curLoad->getParent())->getContext()); 915 | Function *PrefetchFunc = Intrinsic::getDeclaration((curLoad->getFunction())->getParent(), Intrinsic::prefetch, I8); 916 | Instruction* oldGep = dyn_cast(last_gap); 917 | Instruction* gep = dyn_cast(Transforms.lookup(oldGep)); 918 | Instruction* cast 
= dyn_cast(Builder.CreateBitCast (gep, Type::getInt8PtrTy(((curLoad->getFunction())->getParent())->getContext()))); 919 | Value* ar[] = { 920 | cast, 921 | ConstantInt::get(I32 ,0), 922 | ConstantInt::get(I32 ,3), 923 | ConstantInt::get(I32 ,1) 924 | }; 925 | CallInst* call = CallInst::Create(PrefetchFunc,ar); 926 | call->insertAfter(cast); 927 | x=call; 928 | } 929 | } 930 | } 931 | }//prefetchDist>1000 932 | else{ 933 | Instruction* InnerPhi =nullptr; 934 | Loop* LoadLoop = LI.getLoopFor(curLoad->getParent()); 935 | SmallVector InnerPhis; 936 | for(int index=CapturedPhis.size()-1 ; index>=0; index--){ 937 | Loop* InnerPhiLoop = LI.getLoopFor(CapturedPhis[index]->getParent()); 938 | if(InnerPhiLoop ==LoadLoop ){ 939 | InnerPhi =CapturedPhis[index]; 940 | InnerPhis.push_back(InnerPhi); 941 | if(InjectPrefeches(curLoad,LI,InnerPhis,CapturedLoads, CapturedInstrs, prefetchDist ,true)){ 942 | done=true; 943 | } 944 | }//if( InnerPhiLoop ==LoadLoop ) 945 | }//for(int index=CapturedPhis.size()-1 ; index>=0; index--) 946 | }//prefetchDist<1000 947 | }//else(CapturedPhis.size()!=1) 948 | return done; 949 | } 950 | 951 | /* InjectPrefechesOnePhiPartTwo: emits a prefetch for the address used by I. When Phi occurs in DepInstrs, an advanced copy of it (Phi plus or minus prefetchDist, or an in-bounds GEP on Phi) is recorded in Transforms. Every non-PHI entry of DepInstrs is then cloned in front of I with its operands remapped through Transforms, the transformed counterpart of operand 0 of I is bitcast to an i8 pointer, and a call to the prefetch intrinsic is inserted before I. Returns true whenever the end of the function is reached. NOTE(review): this copy of the code seems to have lost its template arguments and parts of two operand loops, so those parts are not described here. */ 952 | bool SWPrefetchingLLVMPass::InjectPrefechesOnePhiPartTwo(Instruction* I, LoopInfo &LI,Instruction* Phi, SmallVector &DepInstrs, int64_t prefetchDist){ 953 | bool done =false; 954 | bool nonCanonical=false; 955 | Instruction* phi =nullptr; 956 | phi =Phi; 957 | ValueMap Transforms; 958 | IRBuilder<> Builder(I); 959 | 960 | Loop* curLoop = LI.getLoopFor(phi->getParent()); 961 | if(!getLoopEndCondxxx(curLoop)){ /* no end condition from getLoopEndCondxxx: the induction variable is advanced by prefetchDist without a compare and select */ 962 | SmallVector DepPhiInsts; 963 | //Value* EndInstr; 964 | for(auto &curDep : DepInstrs){ 965 | if(curDep == phi){ 966 | SetVector BBInsts; 967 | auto B = curLoop->getExitingBlock(); 968 | if(B){ 969 | for(Instruction &J : *B) { 970 | Instruction* I = &J; /* this local I shadows the parameter I inside this loop only */ 971 | BBInsts.insert(I); 972 | } 973 | for(int i= BBInsts.size()-1;i>=0;i--){ 974 | CmpInst *CI = dyn_cast(BBInsts[i]); 975 | if(CI){ 976 | DepPhiInsts.push_back(CI); 977 | Use* OperandList = 
CI->getOperandList(); 978 | Use* NumOfOperands = OperandList + CI->getNumOperands(); 979 | for(Use* op = OperandList; op < NumOfOperands; op++) { 980 | if(dyn_cast(op->get())){ 981 | Instruction* OPInstr = dyn_cast(op->get()); 982 | DepPhiInsts.push_back(OPInstr); 983 | } 984 | } 985 | } 986 | } 987 | } 988 | Instruction* NewInstr; 989 | if(curDep == getCanonicalishInductionVariable(curLoop)) { 990 | Instruction* mod; 991 | NewInstr = dyn_cast(Builder.CreateAdd(curDep,curDep->getType()->isIntegerTy(64) ? ConstantInt::get(Type::getInt64Ty((curDep->getFunction())->getParent()->getContext()),prefetchDist) : ConstantInt::get(Type::getInt32Ty(((curDep->getFunction())->getParent())->getContext()),prefetchDist))); 992 | Transforms.insert(std::pair(curDep,NewInstr)); 993 | for(auto &s: DepPhiInsts){ 994 | DepInstrs.push_back(s); /* NOTE(review): DepInstrs is appended to while the enclosing range-for over DepInstrs is still running - confirm this cannot invalidate the iteration */ 995 | } 996 | for(auto &s: DepPhiInsts){ 997 | Use* OpsInstr = s->getOperandList(); 998 | int64_t sNumOp= s ->getNumOperands(); 999 | for(int64_t index = 0; index(ops); 1002 | if(!(std::find( DepInstrs.begin(), DepInstrs.end(),m) != DepInstrs.end())) { 1003 | if(!(dyn_cast(ops))){ 1004 | DepInstrs.push_back(m); 1005 | } 1006 | } 1007 | } 1008 | } 1009 | } 1010 | }//if(curDep == phi){ 1011 | }//for(auto &curDep : DepInstrs) 1012 | }//if(!getLoopEndCond(curLoop)) 1013 | else{ /* getLoopEndCondxxx returned a value: the offset induction variable is combined with EndCond through a compare and select */ 1014 | for(auto &curDep : DepInstrs){ 1015 | if(Transforms.count(curDep)){ 1016 | continue; 1017 | } 1018 | if(curDep == phi){ 1019 | Instruction* NewInstr; 1020 | if(curDep == getCanonicalishInductionVariable(curLoop)) { 1021 | Value* EndCond = getLoopEndCondxxx(curLoop); 1022 | Instruction* IncInstr =GetIncomingValue(curLoop, phi); 1023 | ConstantInt* UpdateInd = getValueAddedToIndVar(curLoop, IncInstr); 1024 | Instruction* mod; 1025 | if( UpdateInd->isNegative()){ /* negative step: the offset applied is 0-prefetchDist */ 1026 | int64_t curprefetchDist = 0-prefetchDist; 1027 | NewInstr = dyn_cast(Builder.CreateAdd(curDep,curDep->getType()->isIntegerTy(64) ? 
ConstantInt::get(Type::getInt64Ty(((curDep->getFunction())->getParent())->getContext()),curprefetchDist) : ConstantInt::get(Type::getInt32Ty(((curDep->getFunction())->getParent())->getContext()),curprefetchDist))); 1028 | if(EndCond->getType() != NewInstr->getType()) { 1029 | Instruction* cast = CastInst::CreateIntegerCast(EndCond,NewInstr->getType(),true); 1030 | Builder.Insert(cast); 1031 | Value* cmp = Builder.CreateICmp(CmpInst::ICMP_SGT,cast,NewInstr); 1032 | mod = dyn_cast(Builder.CreateSelect(cmp,cast,NewInstr)); 1033 | } 1034 | else{ 1035 | Value* cmp = Builder.CreateICmp(CmpInst::ICMP_SGT,EndCond,NewInstr); 1036 | mod = dyn_cast(Builder.CreateSelect(cmp,EndCond,NewInstr)); 1037 | } 1038 | Transforms.insert(std::pair(curDep,NewInstr)); /* NOTE(review): this branch records NewInstr rather than the select result mod, unlike the positive-step branch below which records mod - confirm intent */ 1039 | } 1040 | else{ 1041 | NewInstr = dyn_cast(Builder.CreateAdd(curDep,curDep->getType()->isIntegerTy(64) ? ConstantInt::get(Type::getInt64Ty(((curDep->getFunction())->getParent())->getContext()),prefetchDist) : ConstantInt::get(Type::getInt32Ty(((curDep->getFunction())->getParent())->getContext()),prefetchDist))); 1042 | if(EndCond->getType() != NewInstr->getType()) { 1043 | Instruction* cast = CastInst::CreateIntegerCast(EndCond,NewInstr->getType(),true); 1044 | Builder.Insert(cast); 1045 | Value* cmp = Builder.CreateICmp(CmpInst::ICMP_SLT,cast,NewInstr); /* the select below yields the cast EndCond when it is signed-less-than NewInstr, otherwise NewInstr */ 1046 | mod = dyn_cast(Builder.CreateSelect(cmp,cast,NewInstr)); 1047 | } 1048 | else{ 1049 | Value* cmp = Builder.CreateICmp(CmpInst::ICMP_SLT,EndCond,NewInstr); 1050 | mod = dyn_cast(Builder.CreateSelect(cmp,EndCond,NewInstr)); 1051 | } 1052 | Transforms.insert(std::pair(curDep,mod)); 1053 | } 1054 | } 1055 | else{ 1056 | nonCanonical=true; 1057 | } 1058 | } 1059 | } 1060 | } 1061 | int start =0; 1062 | if(nonCanonical){ /* set above when phi is found in DepInstrs but is not the value returned by getCanonicalishInductionVariable: phi itself is advanced by prefetchDist through an in-bounds GEP */ 1063 | GetElementPtrInst* newPhi; 1064 | newPhi =dyn_cast(Builder.CreateInBoundsGEP(phi,phi->getType()->isIntegerTy(64) ? 
ConstantInt::get(Type::getInt64Ty(((phi->getFunction())->getParent())->getContext()),prefetchDist): ConstantInt::get(Type::getInt32Ty(((phi->getFunction())->getParent())->getContext()),prefetchDist))); 1065 | Transforms.insert(std::pair(phi,newPhi)); 1066 | start=1; 1067 | } 1068 | SmallVector t; /* t and start are only written, never read, in the visible code */ 1069 | for(int index=DepInstrs.size()-1 ; index>=0; index--){ /* walk DepInstrs from last to first; each non-PHI entry is cloned and inserted before I */ 1070 | auto &curDep = DepInstrs[index]; 1071 | if(PHINode * pNode = dyn_cast(curDep)) { 1072 | errs()<<"\n"; 1073 | }//if 1074 | else{ 1075 | //errs()<<" "<< *curDep<<"\n"; 1076 | Instruction* NewInstr = curDep->clone(); 1077 | Use* OpListNewInstr = NewInstr->getOperandList(); 1078 | int64_t NewInstrsNumOp= NewInstr ->getNumOperands(); 1079 | for(int64_t index = 0; index(op)) { 1082 | GetElementPtrInst *opIsInstr =dyn_cast(op); 1083 | if(Transforms.count(opIsInstr)) { 1084 | NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 1085 | }//if 1086 | }//if 1087 | else if(Instruction* opIsInstr = dyn_cast(op)) { 1088 | if(Transforms.count(opIsInstr)) { 1089 | NewInstr->setOperand(index,Transforms.lookup(opIsInstr)); 1090 | }//if(Transforms) 1091 | }//else if 1092 | }//for 1093 | NewInstr->insertBefore(I); 1094 | t.push_back(NewInstr); 1095 | Transforms.insert(std::pair(curDep,NewInstr)); 1096 | }//else 1097 | }//for 1098 | Type *I8 = Type::getInt8PtrTy(((I->getFunction())->getParent())->getContext()); 1099 | Type *I32 = Type::getInt32Ty((I->getParent())->getContext()); 1100 | Function *PrefetchFunc = Intrinsic::getDeclaration((I->getFunction())->getParent(), Intrinsic::prefetch, I8); 1101 | Instruction* oldGep = dyn_cast(I->getOperand(0)); 1102 | Instruction* gep = dyn_cast(Transforms.lookup(oldGep)); /* NOTE(review): gep is used below without a null check; presumably operand 0 of I is always among the cloned DepInstrs - verify */ 1103 | Instruction* cast = dyn_cast(Builder.CreateBitCast (gep, Type::getInt8PtrTy(((I->getFunction())->getParent())->getContext()))); 1104 | Value* ar[] = { /* prefetch call arguments: the address, then the constants 0, 3 and 1 (presumably read access, locality 3, data cache per the LLVM LangRef - verify) */ 1105 | cast, 1106 | ConstantInt::get(I32 ,0), 1107 | ConstantInt::get(I32 ,3), 1108 | ConstantInt::get(I32 ,1) 1109 | }; 1110 | CallInst* call = 
CallInst::Create(PrefetchFunc,ar); 1111 | call->insertBefore(I); 1112 | done=true; 1113 | return done; 1114 | } 1115 | /* InjectPrefechesOnePhiPartOne: runs IsDep on I, then on each load that IsDep reported for I (walked from last to first). When the first phi found for such a load equals the first phi found for I, InjectPrefechesOnePhiPartTwo is called for I with prefetchDist and for that load with prefetchDist*2. Returns true if any of those calls returned true. The parameters Phi, CapturedLoads, DepInstrs and ItIsIndirectLoad are not read in the visible body. NOTE(review): the Search vectors are declared outside the loop and are not cleared here between IsDep calls, and element 0 of both phi lists is read without a size check - confirm IsDep makes this safe. */ 1116 | bool SWPrefetchingLLVMPass::InjectPrefechesOnePhiPartOne(Instruction* I, LoopInfo &LI, SmallVector &Phi, SmallVector &CapturedLoads, SmallVector &DepInstrs, int64_t prefetchDist, bool ItIsIndirectLoad){ 1117 | bool done=false; 1118 | Instruction* phi =nullptr; 1119 | SmallVector DependentLoadsToCurLoadx; 1120 | SmallVector DependentInstrsToCurLoadx; 1121 | SmallVector DependentPhisx; 1122 | 1123 | if(IsDep(I,LI,phi,DependentLoadsToCurLoadx,DependentInstrsToCurLoadx,DependentPhisx)){ 1124 | Instruction * SearchPhi = nullptr; 1125 | SmallVector SearchLoads; 1126 | SmallVector SearchInstrs; 1127 | SmallVector SearchPhis; 1128 | for(int index=DependentLoadsToCurLoadx.size()-1 ; index>=0; index--){ 1129 | if(IsDep(DependentLoadsToCurLoadx[index],LI,SearchPhi,SearchLoads,SearchInstrs,SearchPhis)){ 1130 | if(DependentPhisx[0]==SearchPhis[0]){ /* both loads must lead back to the same first phi */ 1131 | if(InjectPrefechesOnePhiPartTwo(I,LI,DependentPhisx[0], DependentInstrsToCurLoadx, prefetchDist)){ 1132 | done=true; 1133 | } 1134 | if(InjectPrefechesOnePhiPartTwo(DependentLoadsToCurLoadx[index],LI,SearchPhis[0], SearchInstrs, prefetchDist*2)){ /* the load that I depends on is prefetched at twice the distance */ 1135 | done=true; 1136 | } 1137 | 1138 | } 1139 | } 1140 | } 1141 | } 1142 | return done; 1143 | } 1144 | /* runOnFunction: pass entry point for one function. Returns false at once when Reader is null. Otherwise it fetches the profile samples for F and then works in two stages. Stage one: for every load in a block that belongs to a loop and has hints from getHints, SearchAlgorithm gathers the phis, loads and instructions it depends on; each recorded load is then handed at most once to InjectPrefeches (more than one phi) or InjectPrefechesOnePhiPartOne (exactly one phi) with the distance taken from the hint. Stage two, taken when AutoFDOMapping is false: IsDep is run on every load in a loop block, a load is recorded as indirect and one of the loads it depends on as stride under a condition that is not visible in this copy, and InjectPrefechesOnePhiPartTwo is called for the kept pairs with pd and pd*2. Returns true if any injection call returned true. NOTE(review): the condition that guards stage one is missing from this copy of the text (presumably AutoFDOMapping - verify). NOTE(review): pd is assigned only inside the loop over LBR_dist, so the last entry wins and pd is uninitialized if LBR_dist is empty. */ 1145 | bool SWPrefetchingLLVMPass::runOnFunction(Function &F) { 1146 | bool modified = false; 1147 | LoopInfo &LI = getAnalysis().getLoopInfo(); 1148 | if(!Reader){ 1149 | return false; 1150 | } 1151 | bool samplesExist =false; 1152 | const llvm::sampleprof::FunctionSamples* SamplesReaded = Reader->getSamplesFor(F); 1153 | if(SamplesReaded){ 1154 | samplesExist =true; 1155 | } 1156 | /*if(!SamplesReaded){ 1157 | errs()< AllCurLoads; 1162 | SmallVector NeedToEliminateCurLoads; 1163 | SmallVector AllPrefetchDist; 1164 | SmallVector IndexofNeedToEliminateCurLoads; 1165 | SmallVector correctMapping; 1166 | std::vector> AllCapturedInstrs; 1167 | std::vector> 
AllCapturedPhis; 1168 | std::vector> AllCapturedLoads; 1169 | 1170 | for(auto &BB : F) { 1171 | bool isBBLoop = LI.getLoopFor(&BB); 1172 | if(isBBLoop){ 1173 | for (auto &I : BB) { 1174 | const ErrorOr T =getHints(I,SamplesReaded); 1175 | if(T){ 1176 | //errs()<<"T is true!\n"; 1177 | if(LoadInst *curLoad = dyn_cast(&I)){ 1178 | for(const auto &S_V : *T) { 1179 | prefechDist = static_cast(S_V.second); 1180 | Instruction * phi = nullptr; 1181 | SmallVector Loads; 1182 | SmallVector Instrs; 1183 | SmallVector Phis; 1184 | 1185 | if(SearchAlgorithm(curLoad,LI,phi,Loads,Instrs,Phis)){ 1186 | for(size_t index=0; index< Phis.size(); index++){ 1187 | Instrs.push_back(Phis[Phis.size()-1 -index]); 1188 | } // the phis are appended to Instrs in reverse order of Phis 1189 | AllCurLoads.push_back(curLoad); 1190 | AllPrefetchDist.push_back(prefechDist); 1191 | AllCapturedInstrs.push_back(Instrs); 1192 | AllCapturedPhis.push_back(Phis); 1193 | AllCapturedLoads.push_back(Loads); 1194 | 1195 | }//SearchAlgorithm 1196 | }//auto &S_V : *T 1197 | }//dyn_cast(&I) 1198 | }//T 1199 | }//auto &I : BB 1200 | }//isBBLoop 1201 | }//auto &BB : F // collection done: one entry per hint for each load where SearchAlgorithm succeeded 1202 | 1203 | bool correctMappingCheck=false; 1204 | SmallVector AlreadyPrefetched; 1205 | 1206 | if(AllCurLoads.size()>1){ // correctMapping collects each index i whose Instrs, Loads and Phis counts all equal those of some entry j that holds a different load 1207 | for(long unsigned int i=0; i< AllCurLoads.size(); i++){ 1208 | for(long unsigned int j=0; j< AllCurLoads.size();j++){ 1209 | if( AllCapturedInstrs[i].size() == AllCapturedInstrs[j].size() && AllCurLoads[i]!= AllCurLoads[j]){ 1210 | if( AllCapturedLoads[i].size()== AllCapturedLoads[j].size() && AllCapturedPhis[i].size() == AllCapturedPhis[j].size()){ 1211 | if(!(std::find(correctMapping.begin(), correctMapping.end(),i) != correctMapping.end())){ 1212 | correctMapping.push_back(i); 1213 | correctMappingCheck=true; 1214 | } 1215 | } 1216 | } 1217 | } 1218 | } 1219 | } 1220 | if(correctMappingCheck){ 1221 | for(long unsigned int j=0; j< AllCurLoads.size(); j++){ 1222 | if(!(std::find(correctMapping.begin(), correctMapping.end(),j) != correctMapping.end())){ // only indices absent from correctMapping are handled in this branch 1223 | 
if(!(std::find(AlreadyPrefetched.begin(), AlreadyPrefetched.end(),AllCurLoads[j]) != AlreadyPrefetched.end())){ 1224 | AlreadyPrefetched.push_back(AllCurLoads[j] ); 1225 | if(AllCapturedPhis[j].size()> 1){ 1226 | if(InjectPrefeches(AllCurLoads[j],LI,AllCapturedPhis[j], AllCapturedLoads[j], AllCapturedInstrs[j], AllPrefetchDist[j],true)){ 1227 | modified=true; 1228 | } 1229 | } 1230 | else if (AllCapturedPhis[j].size()==1 && AllCapturedLoads[j].size() !=0){ 1231 | if(InjectPrefechesOnePhiPartOne(AllCurLoads[j],LI,AllCapturedPhis[j], AllCapturedLoads[j], AllCapturedInstrs[j], AllPrefetchDist[j],true)){ 1232 | modified=true; 1233 | } 1234 | } 1235 | } 1236 | } 1237 | } 1238 | } 1239 | if (!correctMappingCheck){ 1240 | for(long unsigned int j=0; j< AllCurLoads.size(); j++){ 1241 | if(!(std::find(AlreadyPrefetched.begin(), AlreadyPrefetched.end(),AllCurLoads[j]) != AlreadyPrefetched.end())){ 1242 | AlreadyPrefetched.push_back(AllCurLoads[j] ); 1243 | if(AllCapturedPhis[j].size()> 1 ){ 1244 | if(InjectPrefeches(AllCurLoads[j],LI,AllCapturedPhis[j], AllCapturedLoads[j], AllCapturedInstrs[j], AllPrefetchDist[j],true)){ 1245 | modified=true; 1246 | } 1247 | } 1248 | else if (AllCapturedPhis[j].size()==1){ 1249 | if(InjectPrefechesOnePhiPartOne(AllCurLoads[j],LI,AllCapturedPhis[j], AllCapturedLoads[j], AllCapturedInstrs[j], AllPrefetchDist[j],true)){ 1250 | modified=true; 1251 | } 1252 | } 1253 | } 1254 | } 1255 | } 1256 | } 1257 | if(!AutoFDOMapping){ 1258 | SmallVector AllLoadsDepToPhix; 1259 | int64_t pd; 1260 | for(auto &e : LBR_dist){ 1261 | pd = std::stoull(e); 1262 | } 1263 | std::vector> AllDependentInstsx; 1264 | std::vector> AllDependentPhisx; 1265 | SmallVector StrideLoadsx; 1266 | SmallVector StrideLoadsToKeepx; 1267 | SmallVector IndirectLoadsx; 1268 | SmallVector IndirectLoadsToKeepx; 1269 | SmallVector LoadsToRemovex; 1270 | SmallVector LoadsIndexx; 1271 | 1272 | std::vector> AllDependentInstrsToIndirectLoadx; 1273 | std::vector> 
AllDependentInstrsToStrideLoadx; 1274 | std::vector> AllDependentPhisToStrideLoadx; 1275 | std::vector> AllDependentPhisToIndirectLoadx; 1276 | 1277 | 1278 | 1279 | for(auto &BB : F) { 1280 | bool isBBLoop = LI.getLoopFor(&BB); 1281 | for (auto &I : BB) { 1282 | if(isBBLoop){ 1283 | if (LoadInst *curLoad = dyn_cast(&I)){ 1284 | Instruction * phi = nullptr; 1285 | SmallVector DependentLoadsToCurLoadx; 1286 | SmallVector DependentInstrsToCurLoadx; 1287 | SmallVector DependentPhisx; 1288 | if(IsDep(curLoad,LI,phi,DependentLoadsToCurLoadx,DependentInstrsToCurLoadx,DependentPhisx)){ 1289 | if(DependentLoadsToCurLoadx.size()>0){ 1290 | int indexOfDepLoad; 1291 | bool DepPhiOfDepLoad=false; 1292 | for(auto &s : DependentLoadsToCurLoadx){ 1293 | for (long unsigned int i=0; i DependentInstrsToIndirectLoadx; 1313 | SmallVector DependentInstrsToStrideLoadx; 1314 | SmallVector DependentPhistoIndirectLoadx; 1315 | SmallVector DependentPhistoStrideLoadx; 1316 | 1317 | IndirectLoadsx.push_back(curLoad); 1318 | StrideLoadsx.push_back(s); 1319 | for(auto &si : DependentInstrsToCurLoadx){ 1320 | DependentInstrsToIndirectLoadx.push_back(si); 1321 | } 1322 | for(auto &di : AllDependentInstsx[indexOfDepLoad]){ 1323 | DependentInstrsToStrideLoadx.push_back(di); 1324 | } 1325 | for(auto &si : DependentPhisx){ 1326 | DependentPhistoIndirectLoadx.push_back(si); 1327 | } 1328 | for(auto &di : AllDependentPhisx[indexOfDepLoad]){ 1329 | DependentPhistoStrideLoadx.push_back(di); 1330 | } 1331 | AllDependentInstrsToIndirectLoadx.push_back(DependentInstrsToIndirectLoadx); 1332 | AllDependentInstrsToStrideLoadx.push_back(DependentInstrsToStrideLoadx); 1333 | AllDependentPhisToIndirectLoadx.push_back(DependentPhistoIndirectLoadx); 1334 | AllDependentPhisToStrideLoadx.push_back(DependentPhistoStrideLoadx); 1335 | 1336 | } 1337 | DepPhiOfDepLoad=false; 1338 | } 1339 | 1340 | } 1341 | }//if(DependentLoadsToCurLoad.size() // every load for which IsDep succeeded is recorded below together with its dependent instructions and phis 1342 | AllLoadsDepToPhix.push_back(curLoad); 1343 | 
AllDependentInstsx.push_back(DependentInstrsToCurLoadx); 1344 | AllDependentPhisx.push_back(DependentPhisx); 1345 | }//if(IsCurLoadDependentToPhiNode 1346 | }//if load 1347 | } 1348 | } 1349 | }//for(auto &BB : F) // next, a stride load that is also recorded as an indirect load with an equal phi list goes into LoadsToRemovex 1350 | 1351 | for(long unsigned int x =0; x< StrideLoadsx.size(); x++){ 1352 | for(long unsigned int y =0; y< IndirectLoadsx.size(); y++){ 1353 | if(StrideLoadsx[x]==IndirectLoadsx[y]){ 1354 | if( AllDependentPhisToStrideLoadx[x]==AllDependentPhisToIndirectLoadx[y]){ 1355 | LoadsToRemovex.push_back(StrideLoadsx[x]); 1356 | } 1357 | } 1358 | } 1359 | } 1360 | 1361 | for(long unsigned int x =0; x< StrideLoadsx.size(); x++){ 1362 | bool kept =false; // NOTE(review): kept becomes true as soon as one entry of LoadsToRemovex differs from both loads of this pair, even if another entry matches - confirm intent 1363 | if(LoadsToRemovex.size()>0){ 1364 | for(long unsigned int y =0; y< LoadsToRemovex.size(); y++){ 1365 | if(StrideLoadsx[x]!=LoadsToRemovex[y] && IndirectLoadsx[x]!=LoadsToRemovex[y]){ 1366 | kept=true; 1367 | } 1368 | } 1369 | if(kept){ 1370 | StrideLoadsToKeepx.push_back(StrideLoadsx[x]); 1371 | LoadsIndexx.push_back(x); 1372 | IndirectLoadsToKeepx.push_back(IndirectLoadsx[x]); 1373 | } 1374 | } 1375 | else{ 1376 | StrideLoadsToKeepx.push_back(StrideLoadsx[x]); 1377 | LoadsIndexx.push_back(x); 1378 | IndirectLoadsToKeepx.push_back(IndirectLoadsx[x]); 1379 | 1380 | } 1381 | } 1382 | for(long unsigned int x=0; x< IndirectLoadsToKeepx.size();x++){ // each kept indirect load is prefetched at distance pd and its stride load at pd*2 1383 | if(InjectPrefechesOnePhiPartTwo(IndirectLoadsToKeepx[x],LI,AllDependentPhisToIndirectLoadx[LoadsIndexx[x]][0], AllDependentInstrsToIndirectLoadx[LoadsIndexx[x]],pd)){ 1384 | modified=true; 1385 | } 1386 | if(InjectPrefechesOnePhiPartTwo(StrideLoadsToKeepx[x],LI,AllDependentPhisToStrideLoadx[LoadsIndexx[x]][0], AllDependentInstrsToStrideLoadx[LoadsIndexx[x]],pd*2)){ 1387 | modified=true; 1388 | 1389 | } 1390 | 1391 | } 1392 | }// if(!AutoFDOMapping) 1393 | return modified; 1394 | }//runOnFunction 1395 | 1396 | static RegisterPass X("SWPrefetchingLLVMPass", "Hello SWPrefetchingLLVMPass", true,true); 1397 | 1398 | 1399 | 
--------------------------------------------------------------------------------