├── .github └── workflows │ └── jekyll-gh-pages.yml ├── DRACKSim-Detailed ├── DRAMSIM2_ini │ ├── DDR4_x16_2400.ini │ └── DDR4_x16_2400_1.ini ├── DRAMSim2.patch ├── ID_Cache.h ├── Ins_Tracer.cpp ├── MMU.cpp ├── Makefile.sim ├── OOO_core.cpp ├── TLB.h ├── TLB_Cache.cpp ├── allocator.cpp ├── boost.tar.xz ├── branch_predictor.cpp ├── clear.sh ├── clear_sm.sh ├── icount.H ├── instlib.H ├── inter_connect.cpp ├── main.cpp ├── makefile ├── makefile.pin ├── makefile.rules ├── mem_defs.cpp ├── mmap.cpp ├── param.cpp ├── readme.md ├── sim.patch ├── start_sim.sh └── utils.h ├── DRackSim-Trace ├── DRAMSim2.patch ├── DRAMSim2_ini │ ├── DDR4_x16_2400.ini │ └── DDR4_x16_2400_1.ini ├── MMU.cpp ├── Makefile ├── Trace_Tool │ ├── Caches.H │ ├── Mem_Trace.cpp │ ├── boost.tar.xz │ ├── icount.H │ ├── libinst.H │ ├── makefile │ ├── makefile.rules │ ├── parse_trace.cpp │ └── readme.md ├── inter_connect.cpp ├── main.cpp ├── mem_defs.cpp ├── mmap.cpp ├── readme.md ├── remote_mem_allocator.cpp └── stats.cpp ├── LICENSE ├── gem5_patch_commands ├── gem5.patch └── gem5_commands_to_validate.sh └── readme.md /.github/workflows/jekyll-gh-pages.yml: -------------------------------------------------------------------------------- 1 | # Sample workflow for building and deploying a Jekyll site to GitHub Pages 2 | name: Deploy Jekyll with GitHub Pages dependencies preinstalled 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["main"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: false 23 | 24 | jobs: 25 | # Build job 26 | build: 27 | runs-on: ubuntu-latest 28 | steps: 29 | - name: Checkout 30 | uses: actions/checkout@v3 31 | - name: Setup Pages 32 | uses: actions/configure-pages@v3 33 | - name: Build with Jekyll 34 | uses: actions/jekyll-build-pages@v1 35 | with: 36 | source: ./ 37 | destination: ./_site 38 | - name: Upload artifact 39 | uses: actions/upload-pages-artifact@v2 40 | 41 | # Deployment job 42 | deploy: 43 | environment: 44 | name: github-pages 45 | url: ${{ steps.deployment.outputs.page_url }} 46 | runs-on: ubuntu-latest 47 | needs: build 48 | steps: 49 | - name: Deploy to GitHub Pages 50 | id: deployment 51 | uses: actions/deploy-pages@v2 52 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/DRAMSIM2_ini/DDR4_x16_2400.ini: -------------------------------------------------------------------------------- 1 | NUM_BANKS=4 2 | NUM_ROWS=65536 3 | NUM_COLS=1024 4 | DEVICE_WIDTH=16 5 | 6 | ;in nanoseconds 7 | ;#define REFRESH_PERIOD 7800 8 | REFRESH_PERIOD=7800 9 | tCK=.833 ;* 10 | 11 | CL=16 ;* 12 | AL=0 ;* 13 | ;AL=3; needs to be tRCD-1 or 0 14 | ;RL=(CL+AL) 15 | ;WL=(RL-1) 16 | BL=8 ;* 17 | tRAS=32;* 18 | tRCD=16 ;* 19 | tRRD=4 ;* 20 | tRC=48 ;* 21 | tRP=16 ;* 22 | tCCD=4 ;* 23 | tRTP=4 ;* 24 | tWTR=5 ;* 25 | tWR=18 ;* 26 | tRTRS=1; -- RANK PARAMETER, TODO 27 | tRFC=312;* 28 | tFAW=20;* 29 | tCKE=3 ;* 30 | tXP=4 ;* 31 | 32 | tCMD=1 ;* 33 | 34 | IDD0=90; 35 | IDD1=110; 36 | IDD2P=50; 37 | IDD2Q=65; 38 | IDD2N=70; 39 | IDD3Pf=60; 40 | IDD3Ps=60; 41 | IDD3N=75; 42 | IDD4W=230; 43 | IDD4R=230; 44 | IDD5=84; 45 | IDD6=3; 46 | IDD6L=8; 47 | IDD7=270; 48 | 49 | ;same bank 50 | ;READ_TO_PRE_DELAY=(AL+BL/2+max(tRTP,2)-2) 51 | ;WRITE_TO_PRE_DELAY=(WL+BL/2+tWR) 52 | ;READ_TO_WRITE_DELAY=(RL+BL/2+tRTRS-WL) 53 | ;READ_AUTOPRE_DELAY=(AL+tRTP+tRP) 54 | ;WRITE_AUTOPRE_DELAY=(WL+BL/2+tWR+tRP) 55 | 
;WRITE_TO_READ_DELAY_B=(WL+BL/2+tWTR);interbank 56 | ;WRITE_TO_READ_DELAY_R=(WL+BL/2+tRTRS-RL);interrank 57 | 58 | Vdd=1.2 ; TODO: double check this 59 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/DRAMSIM2_ini/DDR4_x16_2400_1.ini: -------------------------------------------------------------------------------- 1 | NUM_BANKS=4 2 | NUM_ROWS=512 3 | NUM_COLS=64 4 | DEVICE_WIDTH=16 5 | 6 | ;in nanoseconds 7 | ;#define REFRESH_PERIOD 7800 8 | REFRESH_PERIOD=7800 9 | tCK=.833 ;* 10 | 11 | CL=16 ;* 12 | AL=0 ;* 13 | ;AL=3; needs to be tRCD-1 or 0 14 | ;RL=(CL+AL) 15 | ;WL=(RL-1) 16 | BL=8 ;* 17 | tRAS=32;* 18 | tRCD=16 ;* 19 | tRRD=4 ;* 20 | tRC=48 ;* 21 | tRP=16 ;* 22 | tCCD=4 ;* 23 | tRTP=4 ;* 24 | tWTR=5 ;* 25 | tWR=18 ;* 26 | tRTRS=1; -- RANK PARAMETER, TODO 27 | tRFC=312;* 28 | tFAW=20;* 29 | tCKE=3 ;* 30 | tXP=4 ;* 31 | 32 | tCMD=1 ;* 33 | 34 | IDD0=90; 35 | IDD1=110; 36 | IDD2P=50; 37 | IDD2Q=65; 38 | IDD2N=70; 39 | IDD3Pf=60; 40 | IDD3Ps=60; 41 | IDD3N=75; 42 | IDD4W=230; 43 | IDD4R=230; 44 | IDD5=84; 45 | IDD6=3; 46 | IDD6L=8; 47 | IDD7=270; 48 | 49 | ;same bank 50 | ;READ_TO_PRE_DELAY=(AL+BL/2+max(tRTP,2)-2) 51 | ;WRITE_TO_PRE_DELAY=(WL+BL/2+tWR) 52 | ;READ_TO_WRITE_DELAY=(RL+BL/2+tRTRS-WL) 53 | ;READ_AUTOPRE_DELAY=(AL+tRTP+tRP) 54 | ;WRITE_AUTOPRE_DELAY=(WL+BL/2+tWR+tRP) 55 | ;WRITE_TO_READ_DELAY_B=(WL+BL/2+tWTR);interbank 56 | ;WRITE_TO_READ_DELAY_R=(WL+BL/2+tRTRS-RL);interrank 57 | 58 | Vdd=1.2 ; TODO: double check this 59 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/Ins_Tracer.cpp: -------------------------------------------------------------------------------- 1 | using namespace std; 2 | 3 | #include "pin.H" 4 | #include "instlib.H" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | std::ofstream TraceFile; 14 | 15 | INSTLIB::ICOUNT icount; 16 | UINT64 ram_count; 17 | 18 | // Get process 
number from the script 19 | KNOB KnobProcessNumber(KNOB_MODE_WRITEONCE, "pintool", "P", "1", "Process number"); 20 | // Get node number from the script 21 | KNOB KnobNodeNumber(KNOB_MODE_WRITEONCE, "pintool", "N", "1", "Node number"); 22 | // Get number of instructions to skip from the script 23 | KNOB KnobNumInsSkip(KNOB_MODE_WRITEONCE, "pintool", "S", "0", "INS_SKIP"); 24 | // Get the "only multi-thread" flag from the script 25 | KNOB KnobMultiThread(KNOB_MODE_WRITEONCE, "pintool", "T", "0", "Only Multi_thread"); 26 | // Get the maximum number of instructions from the script 27 | KNOB KnobMaxIns(KNOB_MODE_WRITEONCE, "pintool", "M", "10000000", "Max Instructions"); 28 | 29 | int Procid; 30 | int Nodeid; 31 | uint64_t INST_to_SKIP=0; 32 | bool only_multi_thread=false; 33 | 34 | using std::dec; 35 | using std::endl; 36 | using std::hex; 37 | using std::ios; 38 | using std::string; 39 | 40 | 41 | // Force each thread's data to be in its own data cache line so that 42 | // multiple threads do not contend for the same data cache line. 43 | // This avoids the false sharing problem. 44 | #define PADSIZE 56 45 | 46 | INT32 numThreads = 0; 47 | 48 | PIN_LOCK lock1; 49 | PIN_MUTEX Mutex, Mutex1; 50 | 51 | // a running count of the instructions 52 | class thread_data_t 53 | { 54 | public: 55 | thread_data_t() : _count(0) {} 56 | UINT64 _count; 57 | UINT8 _pad[PADSIZE]; 58 | }; 59 | 60 | // key for accessing TLS storage in the threads. 
initialized once in main() 61 | static TLS_KEY tls_key = INVALID_TLS_KEY; 62 | 63 | // function to access thread−specific data 64 | thread_data_t *get_tls(THREADID threadid) 65 | { 66 | thread_data_t *tdata = static_cast(PIN_GetThreadData(tls_key, threadid)); 67 | return tdata; 68 | } 69 | 70 | VOID ThreadStart(THREADID threadid, CONTEXT *ctxt, INT32 flags, VOID *v) 71 | { 72 | PIN_GetLock(&lock1, threadid + 1); 73 | numThreads++; 74 | PIN_ReleaseLock(&lock1); 75 | thread_data_t *tdata = new thread_data_t; 76 | PIN_SetThreadData(tls_key, tdata, threadid); 77 | /* if (PIN_SetThreadData(tls_key, tdata, threadid) == FALSE) 78 | { 79 | cerr << "PIN_SetThreadData failed" << endl; 80 | PIN_ExitProcess(1); 81 | } 82 | */ 83 | } 84 | 85 | 86 | int *main_start; 87 | 88 | struct INST 89 | { 90 | int proc_id; 91 | UINT64 ins_id; 92 | INT64 ins_addr; 93 | int16_t threadid; 94 | INT64 read_opr; 95 | int16_t read_size; 96 | INT64 read_opr2; 97 | int16_t read_size2; 98 | INT64 write_opr; 99 | int16_t write_size; 100 | int32_t RR[4]; 101 | int32_t WR[4]; 102 | bool is_Brach; 103 | bool branch_taken; 104 | bool branch_prediction; 105 | int ins_type; 106 | }; 107 | 108 | 109 | INST *q; 110 | UINT64 *num_ins; 111 | int n=100; 112 | int z=0; 113 | 114 | 115 | UINT64 total_ins=0; 116 | UINT64 ins_to_simulate=0; 117 | 118 | string dir_name; 119 | const char *file; 120 | std::ostringstream tf; 121 | int FileID=0; 122 | UINT32 ins_count=0; 123 | uint64_t max_ins=1000000; 124 | UINT32 a=0; 125 | static VOID InsRef(UINT32 threadid, string ins, ADDRINT addr, ADDRINT read_op, UINT32 read_size, ADDRINT read_op2, UINT32 read_size2, 126 | ADDRINT write_op, UINT32 write_size, int32_t *R, int num_rr, int32_t *W, int num_wr, bool is_Brach, bool branch_taken,UINT32 category,UINT32 opcode) 127 | { 128 | if(only_multi_thread) 129 | { 130 | if(numThreads<2) 131 | { 132 | PIN_MutexLock(&Mutex); 133 | *main_start=*main_start+1; 134 | PIN_MutexUnlock(&Mutex); 135 | ins_count=1; 136 | return; 137 | } 138 | 
} 139 | 140 | PIN_MutexLock(&Mutex1); 141 | ins_count++; 142 | // if(ins_count%100000==0) 143 | // cout<1024) 216 | return; 217 | temp.RR[i]=R[i]; 218 | // if(temp.RR[i]>155) 219 | // cin.get(); 220 | } 221 | for(int i=num_rr;i<4;i++) 222 | { 223 | temp.RR[i]=-1; 224 | // if(temp.RR[i]>155) 225 | // cin.get(); 226 | } 227 | 228 | for(int i=0;i1024) 231 | return; 232 | temp.WR[i]=W[i]; 233 | // if(temp.WR[i]>155) 234 | // cin.get(); 235 | } 236 | for(int i=num_wr;i<4;i++) 237 | { 238 | temp.WR[i]=-1; 239 | // if(temp.WR[i]>155) 240 | // cin.get(); 241 | } 242 | 243 | if((*num_ins)%100000 == 0) cout<max_ins) 251 | { 252 | TraceFile.close(); 253 | while(1) 254 | { 255 | cout<<*num_ins; 256 | while((*num_ins)>0) 257 | { 258 | sleep(1); 259 | } 260 | if((*num_ins)==0) 261 | { 262 | // remove(file); 263 | FileID++; 264 | tf.str(""); 265 | tf<buf[i].threadid<<" "<buf[i].procid<<" "<buf[i].addr<<" "<buf[i].size<<" bufNo= "<buf[i].index<<"\n"; 385 | *main_start=10; 386 | 387 | PIN_MutexLock(&Mutex1); 388 | 389 | PIN_MutexUnlock(&Mutex1); 390 | 391 | // shmdt(Mutex); 392 | shmdt(main_start); 393 | 394 | // shmctl(ShmID1, IPC_RMID, NULL); 395 | // shmctl(ShmID2, IPC_RMID, NULL); 396 | 397 | PIN_MutexFini(&Mutex); 398 | PIN_MutexFini(&Mutex1); 399 | 400 | TraceFile.close(); 401 | 402 | cout<<"\nIns_count is: "; 403 | cout<<" \nwith funct: "< 2 | #include 3 | 4 | class page 5 | { 6 | 7 | unsigned long page_vaddr = 0; 8 | unsigned long page_paddr = 0; 9 | 10 | public: 11 | bool referenced_bit = 0; 12 | bool TLB_present_bit = 0; 13 | bool in_victim_list_bit = 0; 14 | page(unsigned long p_vaddr, unsigned long p_paddr) 15 | { 16 | page_vaddr=p_vaddr; 17 | page_paddr=p_paddr; 18 | } 19 | 20 | void set_page_map(unsigned long vaddr, unsigned long paddr) 21 | { 22 | page_vaddr = vaddr; 23 | page_paddr = paddr; 24 | } 25 | 26 | unsigned long get_page_vaddr() 27 | { 28 | return page_vaddr; 29 | } 30 | 31 | unsigned long get_page_base_addr(unsigned long vaddr) 32 | { 33 | 
if(vaddr==page_vaddr) 34 | return page_paddr; 35 | else 36 | return 0L; 37 | } 38 | 39 | unsigned long get_page_physical_addr() 40 | { 41 | return this->page_paddr; 42 | } 43 | 44 | }; 45 | 46 | class pte 47 | { 48 | 49 | std::vector _pte; 50 | unsigned long pte_vaddr = 0L; 51 | 52 | public: 53 | 54 | pte(unsigned long vaddr): pte_vaddr(vaddr) 55 | { 56 | _pte.reserve(512); 57 | } 58 | 59 | void add_in_pte(unsigned long vaddr, unsigned long paddr) 60 | { 61 | if(_pte.size() > 512) 62 | return; 63 | 64 | _pte.emplace_back(vaddr, paddr); 65 | } 66 | 67 | page* access_in_pte(unsigned long vaddr) 68 | { 69 | for (page& p: _pte) 70 | if (p.get_page_vaddr() == vaddr) 71 | return &p; 72 | 73 | return nullptr; 74 | } 75 | 76 | void set_pte_vaddr(unsigned long vaddr) 77 | { 78 | pte_vaddr = vaddr; 79 | } 80 | 81 | unsigned long get_pte_vaddr() 82 | { 83 | return pte_vaddr; 84 | } 85 | }; 86 | 87 | class pmd 88 | { 89 | 90 | std::vector _pmd; 91 | unsigned long pmd_vaddr = 0L; 92 | 93 | public: 94 | 95 | pmd(unsigned long vaddr): pmd_vaddr(vaddr) 96 | { 97 | _pmd.reserve(512); 98 | } 99 | 100 | void add_in_pmd(unsigned long vaddr) 101 | { 102 | if(_pmd.size() > 512) 103 | return; 104 | 105 | _pmd.emplace_back(vaddr); 106 | } 107 | 108 | pte* access_in_pmd(unsigned long vaddr) 109 | { 110 | for (pte& p: _pmd) 111 | if (p.get_pte_vaddr() == vaddr) 112 | return &p; 113 | 114 | return nullptr; 115 | } 116 | 117 | void set_pmd_vaddr(unsigned long vaddr) 118 | { 119 | pmd_vaddr = vaddr; 120 | } 121 | 122 | unsigned long get_pmd_vaddr() 123 | { 124 | return pmd_vaddr; 125 | } 126 | }; 127 | 128 | class pud 129 | { 130 | 131 | std::vector _pud; 132 | unsigned long pud_vaddr = 0L; 133 | 134 | public: 135 | 136 | pud(unsigned long vaddr): pud_vaddr(vaddr) 137 | { 138 | _pud.reserve(512); 139 | } 140 | 141 | void add_in_pud(unsigned long vaddr) 142 | { 143 | if(_pud.size() > 512) 144 | return; 145 | 146 | _pud.emplace_back(vaddr); 147 | } 148 | 149 | pmd* access_in_pud(unsigned long 
vaddr) 150 | { 151 | for (pmd& p: _pud) 152 | { 153 | if (p.get_pmd_vaddr() == vaddr) 154 | return &p; 155 | } 156 | return nullptr; 157 | } 158 | 159 | void set_pud_vaddr(unsigned long vaddr) 160 | { 161 | pud_vaddr = vaddr; 162 | } 163 | 164 | unsigned long get_pud_vaddr() 165 | { 166 | return pud_vaddr; 167 | } 168 | }; 169 | 170 | class pgd 171 | { 172 | 173 | std::vector _pgd; 174 | unsigned long pgd_vaddr = 0L; 175 | 176 | public: 177 | 178 | pgd() = default; 179 | 180 | pgd(unsigned long vaddr): pgd_vaddr(vaddr) 181 | { 182 | _pgd.reserve(512); 183 | } 184 | 185 | void add_in_pgd(unsigned long vaddr) 186 | { 187 | if(_pgd.size() > 512) 188 | return; 189 | 190 | _pgd.emplace_back(vaddr); 191 | } 192 | 193 | pud* access_in_pgd(unsigned long vaddr) 194 | { 195 | for (pud& p: _pgd) 196 | if (p.get_pud_vaddr() == vaddr) 197 | return &p; 198 | 199 | return nullptr; 200 | } 201 | 202 | void set_pgd_vaddr(unsigned long vaddr) 203 | { 204 | pgd_vaddr = vaddr; 205 | } 206 | 207 | unsigned long get_pgd_vaddr() 208 | { 209 | return pgd_vaddr; 210 | } 211 | }; 212 | 213 | 214 | unsigned long get_page_addr(unsigned long paddr) 215 | { 216 | unsigned long page_addr=paddr & (0xfffffffff000); 217 | page_addr=page_addr>>12; 218 | return page_addr; 219 | } 220 | 221 | void split_vaddr(unsigned long &pgd, unsigned long &pud, unsigned long &pmd, unsigned long &pte, unsigned long &page_offset, unsigned long vaddr) 222 | { 223 | //cout<<"\nvaddr ="<(vaddr)<<"\n"; 224 | 225 | page_offset=vaddr & (0x000000000fff); 226 | 227 | pte=vaddr & (0x0000001ff000); 228 | pte=pte>>12; 229 | 230 | pmd=vaddr & (0x00003fe00000); 231 | pmd=pmd>>21; 232 | 233 | pud=vaddr & (0x0007fc0000000); 234 | pud=pud>>30; 235 | 236 | pgd=vaddr & (0xff8000000000); 237 | pgd=pgd>>39; 238 | //cout<<"\n"<<(pte)<<"\n"; 239 | 240 | } 241 | 242 | 243 | /*int main() 244 | { 245 | 246 | pgd p; 247 | p.add_in_pgd(1); 248 | pud *a=p.access_in_pgd(1); 249 | a->add_in_pud(12); 250 | pmd *b=a->access_in_pud(12); 251 | 
b->add_in_pmd(123); 252 | pte *c=b->access_in_pmd(123); 253 | c->add_in_pte(10,20); 254 | 255 | a->add_in_pud(23); 256 | p.add_in_pgd(2); 257 | a=p.access_in_pgd(2); 258 | a->add_in_pud(1234); 259 | b=a->access_in_pud(1234); 260 | b->add_in_pmd(456); 261 | c=b->access_in_pmd(456); 262 | c->add_in_pte(20,30); 263 | p.add_in_pgd(3); 264 | a=p.access_in_pgd(3); 265 | a->add_in_pud(34); 266 | 267 | b->add_in_pmd(567); 268 | c->add_in_pte(30,40); 269 | 270 | pgd _pgd[5]; //to accomodate 5-processes, can be dynamically declared as per need 271 | long int vaddr=0x0000001f1ffe; 272 | 273 | long int a,b,c,d,e; 274 | int proc_id=1; 275 | 276 | // long int paddr; 277 | 278 | split_vaddr(a,b,c,d,e,vaddr); 279 | 280 | return 0; 281 | }*/ -------------------------------------------------------------------------------- /DRACKSim-Detailed/Makefile.sim: -------------------------------------------------------------------------------- 1 | 2 | #tell the linker the rpath so that we don't have to muck with LD_LIBRARY_PATH, etc 3 | main: main.cpp 4 | $(CXX) -g -o DRackSim main.cpp -I./boost/ -I./DRAMSim2/ -L./DRAMSim2/ -ldramsim -Wl,-rpath=./DRAMSim2/ -lpthread 5 | 6 | clean: 7 | rm read 8 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/TLB.h: -------------------------------------------------------------------------------- 1 | typedef uint32_t TLB_STATS; 2 | 3 | #ifndef TLB_H 4 | #define TLB_H 5 | 6 | #include 7 | #include 8 | /*! 9 | * @brief Checks if n is a power of 2. 10 | * @returns true if n is power of 2 11 | */ 12 | static inline bool IsPower2(uint32_t n) 13 | { 14 | return ((n & (n - 1)) == 0); 15 | } 16 | 17 | /*! 18 | * @brief Computes floor(log2(n)) 19 | * Works by finding position of MSB set. 20 | * @returns -1 if n == 0. 
21 | */ 22 | static inline uint32_t FloorLog2(uint32_t n) 23 | { 24 | uint32_t p = 0; 25 | 26 | if (n == 0) 27 | return -1; 28 | 29 | if (n & 0xffff0000) 30 | { 31 | p += 16; 32 | n >>= 16; 33 | } 34 | if (n & 0x0000ff00) 35 | { 36 | p += 8; 37 | n >>= 8; 38 | } 39 | if (n & 0x000000f0) 40 | { 41 | p += 4; 42 | n >>= 4; 43 | } 44 | if (n & 0x0000000c) 45 | { 46 | p += 2; 47 | n >>= 2; 48 | } 49 | if (n & 0x00000002) 50 | { 51 | p += 1; 52 | } 53 | 54 | return p; 55 | } 56 | 57 | /*! 58 | * @brief Computes floor(log2(n)) 59 | * Works by finding position of MSB set. 60 | * @returns -1 if n == 0. 61 | */ 62 | static inline uint32_t CeilLog2(uint32_t n) 63 | { 64 | return FloorLog2(n - 1) + 1; 65 | } 66 | 67 | /*! 68 | * @brief TLB tag - self clearing on creation 69 | */ 70 | class TLB_TAG 71 | { 72 | private: 73 | uint64_t _tag; 74 | 75 | public: 76 | TLB_TAG(uint64_t tag = 0) { _tag = tag; } 77 | bool operator==(const TLB_TAG &right) const { return _tag == right._tag; } 78 | operator uint64_t() const { return _tag; } 79 | }; 80 | 81 | /*! 82 | * Everything related to Tlb sets 83 | */ 84 | namespace TLB_SET 85 | { 86 | 87 | /*! 
88 | * @brief Tlb set direct mapped 89 | */ 90 | class DIRECT_MAPPED 91 | { 92 | private: 93 | TLB_TAG _tag; 94 | int32_t _ASID; 95 | uint64_t _PPN; 96 | int32_t _valid; 97 | 98 | 99 | public: 100 | DIRECT_MAPPED(uint32_t associativity = 1) 101 | { 102 | assert(associativity == 1); 103 | _tag = -1; 104 | _ASID = -1; 105 | _PPN = NULL; 106 | _valid = 0; 107 | } 108 | 109 | void SetAssociativity(uint32_t associativity) { assert(associativity == 1); } 110 | uint32_t GetAssociativity(uint32_t associativity) { return 1; } 111 | 112 | uint32_t Find_And_Access(TLB_TAG tag, int32_t ASID, uint64_t &paddr) 113 | { 114 | if (_valid == 1 && _tag == tag && _ASID == ASID) 115 | { 116 | paddr = _PPN; 117 | return true; 118 | } 119 | else 120 | { 121 | paddr = NULL; 122 | return false; 123 | } 124 | } 125 | void Replace(TLB_TAG tag, int32_t ASID, uint64_t paddr) 126 | { 127 | _tag = tag; 128 | _ASID = ASID; 129 | _PPN = paddr; 130 | _valid = 1; 131 | } 132 | void Invalidate(TLB_TAG tag, int32_t ASID) 133 | { 134 | if (_valid == 1 && _tag == tag && _ASID == ASID) 135 | _valid = 0; 136 | } 137 | void Flush() 138 | { 139 | _tag = 0; 140 | _ASID = 0; 141 | _valid = 0; 142 | _PPN = NULL; 143 | } 144 | }; 145 | 146 | /*! 
147 | * @brief Tlb set with round robin replacement 148 | */ 149 | template 150 | class ROUND_ROBIN 151 | { 152 | private: 153 | TLB_TAG _tags[MAX_ASSOCIATIVITY]; 154 | uint32_t _tagsLastIndex; 155 | uint32_t _nextReplaceIndex; 156 | int32_t _ASID[MAX_ASSOCIATIVITY]; 157 | uint64_t _PPN[MAX_ASSOCIATIVITY]; 158 | int32_t _valid[MAX_ASSOCIATIVITY]; 159 | 160 | public: 161 | ROUND_ROBIN(uint32_t associativity = MAX_ASSOCIATIVITY) 162 | : _tagsLastIndex(associativity - 1) 163 | { 164 | assert(associativity <= MAX_ASSOCIATIVITY); 165 | _nextReplaceIndex = _tagsLastIndex; 166 | 167 | for (int32_t index = _tagsLastIndex; index >= 0; index--) 168 | { 169 | _tags[index] = TLB_TAG(0); 170 | _ASID[index] = 0; 171 | _PPN[index] = NULL; 172 | _valid[index] = 0; 173 | } 174 | } 175 | 176 | void SetAssociativity(uint32_t associativity) 177 | { 178 | assert(associativity <= MAX_ASSOCIATIVITY); 179 | _tagsLastIndex = associativity - 1; 180 | _nextReplaceIndex = _tagsLastIndex; 181 | } 182 | uint32_t GetAssociativity(uint32_t associativity) { return _tagsLastIndex + 1; } 183 | 184 | uint32_t Find_and_Access(TLB_TAG tag, int32_t ASID, uint64_t &paddr) 185 | { 186 | // std::cout<= 0; index--) 191 | { 192 | // this is an ugly micro-optimization, but it does cause a 193 | // tighter assembly loop for ARM that way ... 194 | if (_tags[index] == tag && _ASID[index] == ASID && _valid[index] == 1) 195 | { 196 | paddr = _PPN[index]; 197 | goto end; 198 | } 199 | } 200 | result = false; 201 | paddr = NULL; 202 | 203 | end: 204 | return result; 205 | } 206 | 207 | void Replace(TLB_TAG tag, int32_t ASID, uint64_t paddr, uint64_t &victim_paddr, uint64_t victim_tag) 208 | { 209 | // g++ -O3 too dumb to do CSE on following lines?! 
210 | const uint32_t index = _nextReplaceIndex; 211 | 212 | victim_paddr=_PPN[index]; 213 | victim_tag=_tags[index]; 214 | 215 | _tags[index] = tag; 216 | _ASID[index] = ASID; 217 | _PPN[index] = paddr; 218 | _valid[index] = 1; 219 | // condition typically faster than modulo 220 | _nextReplaceIndex = (index == 0 ? _tagsLastIndex : index - 1); 221 | } 222 | 223 | void Invalidate(TLB_TAG tag, int32_t ASID) 224 | { 225 | bool result = true; 226 | 227 | for (int32_t index = _tagsLastIndex; index >= 0; index--) 228 | { 229 | // this is an ugly micro-optimization, but it does cause a 230 | // tighter assembly loop for ARM that way ... 231 | if (_tags[index] == tag && _ASID[index] == ASID && _valid[index] == 1) 232 | { 233 | _valid[index] = 0; 234 | } 235 | } 236 | } 237 | 238 | void Flush() 239 | { 240 | for (int32_t index = _tagsLastIndex; index >= 0; index--) 241 | { 242 | _tags[index] = 0; 243 | _ASID[index] = 0; 244 | _PPN[index] = NULL; 245 | _valid[index] = 0; 246 | } 247 | _nextReplaceIndex = _tagsLastIndex; 248 | } 249 | }; 250 | 251 | } // namespace TLB_SET 252 | 253 | namespace TLB_ALLOC 254 | { 255 | typedef enum 256 | { 257 | STORE_ALLOCATE, 258 | STORE_NO_ALLOCATE 259 | } STORE_ALLOCATION; 260 | } 261 | 262 | /*! 
263 | * @brief Generic tlb base class; no allocate specialization, no tlb set specialization 264 | */ 265 | class TLB_BASE 266 | { 267 | public: 268 | // types, constants 269 | typedef enum 270 | { 271 | ACCESS_TYPE_LOAD, 272 | ACCESS_TYPE_STORE, 273 | ACCESS_TYPE_NUM 274 | } ACCESS_TYPE; 275 | 276 | protected: 277 | static const uint32_t HIT_MISS_NUM = 2; 278 | TLB_STATS _access[ACCESS_TYPE_NUM][HIT_MISS_NUM]; 279 | 280 | private: 281 | // input params 282 | const std::string _name; 283 | const uint32_t _tlbSize; 284 | const uint32_t _lineSize; 285 | const uint32_t _associativity; 286 | uint32_t _numberOfFlushes; 287 | uint32_t _numberOfResets; 288 | 289 | // computed params 290 | const uint32_t _lineShift; 291 | const uint32_t _setIndexMask; 292 | 293 | TLB_STATS SumAccess(bool hit) const 294 | { 295 | TLB_STATS sum = 0; 296 | 297 | for (uint32_t accessType = 0; accessType < ACCESS_TYPE_NUM; accessType++) 298 | { 299 | sum += _access[accessType][hit]; 300 | } 301 | 302 | return sum; 303 | } 304 | 305 | protected: 306 | uint32_t NumSets() const { return _setIndexMask + 1; } 307 | 308 | public: 309 | 310 | uint32_t get_linesize() 311 | { 312 | return _lineShift; 313 | } 314 | // constructors/destructors 315 | TLB_BASE(std::string name, uint32_t tlbSize, uint32_t lineSize, uint32_t associativity); 316 | 317 | // accessors 318 | uint32_t TlbSize() const { return _tlbSize; } 319 | uint32_t LineSize() const { return _lineSize; } 320 | uint32_t Associativity() const { return _associativity; } 321 | // 322 | TLB_STATS Hits(ACCESS_TYPE accessType) const { return _access[accessType][true]; } 323 | TLB_STATS Misses(ACCESS_TYPE accessType) const { return _access[accessType][false]; } 324 | TLB_STATS Accesses(ACCESS_TYPE accessType) const { return Hits(accessType) + Misses(accessType); } 325 | TLB_STATS Hits() const { return SumAccess(true); } 326 | TLB_STATS Misses() const { return SumAccess(false); } 327 | TLB_STATS Accesses() const { return Hits() + Misses(); } 328 | 329 | 
TLB_STATS Flushes() const { return _numberOfFlushes; } 330 | TLB_STATS Resets() const { return _numberOfResets; } 331 | 332 | void SplitAddress(const uint64_t addr, TLB_TAG &tag, uint32_t &setIndex) const 333 | { 334 | tag = addr >> _lineShift; 335 | setIndex = tag & _setIndexMask; 336 | } 337 | 338 | void SplitAddress(const uint64_t addr, TLB_TAG &tag, uint32_t &setIndex, uint32_t &lineIndex) const 339 | { 340 | const uint32_t lineMask = _lineSize - 1; 341 | lineIndex = addr & lineMask; 342 | SplitAddress(addr, tag, setIndex); 343 | } 344 | 345 | void IncFlushCounter() 346 | { 347 | _numberOfFlushes += 1; 348 | } 349 | 350 | void IncResetCounter() 351 | { 352 | _numberOfResets += 1; 353 | } 354 | std::string GetName() 355 | { 356 | return _name; 357 | } 358 | 359 | std::ostream &StatsLong(std::ostream &out) const; 360 | }; 361 | 362 | TLB_BASE::TLB_BASE(std::string name, uint32_t tlbSize, uint32_t lineSize, uint32_t associativity) 363 | : _name(name), 364 | _tlbSize(tlbSize), 365 | _lineSize(lineSize), 366 | _associativity(associativity), 367 | _lineShift(FloorLog2(lineSize)), 368 | _setIndexMask((tlbSize / (associativity * lineSize)) - 1) 369 | { 370 | 371 | assert(IsPower2(_lineSize)); 372 | assert(IsPower2(_setIndexMask + 1)); 373 | 374 | for (uint32_t accessType = 0; accessType < ACCESS_TYPE_NUM; accessType++) 375 | { 376 | _access[accessType][false] = 0; 377 | _access[accessType][true] = 0; 378 | } 379 | } 380 | 381 | /*! 382 | * @brief Stats output method 383 | */ 384 | std::ostream &TLB_BASE::StatsLong(std::ostream &out) const 385 | { 386 | const uint32_t headerWidth = 19; 387 | const uint32_t numberWidth = 10; 388 | 389 | out << _name << ":" << std::endl; 390 | 391 | for (uint32_t i = 0; i < ACCESS_TYPE_NUM; i++) 392 | { 393 | const ACCESS_TYPE accessType = ACCESS_TYPE(i); 394 | 395 | std::string type(accessType == ACCESS_TYPE_LOAD ? 
"Load" : "Store"); 396 | 397 | out << StringString(type + " Hits: ", headerWidth) 398 | << StringInt(Hits(accessType), numberWidth) << std::endl; 399 | out << StringString(type + " Misses: ", headerWidth) 400 | << StringInt(Misses(accessType), numberWidth) << std::endl; 401 | out << StringString(type + " Accesses: ", headerWidth) 402 | << StringInt(Accesses(accessType), numberWidth) << std::endl; 403 | out << StringString(type + " Miss Rate: ", headerWidth) 404 | << StringFlt(100.0 * Misses(accessType) / Accesses(accessType), 2, numberWidth - 1) << "%" << std::endl; 405 | out << std::endl; 406 | } 407 | 408 | out << StringString("Total Hits: ", headerWidth, ' ') 409 | << StringInt(Hits(), numberWidth) << std::endl; 410 | out << StringString("Total Misses: ", headerWidth, ' ') 411 | << StringInt(Misses(), numberWidth) << std::endl; 412 | out << StringString("Total Accesses: ", headerWidth, ' ') 413 | << StringInt(Accesses(), numberWidth) << std::endl; 414 | out << StringString("Total Miss Rate: ", headerWidth, ' ') 415 | << StringFlt(100.0 * Misses() / Accesses(), 2, numberWidth - 1) << "%" << std::endl; 416 | 417 | out << StringString("Flushes: ", headerWidth, ' ') 418 | << StringInt(Flushes(), numberWidth) << std::endl; 419 | out << StringString("Stat Resets: ", headerWidth, ' ') 420 | << StringInt(Resets(), numberWidth) << std::endl; 421 | 422 | out << std::endl; 423 | 424 | return out; 425 | } 426 | 427 | /// ostream operator for TLB_BASE 428 | std::ostream &operator<<(std::ostream &out, const TLB_BASE &tlbBase) 429 | { 430 | return tlbBase.StatsLong(out); 431 | } 432 | 433 | /*! 434 | * @brief Templated tlb class with specific tlb set allocation policies 435 | * 436 | * All that remains to be done here is allocate and deallocate the right 437 | * type of tlb sets. 
438 | */ 439 | template 440 | class TLB : public TLB_BASE 441 | { 442 | private: 443 | SET _sets[MAX_SETS]; 444 | 445 | public: 446 | // constructors/destructors 447 | TLB(std::string name, uint32_t tlbSize, uint32_t lineSize, uint32_t associativity) 448 | : TLB_BASE(name, tlbSize, lineSize, associativity) 449 | { 450 | assert(NumSets() <= MAX_SETS); 451 | 452 | for (uint32_t i = 0; i < NumSets(); i++) 453 | { 454 | _sets[i].SetAssociativity(associativity); 455 | } 456 | } 457 | 458 | // modifiers 459 | /// Tlb access at addr 460 | bool AccessTLB(uint64_t addr, ACCESS_TYPE accessType, int32_t ASID, uint64_t &paddr); 461 | void ReplaceTLB(uint64_t addr, ACCESS_TYPE accessType, int32_t ASID, uint64_t &paddr, uint64_t &victim_paddr, uint64_t &victim_vaddr); 462 | void InvalidateTLB(uint64_t addr, int32_t ASID); 463 | void Flush(); 464 | void ResetStats(); 465 | }; 466 | 467 | /*! 468 | * @return true if accessed tlb line hits 469 | */ 470 | template 471 | bool TLB::AccessTLB(uint64_t addr, ACCESS_TYPE accessType, int32_t ASID, uint64_t &paddr) 472 | { 473 | TLB_TAG tag; 474 | uint32_t setIndex; 475 | 476 | SplitAddress(addr, tag, setIndex); 477 | 478 | SET &set = _sets[setIndex]; 479 | 480 | bool hit = set.Find_and_Access(tag, ASID, paddr); 481 | 482 | // on miss, loads always allocate, stores optionally 483 | 484 | _access[accessType][hit]++; 485 | 486 | return hit; 487 | } 488 | /*! 
489 | * @return true if accessed tlb line hits 490 | */ 491 | template 492 | void TLB::ReplaceTLB(uint64_t addr, ACCESS_TYPE accessType, int32_t ASID, uint64_t &paddr, uint64_t &victim_paddr, uint64_t &victim_vaddr) 493 | { 494 | TLB_TAG tag; 495 | uint32_t setIndex; 496 | uint64_t victim_tag; 497 | 498 | SplitAddress(addr, tag, setIndex); 499 | 500 | SET &set = _sets[setIndex]; 501 | 502 | // bool hit = set.Find(tag,ASID,paddr); 503 | 504 | // on miss, loads always allocate, stores optionally 505 | if ((accessType == ACCESS_TYPE_LOAD || STORE_ALLOCATION == TLB_ALLOC::STORE_ALLOCATE)) 506 | { 507 | set.Replace(tag, ASID, paddr, victim_paddr, victim_tag); 508 | victim_vaddr=victim_tag<<(get_linesize()); 509 | } 510 | 511 | //_access[accessType][hit]++; 512 | 513 | // return hit; 514 | } 515 | 516 | template 517 | void TLB::InvalidateTLB(uint64_t addr, int32_t ASID) 518 | { 519 | TLB_TAG tag; 520 | uint32_t setIndex; 521 | 522 | SplitAddress(addr, tag, setIndex); 523 | 524 | SET &set = _sets[setIndex]; 525 | 526 | // bool hit = set.Find(tag,ASID,paddr); 527 | 528 | // on miss, loads always allocate, stores optionally 529 | set.Invalidate(tag, ASID); 530 | 531 | // if ((accessType == ACCESS_TYPE_LOAD || STORE_ALLOCATION == TLB_ALLOC::STORE_ALLOCATE)) 532 | // { 533 | // set.Replace(tag,ASID,paddr); 534 | // } 535 | 536 | //_access[accessType][hit]++; 537 | 538 | // return hit; 539 | } 540 | 541 | template 542 | void TLB::Flush() 543 | { 544 | for (int32_t index = NumSets(); index >= 0; index--) 545 | { 546 | SET &set = _sets[index]; 547 | set.Flush(); 548 | } 549 | IncFlushCounter(); 550 | } 551 | 552 | template 553 | void TLB::ResetStats() 554 | { 555 | for (uint32_t accessType = 0; accessType < ACCESS_TYPE_NUM; accessType++) 556 | { 557 | _access[accessType][false] = 0; 558 | _access[accessType][true] = 0; 559 | } 560 | IncResetCounter(); 561 | } 562 | 563 | // define shortcuts 564 | #define TLB_DIRECT_MAPPED(MAX_SETS, ALLOCATION) TLB 565 | #define 
TLB_ROUND_ROBIN(MAX_SETS, MAX_ASSOCIATIVITY, ALLOCATION) \ 566 | TLB, MAX_SETS, ALLOCATION> 567 | 568 | // TLB Data Structures For 'm' nodes each with 'n' cores 569 | namespace ITLB 570 | { 571 | // instruction TLB: 4 kB pages, 128 entries, 8-way associative 572 | const uint32_t lineSize = 4 * KILO; 573 | const uint32_t tlbSize = 512 * lineSize; 574 | const uint32_t associativity = 8; 575 | const TLB_ALLOC::STORE_ALLOCATION allocation = TLB_ALLOC::STORE_ALLOCATE; 576 | 577 | const uint32_t max_sets = tlbSize / (lineSize * associativity); 578 | const uint32_t max_associativity = associativity; 579 | 580 | typedef TLB_ROUND_ROBIN(max_sets, max_associativity, allocation) TLB; 581 | } 582 | extern ITLB::TLB itlbs[core_count * num_nodes]; 583 | #define BOOST_PP_LOCAL_LIMITS (0, ((core_count * num_nodes) - 1)) 584 | #define BOOST_PP_LOCAL_MACRO(n) ITLB::TLB("ITLB" #n, ITLB::tlbSize, ITLB::lineSize, ITLB::associativity), 585 | ITLB::TLB itlbs[] = 586 | { 587 | #include "boost/preprocessor/iteration/detail/local.hpp" 588 | }; 589 | 590 | namespace DTLB 591 | { 592 | // data TLB: 4 kB pages, 64 entries, 4-way associative 593 | const uint32_t lineSize = 4 * KILO; 594 | const uint32_t tlbSize = 1024 * lineSize; 595 | const uint32_t associativity = 4; 596 | const TLB_ALLOC::STORE_ALLOCATION allocation = TLB_ALLOC::STORE_ALLOCATE; 597 | 598 | const uint32_t max_sets = tlbSize / (lineSize * associativity); 599 | const uint32_t max_associativity = associativity; 600 | 601 | typedef TLB_ROUND_ROBIN(max_sets, max_associativity, allocation) TLB; 602 | } 603 | extern DTLB::TLB dtlbs[core_count * num_nodes]; 604 | #define BOOST_PP_LOCAL_LIMITS (0, ((core_count * num_nodes) - 1)) 605 | #define BOOST_PP_LOCAL_MACRO(n) DTLB::TLB("DTLB" #n, DTLB::tlbSize, DTLB::lineSize, DTLB::associativity), 606 | DTLB::TLB dtlbs[] = 607 | { 608 | #include "boost/preprocessor/iteration/detail/local.hpp" 609 | }; 610 | 611 | #endif // PIN_TLB_H 612 | 
-------------------------------------------------------------------------------- /DRACKSim-Detailed/allocator.cpp: -------------------------------------------------------------------------------- 1 | static int round_robin_last=1; 2 | 3 | //pool allocation policies 4 | 5 | 6 | //round-robin memory pool allocation 7 | int round_robin_pool_select() 8 | { 9 | if(num_mem_pools==1) 10 | return 0; 11 | 12 | out<<"\n\n=======Window-"<num_mem_pools) 18 | { 19 | round_robin_last=round_robin_last%num_mem_pools; 20 | } 21 | round_robin_last++; 22 | out<<"\nSelected pool is- "<<(round_robin_last-2); 23 | return (round_robin_last-2); 24 | } 25 | 26 | 27 | 28 | // static int node_round_robin_last[num_nodes]; 29 | // //node-wise round-robin memory pool allocation 30 | // int per_node_round_robin_pool_select(int node_no) 31 | // { 32 | // out<<"\n\n=======Window-"<num_mem_pools) 38 | // { 39 | // node_round_robin_last[node_no]=node_round_robin_last[node_no]%num_mem_pools; 40 | // } 41 | // node_round_robin_last[node_no]++; 42 | // out<<"\nSelected pool for Node:"<num_mem_pools) 88 | // { 89 | // round_robin_last=round_robin_last%num_mem_pools; 90 | // } 91 | // round_robin_last++; 92 | // out<<"\nSelected pool is- "<<(round_robin_last-2); 93 | // alloc_count[round_robin_last-2]++; 94 | // return (round_robin_last-2); 95 | // } 96 | // else 97 | // { 98 | // int min=num_mem_pools; 99 | // for(int i=0;inum_mem_pools) 142 | // { 143 | // round_robin_last=round_robin_last%num_mem_pools; 144 | // } 145 | // round_robin_last++; 146 | // out<<"\nSelected pool is- "<<(round_robin_last-2); 147 | // alloc_count[round_robin_last-2]++; 148 | // last_alloc=round_robin_last-2; 149 | // return (round_robin_last-2); 150 | // } 151 | // else 152 | // { 153 | // for(int z=0;z0) 162 | // { 163 | // for(int j=0;j limit); 210 | 211 | // out<<"\n\n=======Window-"<max && node_epoch_chunk_count[i]!=0) 248 | // max=node_epoch_chunk_count[i]; 249 | 250 | // if(node_epoch_chunk_count[i]0) 255 | // 
num_nodes_rqstd_chunks++; 256 | // } 257 | 258 | // invalid<<"\nMax Count-"<high_limit && node_epoch_chunk_count[i]!=0) 285 | // { 286 | // if(node_chunk_request_size[i]!=16) 287 | // node_chunk_request_size[i]=node_chunk_request_size[i]+2; 288 | // else 289 | // node_chunk_request_size[i]=16; 290 | // } 291 | 292 | // if(node_epoch_chunk_count[i]0) 333 | // { 334 | // if(node_epoch_chunk_count[i]>max_chunk_count) 335 | // max_chunk_count=node_epoch_chunk_count[i]; 336 | 337 | // if(node_epoch_chunk_count[i]0) 344 | // { 345 | // if(node_epoch_access_count[i]>max_mem_rqsts) 346 | // max_mem_rqsts=node_epoch_access_count[i]; 347 | 348 | // if(node_epoch_access_count[i]low_limit_mem_rqsts && node_epoch_access_count[i]<=high_limit_mem_rqsts) 386 | // access_rate=0; 387 | // else if(node_epoch_access_count[i]>high_limit_mem_rqsts) 388 | // access_rate=1; 389 | 390 | // //find chunk request frequency range of a node 391 | // if(node_epoch_chunk_count[i]>high_limit_chunk_count) 392 | // chunk_count=1; 393 | // else if(node_epoch_chunk_count[i]>=low_limit_chunk_count && node_epoch_chunk_count[i]<=high_limit_chunk_count) 394 | // chunk_count=0; 395 | // else if(node_epoch_chunk_count[i]8 && node_chunk_request_size[i]<=12) 403 | // chunk_size=0; 404 | // else if(node_chunk_request_size[i]>12) 405 | // chunk_size=1; 406 | // */ 407 | // //modify size 408 | 409 | 410 | // if(node_epoch_access_count[i]>0) 411 | // { 412 | // if(access_rate==-1 && chunk_count==-1) 413 | // node_chunk_request_size[i]=node_chunk_request_size[i]-4; 414 | // else if(access_rate==-1 && chunk_count== 0) 415 | // node_chunk_request_size[i]=node_chunk_request_size[i]-2; 416 | // else if(access_rate==-1 && chunk_count== 1) 417 | // node_chunk_request_size[i]=node_chunk_request_size[i]; 418 | // else if(access_rate== 0 && chunk_count==-1) 419 | // node_chunk_request_size[i]=node_chunk_request_size[i]+2; 420 | // else if(access_rate== 0 && chunk_count== 0) 421 | // 
node_chunk_request_size[i]=node_chunk_request_size[i]; //no-change 422 | // else if(access_rate== 0 && chunk_count== 1) 423 | // node_chunk_request_size[i]=node_chunk_request_size[i]-2; 424 | // else if(access_rate== 1 && chunk_count==-1) 425 | // node_chunk_request_size[i]=node_chunk_request_size[i]; 426 | // else if(access_rate== 1 && chunk_count== 0) 427 | // node_chunk_request_size[i]=node_chunk_request_size[i]+2; 428 | // else if(access_rate== 1 && chunk_count== 1) 429 | // node_chunk_request_size[i]=node_chunk_request_size[i]+4; 430 | 431 | // if(node_chunk_request_size[i]>16) 432 | // node_chunk_request_size[i]=16; 433 | // else if(node_chunk_request_size[i]<4) 434 | // node_chunk_request_size[i]=4; 435 | // } 436 | 437 | // invalid<<" New_N"<= ((MAX_COUNTER + 1)/2)) ? 1 : 0; 20 | 21 | return prediction; 22 | } 23 | 24 | void last_branch_result(uint64_t ip, int taken, int node_id, int core_id) 25 | { 26 | uint32_t hash = ip % BTB_Prime; 27 | 28 | if (taken && (branch_pred_table[node_id][core_id][hash] < MAX_COUNTER)) 29 | branch_pred_table[node_id][core_id][hash]++; 30 | else if ((taken == 0) && (branch_pred_table[node_id][core_id][hash] > 0)) 31 | branch_pred_table[node_id][core_id][hash]--; 32 | } 33 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/clear.sh: -------------------------------------------------------------------------------- 1 | ipcs | nawk -v u=`whoami` '/Shared/,/^$/{ if($6==0&&$3==u) print "ipcrm shm",$2,";"}/Semaphore/,/^$/{ if($3==u) print "ipcrm sem",$2,";"}' | /bin/sh 2 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/clear_sm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source ./clear.sh 3 | exit 4 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/icount.H: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2002-2019 Intel Corporation. 3 | * 4 | * This software is provided to you as Sample Source Code as defined in the accompanying 5 | * End User License Agreement for the Intel(R) Software Development Products ("Agreement") 6 | * section 1.L. 7 | * 8 | * This software and the related documents are provided as is, with no express or implied 9 | * warranties, other than those that are expressly stated in the License. 10 | */ 11 | 12 | #ifndef ICOUNT_H 13 | #define ICOUNT_H 14 | 15 | namespace INSTLIB 16 | { 17 | 18 | /*! @defgroup ICOUNT 19 | Instrumentation for counting instruction execution 20 | */ 21 | 22 | /*! @ingroup ICOUNT 23 | The example below can be found in InstLibExamples/icount.cpp 24 | 25 | \include icount.cpp 26 | */ 27 | class ICOUNT 28 | { 29 | public: 30 | ICOUNT() 31 | { 32 | _mode = ModeInactive; 33 | 34 | /* Allocate 64 byte aligned data for the statistics. */ 35 | _space = new char [(ISIMPOINT_MAX_THREADS+1)*sizeof(threadStats) -1]; 36 | 37 | ADDRINT space = VoidStar2Addrint(_space); 38 | ADDRINT align_1 = static_cast (cacheLineSize-1); 39 | _stats = reinterpret_cast((space+align_1) & ~align_1); 40 | memset (_stats, 0, ISIMPOINT_MAX_THREADS*sizeof(threadStats)); 41 | }; 42 | 43 | ~ICOUNT() 44 | { 45 | delete [] _space; 46 | } 47 | /*! @ingroup ICOUNT 48 | @return Total number of instructions executed. (But see @ref mode for what this means). 49 | */ 50 | 51 | UINT32 MultiThreadCount() const 52 | { 53 | UINT64 multithreadCount=0; 54 | ASSERTX(Mode() == ModeBoth); 55 | for(UINT64 i=0; icount - s->repDuplicateCount; 86 | } 87 | 88 | /*! @ingroup ICOUNT 89 | Set the current count 90 | */ 91 | VOID SetCount(UINT64 count, THREADID tid = 0) 92 | { 93 | ASSERTX(_mode != ModeInactive); 94 | ASSERTX(tid < ISIMPOINT_MAX_THREADS); 95 | _stats[tid].count = count; 96 | _stats[tid].repDuplicateCount = 0; 97 | } 98 | 99 | /*! 
@ingroup ICOUNT 100 | * The mode used for counting REP prefixed instructions. 101 | */ 102 | enum mode { 103 | ModeInactive = -1, 104 | ModeNormal = 0, /**< Count all instructions, each REP "iteration" adds 1 */ 105 | ModeBoth /**< Provide both the normal count and a count in which REP prefixed 106 | instructions are only counted once. */ 107 | }; 108 | 109 | /*! @ingroup ICOUNT 110 | * @return the mode of the ICOUNT object. 111 | */ 112 | mode Mode() const 113 | { 114 | return _mode; 115 | } 116 | 117 | /*! @ingroup ICOUNT 118 | Activate the counter, must be called before PIN_StartProgram. 119 | @param [in] mode Determine the way in which REP prefixed operations are counted. By default (ICOUNT::ModeNormal), 120 | REP prefixed instructions are counted as if REP is an implicit loop. By passing 121 | ICOUNT::ModeRepsCountedOnlyOnce you can have the counter treat each REP as only one dynamic instruction. 122 | */ 123 | VOID Activate(mode m = ModeNormal) 124 | { 125 | ASSERTX(_mode == ModeInactive); 126 | _mode = m; 127 | TRACE_AddInstrumentFunction(Trace, this); 128 | } 129 | 130 | private: 131 | enum { 132 | cacheLineSize = 64 133 | }; 134 | 135 | static VOID Trace(TRACE trace, VOID * icount) 136 | { 137 | #if (defined(TARGET_IA32) || defined(TARGET_IA32E)) 138 | ICOUNT const * ic = reinterpret_cast(icount); 139 | mode m = ic->Mode(); 140 | #endif 141 | for (BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) 142 | { 143 | BBL_InsertCall(bbl, IPOINT_ANYWHERE, 144 | AFUNPTR(Advance), 145 | IARG_FAST_ANALYSIS_CALL, 146 | IARG_ADDRINT, icount, 147 | IARG_ADDRINT, ADDRINT(BBL_NumIns(bbl)), 148 | IARG_THREAD_ID, 149 | IARG_END); 150 | 151 | // REP prefixed instructions are an IA-32 and Intel(R) 64 feature 152 | #if (defined(TARGET_IA32) || defined(TARGET_IA32E)) 153 | if (m == ModeBoth) 154 | { // Check whether there are any REP prefixed instructions in the BBL 155 | // and, if so, subtract out their execution unless it is the first 156 | // iteration. 
157 | for (INS ins = BBL_InsHead(bbl); 158 | INS_Valid(ins); 159 | ins = INS_Next(ins)) 160 | { 161 | if (INS_HasRealRep(ins)) 162 | { 163 | INS_InsertCall(ins, IPOINT_BEFORE, 164 | AFUNPTR(CountDuplicates), 165 | IARG_FAST_ANALYSIS_CALL, 166 | IARG_ADDRINT, icount, 167 | IARG_FIRST_REP_ITERATION, 168 | IARG_THREAD_ID, 169 | IARG_END); 170 | 171 | } 172 | } 173 | } 174 | #endif 175 | } 176 | } 177 | 178 | static VOID PIN_FAST_ANALYSIS_CALL Advance(ICOUNT * ic, ADDRINT c, THREADID tid) 179 | { 180 | // ASSERTX(tid < ISIMPOINT_MAX_THREADS); 181 | ic->_stats[tid].count += c; 182 | } 183 | 184 | // Accumulate the count of REP prefixed executions which aren't the first iteration. 185 | // 186 | // We are assuming that this will be inlined, and is small, so there is no point 187 | // in guarding it with an InsertIf call testing IARG_FIRST_REP_ITERATION. 188 | static VOID PIN_FAST_ANALYSIS_CALL CountDuplicates(ICOUNT * ic, BOOL first, THREADID tid) 189 | { 190 | // ASSERTX(tid < ISIMPOINT_MAX_THREADS); 191 | ic->_stats[tid].repDuplicateCount += !first; 192 | } 193 | 194 | struct threadStats { 195 | UINT64 count; 196 | UINT64 repDuplicateCount; /* Number of REP iterations after the first */ 197 | char padding [cacheLineSize - 2*sizeof(UINT64)]; /* Expand so we can cache align this. 198 | * We want to avoid false sharing of the stats between threads. 199 | */ 200 | }; 201 | 202 | threadStats * _stats; 203 | char * _space; 204 | mode _mode; 205 | }; 206 | } 207 | #endif 208 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/instlib.H: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2002-2020 Intel Corporation. 3 | * 4 | * This software is provided to you as Sample Source Code as defined in the accompanying 5 | * End User License Agreement for the Intel(R) Software Development Products ("Agreement") 6 | * section 1.L. 
7 | * 8 | * This software and the related documents are provided as is, with no express or implied 9 | * warranties, other than those that are expressly stated in the License. 10 | */ 11 | 12 | #ifndef INSTLIB_H 13 | #define INSTLIB_H 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #define ISIMPOINT_MAX_THREADS 160 23 | 24 | #include "filter.H" 25 | #include "skipper.H" 26 | #include "icount.H" 27 | #include "follow_child.H" 28 | 29 | extern "C" 30 | { 31 | #include "xed-interface.h" 32 | } 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/makefile: -------------------------------------------------------------------------------- 1 | sim: 2 | make -f Makefile.sim 3 | 4 | pin: 5 | make -f makefile.pin obj-intel64/Ins_Tracer.so TARGET=intel64 -------------------------------------------------------------------------------- /DRACKSim-Detailed/makefile.pin: -------------------------------------------------------------------------------- 1 | ############################################################## 2 | # 3 | # DO NOT EDIT THIS FILE! 4 | # 5 | ############################################################## 6 | 7 | # If the tool is built out of the kit, PIN_ROOT must be specified in the make invocation and point to the kit root. 8 | ifdef PIN_ROOT 9 | CONFIG_ROOT := $(PIN_ROOT)/source/tools/Config 10 | else 11 | CONFIG_ROOT := ../Config 12 | endif 13 | include $(CONFIG_ROOT)/makefile.config 14 | include makefile.rules 15 | include $(TOOLS_ROOT)/Config/makefile.default.rules 16 | 17 | ############################################################## 18 | # 19 | # DO NOT EDIT THIS FILE! 
20 | # 21 | ############################################################## 22 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/mem_defs.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "MMU.cpp" 3 | #include "mmap.cpp" 4 | #include "DRAMSim2/DRAMSim.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | ofstream track; 12 | 13 | static int epoch_num = 0; 14 | 15 | #define num_nodes 1 16 | #define num_mem_pools 1 17 | #define core_count 4 18 | 19 | #define local_mem_size 4 20 | #define remote_mem_size 4 21 | 22 | // hardware DRAM units of same size in MBs (used by DRAMSim2) 23 | #define local_DRAM_size (local_mem_size * pow(2.0, 10.0)) 24 | #define remote_DRAM_size (remote_mem_size * pow(2.0, 10.0)) 25 | 26 | pthread_mutex_t lock_update; 27 | pthread_mutex_t lock_queue; 28 | pthread_mutex_t lock_mem; 29 | pthread_mutex_t lock; 30 | 31 | // One page-table for each process 32 | pgd _pgd[num_nodes]; // a max of 1-process assumed per node, should be increased 33 | // when more processes are simulated 34 | 35 | // instruction object structure 36 | struct INST 37 | { 38 | int proc_id; 39 | uint64_t ins_id; 40 | int64_t addr; // ns_addr 41 | int16_t threadid; 42 | int64_t read_opr; 43 | int16_t read_size; 44 | int64_t read_opr2; 45 | int16_t read_size2; 46 | int64_t write_opr; 47 | int16_t write_size; 48 | int32_t RR[4]; 49 | int32_t WR[4]; 50 | bool is_Branch; 51 | bool branch_taken; 52 | bool branch_miss_predicted; 53 | int16_t ins_type; 54 | }; 55 | 56 | struct remote_memory_access 57 | { 58 | uint64_t mem_access_addr; 59 | int source; 60 | int dest; 61 | uint64_t cycle; 62 | uint64_t miss_cycle_num; 63 | uint64_t memory_access_completion_cycle; 64 | uint64_t trans_id; 65 | bool isWrite; 66 | bool isRDMA; 67 | int RDMA_segment_id=0; 68 | }; 69 | 70 | // memory request/response to node/pool from pool/node as a network packet 71 | 
struct packet 72 | { 73 | remote_memory_access mem; 74 | int is_transmitting; 75 | int is_processing; 76 | int64_t in_nic_source; 77 | int64_t out_nic_source; 78 | int64_t in_switch_input_port; 79 | int64_t out_switch_input_port; 80 | int64_t in_switch_output_port; 81 | int64_t out_switch_output_port; 82 | int64_t in_nic_dest; 83 | int64_t out_nic_dest; 84 | }; 85 | 86 | using namespace DRAMSim; 87 | 88 | // used to store average local memory access cycle time for different nodes 89 | long unsigned total_core_local_time[num_nodes][core_count]={0}; 90 | long unsigned total_core_local_count[num_nodes][core_count]={0}; 91 | // used to store average remote memory access cycle time for different nodes 92 | long unsigned total_core_remote_time[num_nodes][core_count]={0}; 93 | long unsigned total_core_remote_count[num_nodes][core_count]={0}; 94 | // used to store average remote memory access for different pools 95 | long unsigned total_remote_pool_time[num_mem_pools]={0}; 96 | long unsigned total_remote_pool_count[num_mem_pools]={0}; 97 | 98 | 99 | // total number of memory accesses at each memory unit 100 | unsigned long _total_access_count[num_mem_pools]; 101 | 102 | // address spaces for different type of memory units 103 | local_addr_space L[num_nodes]; 104 | remote_addr_space R[num_mem_pools]; 105 | 106 | // total number of completed memory accesses at each memory unit 107 | uint64_t completed_trans_local[num_nodes]; 108 | uint64_t completed_trans_node_to_remote[num_nodes]; 109 | uint64_t completed_trans_remote[num_mem_pools]; 110 | uint64_t local_writebacks[num_nodes]={0}; 111 | 112 | // total cycle count for all memory accesses at each memory unit 113 | uint64_t total_node_local[num_nodes]; 114 | uint64_t total_remote_pool[num_mem_pools]; 115 | uint64_t total_node_remote[num_nodes]; 116 | 117 | // used to store core id of a memory request 118 | unordered_map core_map[num_nodes]; 119 | 120 | uint64_t total = 0; 121 | uint64_t avg = 0; 122 | uint64_t completed = 0; 
123 | 124 | class some_object 125 | { 126 | public: 127 | void read_complete(int, uint64_t, uint64_t, uint64_t, int, unsigned, uint64_t, uint64_t); 128 | void write_complete(int, uint64_t, uint64_t, uint64_t, int, unsigned, uint64_t, uint64_t); 129 | int add_one_and_run(DRAMSim::MultiChannelMemorySystem *mem, uint64_t addr, bool isWrite, uint64_t tid, uint64_t start_cycle, uint64_t miss_cycle, int nid); 130 | }; 131 | 132 | // used to store the total cycle number used at each memory unit 133 | uint64_t total_cycle[num_nodes + num_mem_pools]; 134 | 135 | // used for just printing "simulation going on" message rather than memory access complete message 136 | uint64_t x = 0; 137 | 138 | // to maintain count of memory accesses with different access time on different memory pools 139 | uint64_t U_100ns[num_mem_pools]; 140 | uint64_t B_100_300ns[num_mem_pools]; 141 | uint64_t B_300_500ns[num_mem_pools]; 142 | uint64_t B_500_700ns[num_mem_pools]; 143 | uint64_t B_700_1000ns[num_mem_pools]; 144 | uint64_t G_1000ns[num_mem_pools]; 145 | 146 | void to_trans_layer(remote_memory_access, deque *); 147 | void add_local_access_time(remote_memory_access); 148 | void add_remote_access_time(remote_memory_access); 149 | deque rx_packet_queue_pool[num_mem_pools]; // receiving queue 150 | 151 | vector memory_completion_queue[num_nodes]; 152 | mapRDMA_response_queue[num_nodes]; 153 | int RDMA_PKT_SIZE=64; //(bytes) 154 | uint64_t RDMA_packet_segments =0; 155 | //pagesize/RDMA_pkt_size 156 | 157 | // function to update end-cycle of each memory request once a transaction is completed 158 | void update(int node_id, int mem_id, uint64_t cycle, uint64_t tid, uint64_t start_cycle, uint64_t miss_cycle, uint64_t address, bool write) 159 | { 160 | // uint64_t size=0; 161 | int i = mem_id - 1; 162 | total_cycle[i] = cycle; 163 | // It is an RDMA request 164 | if(tid>1e10) 165 | { 166 | address = address&(0xfffffffff000); 167 | RDMA_response_queue[node_id][address]++; 168 | 
if(RDMA_response_queue[node_id][address]%((int)ceil((double)64/(double)RDMA_packet_segments))==0 || RDMA_response_queue[node_id][address]==64) 169 | { 170 | #ifdef MEM_LOG 171 | invalid<<"\nRemote memory Page Request(4KB)"<<" Source Node-id: C"<= num_nodes && i < (num_nodes + num_mem_pools)) 223 | { 224 | #ifdef MEM_LOG 225 | invalid<<"\nRemote memory access completed, response to be sent "<<" Node-id: C"< 100 && diff < 300) 257 | { 258 | B_100_300ns[i - num_nodes]++; 259 | } 260 | else if (diff > 300 && diff < 500) 261 | { 262 | B_300_500ns[i - num_nodes]++; 263 | } 264 | else if (diff > 500 && diff < 700) 265 | { 266 | B_500_700ns[i - num_nodes]++; 267 | } 268 | else if (diff > 700 && diff < 1000) 269 | { 270 | B_700_1000ns[i - num_nodes]++; 271 | } 272 | else if (diff > 1000) 273 | { 274 | G_1000ns[i - num_nodes]++; 275 | } 276 | 277 | // track<<"\nmem-id:"<<(i-num_nodes)<<" tid:"<addTransaction(isWrite, addr, nid, tid, start_cycle, miss_cycle); 329 | return 0; 330 | } 331 | 332 | // used to store total number of accesses in local nodes and remote pools 333 | unsigned long long local_access[num_nodes]; 334 | unsigned long long remote_access[num_nodes]; 335 | unsigned long long count_access[num_nodes]; 336 | 337 | // used to store each nodes total memory access count at each remote pool 338 | unsigned long per_pool_access_count[num_nodes][num_mem_pools]; 339 | 340 | some_object obj; 341 | 342 | TransactionCompleteCB *read_cb = new Callback(&obj, &some_object::read_complete); 343 | TransactionCompleteCB *write_cb = new Callback(&obj, &some_object::write_complete); 344 | 345 | /* Declare DRAMs memory to simulate */ 346 | MultiChannelMemorySystem *local_mem[num_nodes]; 347 | MultiChannelMemorySystem *remote_mem[num_mem_pools]; 348 | 349 | 350 | //std::ofstream ResultsFile; 351 | 352 | void declare_memory_variables(string dir) 353 | { 354 | // initializing stat variables 355 | 356 | // node wise number of local, remote and total memory access 357 | for (int i = 0; i 
< num_nodes; i++) 358 | { 359 | local_access[i] = 0; 360 | count_access[i] = 0; 361 | remote_access[i] = 0; 362 | 363 | total_node_local[i] = 0; 364 | 365 | completed_trans_local[i] = 0; 366 | 367 | total_node_remote[i] = 0; 368 | 369 | completed_trans_node_to_remote[i] = 0; 370 | } 371 | 372 | for (int i = 0; i < num_mem_pools; i++) 373 | { 374 | total_remote_pool[i] = 0; 375 | 376 | completed_trans_remote[i] = 0; 377 | _total_access_count[i] = 0; 378 | 379 | U_100ns[i] = 0; 380 | B_100_300ns[i] = 0; 381 | B_300_500ns[i] = 0; 382 | B_500_700ns[i] = 0; 383 | B_700_1000ns[i] = 0; 384 | G_1000ns[i] = 0; 385 | } 386 | 387 | for (int i = 0; i < num_nodes; i++) 388 | { 389 | for (int j = 0; j < num_mem_pools; j++) 390 | { 391 | per_pool_access_count[i][j] = 0; 392 | } 393 | } 394 | 395 | for (int i = 0; i < (num_nodes + num_mem_pools); i++) 396 | total_cycle[i] = 0; 397 | 398 | for (int i = 0; i < num_nodes; i++) // add local memory at each ndoe 399 | L[i].add_local_memory(local_mem_size, i); 400 | 401 | for (int i = 0; i < num_mem_pools; i++) // add remote memory pools 402 | R[i].add_remote_memory_pool(remote_mem_size, i); 403 | 404 | for (int i = 0; i < num_nodes; i++) 405 | { 406 | local_mem[i] = getMemorySystemInstance(i + 1, "ini/DDR4_x16_2400.ini", "system.ini", "./DRAMSim2", "abc", local_DRAM_size); 407 | local_mem[i]->RegisterCallbacks(read_cb, write_cb, power_callback); // DRAM simulator function 408 | 409 | // local_mem[i]->setCPUClockSpeed(3601440555); 410 | } 411 | 412 | for (int i = 0; i < num_mem_pools; i++) 413 | { 414 | remote_mem[i] = getMemorySystemInstance(i + num_nodes + 1, "ini/DDR4_x16_2400.ini", "system.ini", "./DRAMSim2", "abc", remote_DRAM_size); 415 | remote_mem[i]->RegisterCallbacks(read_cb, write_cb, power_callback); // DRAM simulator function 416 | 417 | // remote_mem[i]->setCPUClockSpeed(3601440555); 418 | } 419 | 420 | string inv, tra, ou, mem; 421 | inv = dir + "/memory_access_completion_log.trc"; 422 | tra = dir + "/Extra_stats.log"; 423 
| ou = dir + "/pool_select_trace.trc"; 424 | 425 | cout << inv << endl; 426 | cout << tra << endl; 427 | cout << ou << endl; 428 | 429 | const char *dirr = dir.c_str(); 430 | 431 | mkdir(dirr, 0777); 432 | 433 | const char *inv1 = inv.c_str(); 434 | invalid.open(inv1); 435 | invalid <<"\n=================Memory Request Log=================\n\n"; 436 | // invalid << "\nLog Turned Off, Un-comment log statements in mem_defs.h read and write complete functions to turn it on"; 437 | const char *tra1 = tra.c_str(); 438 | track.open(tra1); 439 | const char *ou1 = ou.c_str(); 440 | out.open(ou1); 441 | } 442 | 443 | // stats for local access 444 | void add_local_access_time(remote_memory_access mem_response) 445 | { 446 | int node_id = mem_response.source; 447 | long unsigned trans_id = mem_response.trans_id; 448 | 449 | int core_id = core_map[node_id][trans_id]; 450 | core_map[node_id].erase(trans_id); 451 | 452 | int time_taken = mem_response.memory_access_completion_cycle - mem_response.miss_cycle_num; 453 | total_core_local_time[node_id][core_id]+=time_taken; 454 | total_core_local_count[node_id][core_id]++; 455 | } 456 | 457 | // stats for remote access 458 | void add_remote_access_time(remote_memory_access mem_response) 459 | { 460 | #ifdef MEM_LOG 461 | invalid<<"\nRemote memory response reached compute node "<<" Request Source Node:"< 2 | #include 3 | #include 4 | #include 5 | 6 | #define chunk_size 4 7 | 8 | #define Page_Size 4096 //in bytes 9 | 10 | class remote_addr_space; 11 | 12 | 13 | //maintains local-to-remote mapping at each node 14 | class node_remote_map 15 | { 16 | public: 17 | unsigned long local_base; //base address at local node address space 18 | unsigned long remote_base; //remote base address at remote pool address space 19 | unsigned long offset_mask; //unused 20 | int region_size; //size of the region assigned from remote to local 21 | //(multiple enteries can be there for one node to same remote pool) 22 | int mem_pool_no; //remote pool number 
at which memory is reserved 23 | }; 24 | 25 | class local_addr_space 26 | { 27 | long double memory_size; //in Giga-Bytes 28 | long double remote_mem_size; //total remote memory reserved 29 | unsigned long total_pages; //local+remote pages 30 | unsigned long allocated_pages; //used pages 31 | unsigned long local_allocated_pages; //used local pages 32 | unsigned long remote_allocated_pages; //used remote pages 33 | unsigned long free_pages; //free pages in all the memory 34 | unsigned long local_pages; //total local pages in memory 35 | unsigned long remote_pages; //total remote pages in memory 36 | int node_no; 37 | node_remote_map *remote_map; //node maintain local-remote mapping table 38 | int remote_map_index; 39 | vector page_allocation_status; 40 | vector free_local_page_list; 41 | vector free_remote_page_list; 42 | 43 | public: 44 | 45 | local_addr_space(){} 46 | 47 | unsigned long local_page_count() 48 | { 49 | return local_pages; 50 | } 51 | 52 | //add mem_size(GB) amount of local memory into the address space 53 | void add_local_memory(long double mem_size, int node_num) 54 | { 55 | remote_map_index=-1; 56 | node_no=node_num; 57 | memory_size=mem_size; 58 | total_pages=(pow(2.0,30.0) * memory_size) / 4096 ; 59 | local_pages=total_pages; 60 | for(uint64_t i=0;i=0) 242 | { 243 | mem_stats<<"\n\t\t\t\t\t\tNode Remote-Memory Mapping Table Node-"<=0) 278 | { 279 | int last_pool=remote_map[remote_map_index].mem_pool_no; 280 | pools[last_pool]=pools[last_pool] - pages_left_in_last_shared_region; 281 | } 282 | 283 | for(int i=0;i0) 286 | mem_stats<<"\nPages in remote-pool-"<=new_pages) 460 | { 461 | for(int i=0;i 4 | 5 | // ---------------------SIMULATION --------------------- 6 | #define simulation_time 1000000000 7 | #define Result_cycle 10000000 8 | #define max_ins 1000000 9 | 10 | // --------------- NODES AND MEMORY--------------- 11 | #define num_nodes 12 12 | #define num_mem_pools 4 13 | #define core_count 8 14 | #define local_mem_size 0.25 15 | #define 
remote_mem_size1 32 16 | #define local_DRAM_size 256 17 | #define remote_DRAM_size remote_mem_size1 * pow(2.0, 10.0) 18 | #define Page_Size 4096 19 | 20 | // ------------TLB AND CACHE ------------ 21 | 22 | #define TLB_HIT 9 23 | #define TLB_MISS 60 24 | #define L1_HIT 4 25 | #define L2_HIT 12 26 | #define L3_HIT 25 27 | #define page_fault_latency 9000 28 | #define chunk_allocation_plus_page_fault_latency 10000 29 | #define iqueue_size 50 30 | #define mshr 8 31 | 32 | // -----------------PROCESSOR----------------------- 33 | #define RS_Size 64 34 | #define ROB_Size 192 35 | #define LSQ_Size 128 36 | #define num_reg 512 37 | #define decode_width 2 38 | #define issue_width 2 39 | #define dispatch_width 2 40 | #define commit_width 2 41 | #define ld_str_width 2 42 | #define num_exec_units 5 43 | #define decode_latency 2 44 | #define execution_latency 5 45 | #define branch_penalty 100 46 | #define max_read_reg 4 47 | #define max_write_reg 4 48 | #define decode_buffer_size 8 49 | 50 | // -----------------INTERCONNECT---------- 51 | 52 | 53 | #define tx_packet_size 512 // bits (64-bytes) 54 | #define rx_packet_size 1024 // bits (128-bytes) 55 | 56 | // both tx/rx, for all packet sizes, assuming enough memory 57 | #define nic_queue_size 16384 * 64 58 | 59 | // buffer sizes for Rx/tx, divided according to the total buffer size of the switch 60 | //(DELL POWERSWITCH Z9432F-ON), allows 128 ports of 100GbE(used in this simulation), or 32-ports of 400GbE 61 | // has buffer size of 132MB, 62 | 63 | double per_port_mb = ceil((double)132 / (num_nodes + num_mem_pools)); 64 | 65 | long double tx_per_port_mb = (per_port_mb / 2); 66 | long double rx_per_port_mb = (per_port_mb / 2); 67 | 68 | long double tx_input_port_queue_size = ((tx_per_port_mb * 1024 * 1024)); // 64-byte packet while sending 69 | long double rx_input_port_queue_size = ((rx_per_port_mb * 1024 * 1024)); // 128-byte packet while receiving 70 | long double tx_output_port_queue_size = ((tx_per_port_mb * 1024 * 1024)); 
71 | long double rx_output_port_queue_size = ((rx_per_port_mb * 1024 * 1024)); 72 | 73 | #define nic_bandwidth 100 // Gbps 74 | #define switch_bandwidth 400 // Gbps 75 | 76 | // 1ns=1cycle 77 | int nic_trans_delay = 0; 78 | int switch_trans_delay = 0; 79 | 80 | #define nic_proc_delay 5 81 | #define switch_proc_delay 10 82 | #define prop_delay 5 // max 1-meter inside rack 83 | #define switching_delay 2 84 | 85 | // switch input port arbitrator (transmitting/receiving) 86 | static int tx_arbitrator = 0; 87 | static int rx_arbitrator = 0; 88 | 89 | // ---------------BRANCH PREDICTOR ------- 90 | 91 | #define BTB_Table_Size 16384 92 | #define BTB_Prime 16381 93 | #define MAX_COUNTER 3 94 | 95 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/readme.md: -------------------------------------------------------------------------------- 1 |

Cycle-level Disaggregated Memory Simulator

2 | 3 | ***Instructions to use cycle-level Disaggregated Memory Simulation Tool***: 4 | * Simulation has to be started in two steps simultaneously: 5 | 6 | - In the first step, instances (as per the number of nodes required to simulate) of pintool need to be started with the respective workloads to start production of instruction trace. 7 | 8 | - In the second step, the simulator needs to be started to simulate the produced instruction traces of each node (one trace for each node). 9 | - Instruction traces of multiple workloads can also be produced to represent traces for one-node (Multi-program support, not properly tested) 10 | 11 | ***Instructions to compile Detailed Cycle-Level Simulation model:*** 12 | * The tool needs the Pin paths to be set: either add the pin-paths or copy this whole directory to the **$pin-path/source/tools/detail/** (/detail is the new directory to be created) 13 | * Download DRAMSim2 from **https://github.com/umd-memsys/DRAMSim2** 14 | * Extract DRAMSim2 in this directory and name it as DRAMSim2 15 | * Apply DRAMSim2.patch on the extracted DRAMSim2 directory to apply all the modifications that we made on it and copy files from **DRAMSIM2_ini/** to **ini/** in **DRAMSim2**: 16 | ``` 17 | patch -p1 < DRAMSim2.patch 18 | ``` 19 | 20 | Go inside DRAMSim2 and build DRAMSim2 as a library: 21 | ``` 22 | cd DRAMSim2 23 | make libdramsim.so 24 | cd .. 25 | ``` 26 | 27 | * Download and install the boost library in your system from **https://www.boost.org/users/download/** or use the copy we provide in this repo. 
28 | * Extract boost in the same directory 29 | * Set the number of nodes and remote memory pools in the **mem_defs.cpp** 30 | * Set the number of instructions or simulation cycles to simulate in the **main.cpp** 31 | * Create executable using '**make sim**' 32 | 33 | ***Instructions for producing instruction traces:*** 34 | * After copying this to **$pin-path/source/tools/** use '**make pin**' command to compile the instruction trace tool 35 | * Start as many instruction-trace Pintool instances as the number of nodes you set in mem_defs.cpp (This is required, otherwise simulation will not start) 36 | * Samples for using the instruction trace tool are given in '**start_sim.sh**' 37 | * After every simulation, use '**bash clear_sm.sh**', because the simulator creates shared memory variables and might create trouble during the next simulation if the execution was stopped in-between. clear_sm.sh will clear all the shared memory variables. Also kill the running workload (to be automated in next updates). 38 | 39 | ***Starting Simulation:*** 40 | * For simplification, you can either use multiple terminal windows or install the '**screen**' utility on your system 41 | * In the first terminal, run '**bash start_sim.sh**' to start production of instruction traces. You can specify different options while starting this tool. 42 | - '**-N x**' : Mention the node number '**x**' (mandatory argument) 43 | - '**-T 1/0**' : '**1**' will only simulate multi-threaded part of the workload 44 | - '**-S n**' : skip '**n**' initial instructions 45 | - '**-S**' and '**-T**' can be used together; skipping will be done in multi-threaded part if '**-T 1**' is used. 46 | - '**-M**' : Number of instructions to trace and simulate (default 10M) 47 | * In the second terminal, start simulator DRackSim and pass the name of the output directory to it, e.g., ' **./DRackSim $OutDir numINST (10M default)** ' 48 | * The simulation will start and continue until the exit condition arrives. 
49 | * The frequency of printing results can be changed using pre-processor #Result_cycle in main.cpp 50 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/sim.patch: -------------------------------------------------------------------------------- 1 | diff -crB DRACKSIM_noPage/main.cpp DRACKSIM_noPage1/main.cpp 2 | *** DRACKSIM_noPage/main.cpp 2023-04-11 12:55:52.654103346 +0530 3 | --- DRACKSIM_noPage1/main.cpp 2023-04-11 12:50:19.904779468 +0530 4 | *************** 5 | *** 14,20 **** 6 | #include 7 | #include 8 | using namespace std; 9 | ! // #define local_remote 1 10 | uint64_t common_clock = 0; 11 | #include "mem_defs.cpp" 12 | 13 | --- 14,20 ---- 14 | #include 15 | #include 16 | using namespace std; 17 | ! #define local_remote 1 18 | uint64_t common_clock = 0; 19 | #include "mem_defs.cpp" 20 | 21 | *************** 22 | *** 122,128 **** 23 | initialize_branch_predictor(nodeid); 24 | // simulation starts here, clock started 25 | // while (fileid<=1000) 26 | ! while(common_clock<=(Result_cycle*10)+5 && total_num_inst_commited<=max_insts_to_simulate) 27 | { 28 | pthread_barrier_wait(&b); 29 | 30 | --- 122,128 ---- 31 | initialize_branch_predictor(nodeid); 32 | // simulation starts here, clock started 33 | // while (fileid<=1000) 34 | ! 
while(common_clock<=(Result_cycle*10)+5)// && total_num_inst_commited<=max_insts_to_simulate) 35 | { 36 | pthread_barrier_wait(&b); 37 | 38 | -------------------------------------------------------------------------------- /DRACKSim-Detailed/start_sim.sh: -------------------------------------------------------------------------------- 1 | # Node-to-Pool configurations (4:1,8:1,4:2,8:2) 2 | # Local-to_remote memory ratio configuration (75:25, 50:50, 75:25) 3 | 4 | # WL-1 mg,sp,bt,ft 5 | # WL-2 lulesh,miniFE,SimpleMOC,XSBench 6 | 7 | 8 | # Used for producing reported results in the experimentation section 9 | 10 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 1 -- ./workloads/mg.B.x & 11 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 2 -- ./workloads/sp.C.x & 12 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 3 -- ./workloads/bt.C.x & 13 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 4 -- ./workloads/ft.C.x & 14 | 15 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 5 -- ./workloads/mg.B.x & 16 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 6 -- ./workloads/sp.C.x & 17 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 7 -- ./workloads/bt.C.x & 18 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 8 -- ./workloads/ft.C.x & 19 | 20 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 1 -S 10000000 -- ./workloads/lulesh2.0 -s 120 & 21 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 2 -S 210000000 -- ./workloads/miniFE.x -nx 140 -ny 140 -nz 140 & 22 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 3 -- ./workloads/SimpleMOC -s -t 4 & 23 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 4 -- ./workloads/XSBench -s small -t 4 & 24 | 25 | # OMP_NUM_THREADS=4 ../../../pin -t obj-intel64/Ins_Tracer.so -N 5 -S 10000000 -- ./workloads/lulesh2.0 -s 120 & 26 | # 
// Binary size multipliers (powers of 1024).
#define KILO 1024
#define MEGA (KILO * KILO)
#define GIGA (KILO * MEGA)

// NOTE: these helpers are defined in a header, so they are declared `inline`
// to stay ODR-safe when the header is included from more than one
// translation unit. Their output format is unchanged.

/// Formats an unsigned integer in decimal.
/// @param val      value to print
/// @param width    minimum field width; shorter output is left-padded
/// @param padding  fill character used for the left padding
/// @return the formatted text
inline std::string StringInt(uint64_t val, uint32_t width = 0, char padding = ' ')
{
    std::ostringstream ostr;
    ostr.setf(std::ios::fixed, std::ios::floatfield);
    ostr.fill(padding);
    ostr << std::setw(width) << val;
    return ostr.str();
}

/// Formats a floating-point value in fixed notation with the stream's
/// default precision (six digits after the decimal point).
/// @param val      value to print
/// @param width    minimum field width; shorter output is left-padded
/// @param padding  fill character used for the left padding
/// @return the formatted text
inline std::string StringFlt(long double val, uint32_t width = 0, char padding = ' ')
{
    std::ostringstream ostr;
    ostr.setf(std::ios::fixed, std::ios::floatfield);
    ostr.fill(padding);
    ostr << std::setw(width) << val;
    return ostr.str();
}

/// Formats an unsigned integer as lowercase hexadecimal with a "0x" prefix.
/// @param val      value to print
/// @param width    field width applied to the "0x" prefix (see note)
/// @param padding  fill character
/// @return the formatted text
///
/// NOTE(review): std::setw only affects the next inserted item, which here is
/// the "0x" literal. The padding therefore lands in front of the prefix and
/// the hex digits themselves are never padded (e.g. width 6, '0' fill yields
/// "00000x" followed by the digits). Kept as-is for output compatibility;
/// confirm the intended layout before relying on a non-zero width.
inline std::string StringHex(uint64_t val, uint32_t width = 0, char padding = ' ')
{
    std::ostringstream ostr;
    ostr.setf(std::ios::fixed, std::ios::floatfield);
    ostr.fill(padding);
    ostr << std::setw(width) << std::hex << "0x" << val;
    return ostr.str();
}

/// Left-pads a string to a minimum width.
/// @param val      text to print (taken by const reference to avoid a copy)
/// @param width    minimum field width; shorter text is left-padded
/// @param padding  fill character used for the left padding
/// @return the padded text
inline std::string StringString(const std::string &val, uint32_t width = 0, char padding = ' ')
{
    std::ostringstream ostr;
    ostr.setf(std::ios::fixed, std::ios::floatfield);
    ostr.fill(padding);
    ostr << std::setw(width) << val;
    return ostr.str();
}
-------------------------------------------------------------------------------- 1 | NUM_BANKS=4 2 | NUM_ROWS=65536 3 | NUM_COLS=1024 4 | DEVICE_WIDTH=16 5 | 6 | ;in nanoseconds 7 | ;#define REFRESH_PERIOD 7800 8 | REFRESH_PERIOD=7800 9 | tCK=.833 ;* 10 | 11 | CL=16 ;* 12 | AL=0 ;* 13 | ;AL=3; needs to be tRCD-1 or 0 14 | ;RL=(CL+AL) 15 | ;WL=(RL-1) 16 | BL=8 ;* 17 | tRAS=32;* 18 | tRCD=16 ;* 19 | tRRD=4 ;* 20 | tRC=48 ;* 21 | tRP=16 ;* 22 | tCCD=4 ;* 23 | tRTP=4 ;* 24 | tWTR=5 ;* 25 | tWR=18 ;* 26 | tRTRS=1; -- RANK PARAMETER, TODO 27 | tRFC=312;* 28 | tFAW=20;* 29 | tCKE=3 ;* 30 | tXP=4 ;* 31 | 32 | tCMD=1 ;* 33 | 34 | IDD0=90; 35 | IDD1=110; 36 | IDD2P=50; 37 | IDD2Q=65; 38 | IDD2N=70; 39 | IDD3Pf=60; 40 | IDD3Ps=60; 41 | IDD3N=75; 42 | IDD4W=230; 43 | IDD4R=230; 44 | IDD5=84; 45 | IDD6=3; 46 | IDD6L=8; 47 | IDD7=270; 48 | 49 | ;same bank 50 | ;READ_TO_PRE_DELAY=(AL+BL/2+max(tRTP,2)-2) 51 | ;WRITE_TO_PRE_DELAY=(WL+BL/2+tWR) 52 | ;READ_TO_WRITE_DELAY=(RL+BL/2+tRTRS-WL) 53 | ;READ_AUTOPRE_DELAY=(AL+tRTP+tRP) 54 | ;WRITE_AUTOPRE_DELAY=(WL+BL/2+tWR+tRP) 55 | ;WRITE_TO_READ_DELAY_B=(WL+BL/2+tWTR);interbank 56 | ;WRITE_TO_READ_DELAY_R=(WL+BL/2+tRTRS-RL);interrank 57 | 58 | Vdd=1.2 ; TODO: double check this 59 | -------------------------------------------------------------------------------- /DRackSim-Trace/DRAMSim2_ini/DDR4_x16_2400_1.ini: -------------------------------------------------------------------------------- 1 | NUM_BANKS=4 2 | NUM_ROWS=65536 3 | NUM_COLS=1024 4 | DEVICE_WIDTH=16 5 | 6 | ;in nanoseconds 7 | ;#define REFRESH_PERIOD 7800 8 | REFRESH_PERIOD=7800 9 | tCK=.833 ;* 10 | 11 | CL=16 ;* 12 | AL=0 ;* 13 | ;AL=3; needs to be tRCD-1 or 0 14 | ;RL=(CL+AL) 15 | ;WL=(RL-1) 16 | BL=8 ;* 17 | tRAS=32;* 18 | tRCD=16 ;* 19 | tRRD=4 ;* 20 | tRC=48 ;* 21 | tRP=16 ;* 22 | tCCD=4 ;* 23 | tRTP=4 ;* 24 | tWTR=5 ;* 25 | tWR=18 ;* 26 | tRTRS=1; -- RANK PARAMETER, TODO 27 | tRFC=312;* 28 | tFAW=20;* 29 | tCKE=3 ;* 30 | tXP=4 ;* 31 | 32 | tCMD=1 ;* 33 | 34 | IDD0=90; 35 
// Software model of a 4-level page table (pgd -> pud -> pmd -> pte -> page).
//
// Each level is indexed by 9 bits of the virtual address (see split_vaddr),
// so a table never holds more than 512 entries. Every table reserves that
// capacity up front and refuses to grow beyond it: callers keep the raw
// pointers returned by access_in_*(), and those pointers stay valid only as
// long as the backing std::vector never reallocates.
//
// Typical use:
//   pgd root;
//   root.add_in_pgd(1);
//   pud *u = root.access_in_pgd(1);
//   u->add_in_pud(12);
//   pmd *m = u->access_in_pud(12);
//   m->add_in_pmd(123);
//   pte *t = m->access_in_pmd(123);
//   t->add_in_pte(10, 20);
constexpr std::size_t MMU_ENTRIES_PER_TABLE = 512;

// Leaf mapping: one virtual page number -> one physical page base.
class page
{
    unsigned long page_vaddr = 0;
    unsigned long page_paddr = 0;

public:
    page(unsigned long p_vaddr, unsigned long p_paddr)
        : page_vaddr(p_vaddr), page_paddr(p_paddr)
    {
    }

    // Overwrites both halves of the mapping.
    void set_page_map(unsigned long vaddr, unsigned long paddr)
    {
        page_vaddr = vaddr;
        page_paddr = paddr;
    }

    unsigned long get_page_vaddr() const
    {
        return page_vaddr;
    }

    // Returns the physical address when vaddr matches this entry, else 0.
    unsigned long get_page_base_addr(unsigned long vaddr) const
    {
        return (vaddr == page_vaddr) ? page_paddr : 0UL;
    }
};

// Last-level table: holds up to 512 page mappings.
class pte
{
    std::vector<page> _pte;
    unsigned long pte_vaddr = 0L;

public:
    pte(unsigned long vaddr) : pte_vaddr(vaddr)
    {
        _pte.reserve(MMU_ENTRIES_PER_TABLE);
    }

    // Appends a mapping; silently ignored once the table is full so that the
    // vector never reallocates (which would invalidate handed-out pointers).
    void add_in_pte(unsigned long vaddr, unsigned long paddr)
    {
        if (_pte.size() >= MMU_ENTRIES_PER_TABLE)
            return;

        _pte.emplace_back(vaddr, paddr);
    }

    // Linear search by virtual page index; nullptr when not present.
    page *access_in_pte(unsigned long vaddr)
    {
        for (page &p : _pte)
            if (p.get_page_vaddr() == vaddr)
                return &p;

        return nullptr;
    }

    void set_pte_vaddr(unsigned long vaddr)
    {
        pte_vaddr = vaddr;
    }

    unsigned long get_pte_vaddr() const
    {
        return pte_vaddr;
    }
};

// Third-level table: holds up to 512 pte tables.
class pmd
{
    std::vector<pte> _pmd;
    unsigned long pmd_vaddr = 0L;

public:
    pmd(unsigned long vaddr) : pmd_vaddr(vaddr)
    {
        _pmd.reserve(MMU_ENTRIES_PER_TABLE);
    }

    // Appends an empty pte table tagged with vaddr; ignored when full.
    void add_in_pmd(unsigned long vaddr)
    {
        if (_pmd.size() >= MMU_ENTRIES_PER_TABLE)
            return;

        _pmd.emplace_back(vaddr);
    }

    // Linear search by index; nullptr when not present.
    pte *access_in_pmd(unsigned long vaddr)
    {
        for (pte &p : _pmd)
            if (p.get_pte_vaddr() == vaddr)
                return &p;

        return nullptr;
    }

    void set_pmd_vaddr(unsigned long vaddr)
    {
        pmd_vaddr = vaddr;
    }

    unsigned long get_pmd_vaddr() const
    {
        return pmd_vaddr;
    }
};

// Second-level table: holds up to 512 pmd tables.
class pud
{
    std::vector<pmd> _pud;
    unsigned long pud_vaddr = 0L;

public:
    pud(unsigned long vaddr) : pud_vaddr(vaddr)
    {
        _pud.reserve(MMU_ENTRIES_PER_TABLE);
    }

    // Appends an empty pmd table tagged with vaddr; ignored when full.
    void add_in_pud(unsigned long vaddr)
    {
        if (_pud.size() >= MMU_ENTRIES_PER_TABLE)
            return;

        _pud.emplace_back(vaddr);
    }

    // Linear search by index; nullptr when not present.
    pmd *access_in_pud(unsigned long vaddr)
    {
        for (pmd &p : _pud)
            if (p.get_pmd_vaddr() == vaddr)
                return &p;

        return nullptr;
    }

    void set_pud_vaddr(unsigned long vaddr)
    {
        pud_vaddr = vaddr;
    }

    unsigned long get_pud_vaddr() const
    {
        return pud_vaddr;
    }
};

// Top-level table: holds up to 512 pud tables.
class pgd
{
    std::vector<pud> _pgd;
    unsigned long pgd_vaddr = 0L;

public:
    // Default construction must reserve too: without it the vector grows by
    // reallocation and every pud* previously returned by access_in_pgd()
    // would dangle after a later add_in_pgd().
    pgd() : pgd(0UL)
    {
    }

    pgd(unsigned long vaddr) : pgd_vaddr(vaddr)
    {
        _pgd.reserve(MMU_ENTRIES_PER_TABLE);
    }

    // Appends an empty pud table tagged with vaddr; ignored when full.
    void add_in_pgd(unsigned long vaddr)
    {
        if (_pgd.size() >= MMU_ENTRIES_PER_TABLE)
            return;

        _pgd.emplace_back(vaddr);
    }

    // Linear search by index; nullptr when not present.
    pud *access_in_pgd(unsigned long vaddr)
    {
        for (pud &p : _pgd)
            if (p.get_pud_vaddr() == vaddr)
                return &p;

        return nullptr;
    }

    void set_pgd_vaddr(unsigned long vaddr)
    {
        pgd_vaddr = vaddr;
    }

    unsigned long get_pgd_vaddr() const
    {
        return pgd_vaddr;
    }
};

// Splits a 48-bit virtual address into its page-table indices:
//   bits 47..39 -> pgd, 38..30 -> pud, 29..21 -> pmd, 20..12 -> pte,
//   bits 11..0  -> page_offset.
// All five results are written through the reference parameters.
// NOTE(review): the masks need a 64-bit `unsigned long` (LP64) — confirm if
// this is ever built for a target where `unsigned long` is 32 bits.
void split_vaddr(unsigned long &pgd, unsigned long &pud, unsigned long &pmd, unsigned long &pte, unsigned long &page_offset, unsigned long vaddr)
{
    page_offset = vaddr & 0x000000000fffUL;
    pte = (vaddr & 0x0000001ff000UL) >> 12;
    pmd = (vaddr & 0x00003fe00000UL) >> 21;
    pud = (vaddr & 0x007fc0000000UL) >> 30;
    pgd = (vaddr & 0xff8000000000UL) >> 39;
}

// Returns the page frame number of a physical address: bits 47..12 of paddr,
// shifted down by the 12-bit page offset.
unsigned long get_page_addr(unsigned long paddr)
{
    return (paddr & 0xfffffffff000UL) >> 12;
}
-------------------------------------------------------------------------------- /DRackSim-Trace/Makefile: -------------------------------------------------------------------------------- 1 | 2 | #tell the linker the rpath so that we don't have to muck with LD_LIBRARY_PATH, etc 3 | main: main.cpp 4 | $(CXX) -g -o TraceDRackSim main.cpp -I./DRAMSim2/ -L./DRAMSim2/ -ldramsim -Wl,-rpath=./DRAMSim2/ -lpthread 5 | 6 | clean: 7 | rm TraceDRackSim 8 | -------------------------------------------------------------------------------- /DRackSim-Trace/Trace_Tool/Caches.H: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2002-2019 Intel Corporation. 3 | * 4 | * This software and the related documents are Intel copyrighted materials, and your 5 | * use of them is governed by the express license under which they were provided to 6 | * you ("License"). Unless the License provides otherwise, you may not use, modify, 7 | * copy, publish, distribute, disclose or transmit this software or the related 8 | * documents without Intel's prior written permission. 9 | * 10 | * This software and the related documents are provided as is, with no express or 11 | * implied warranties, other than those that are expressly stated in the License. 12 | */ 13 | 14 | /*! @file 15 | * This file contains a configurable cache class 16 | */ 17 | 18 | #ifndef I_D_CACHE_H 19 | #define I_D_CACHE_H 20 | 21 | #include 22 | 23 | #include "pin_util.H" 24 | 25 | /*! 26 | * @brief Checks if n is a power of 2. 27 | * @returns true if n is power of 2 28 | */ 29 | static inline bool IsPower2(UINT32 n) 30 | { 31 | return ((n & (n - 1)) == 0); 32 | } 33 | 34 | /*! 35 | * @brief Computes floor(log2(n)) 36 | * Works by finding position of MSB set. 37 | * @returns -1 if n == 0. 
38 | */ 39 | static inline INT32 FloorLog2(UINT32 n) 40 | { 41 | INT32 p = 0; 42 | 43 | if (n == 0) return -1; 44 | 45 | if (n & 0xffff0000) { p += 16; n >>= 16; } 46 | if (n & 0x0000ff00) { p += 8; n >>= 8; } 47 | if (n & 0x000000f0) { p += 4; n >>= 4; } 48 | if (n & 0x0000000c) { p += 2; n >>= 2; } 49 | if (n & 0x00000002) { p += 1; } 50 | 51 | return p; 52 | } 53 | 54 | /*! 55 | * @brief Computes floor(log2(n)) 56 | * Works by finding position of MSB set. 57 | * @returns -1 if n == 0. 58 | */ 59 | static inline INT32 CeilLog2(UINT32 n) 60 | { 61 | return FloorLog2(n - 1) + 1; 62 | } 63 | 64 | /*! 65 | * @brief Cache tag - self clearing on creation 66 | */ 67 | class CACHE_TAG 68 | { 69 | private: 70 | ADDRINT _tag; 71 | 72 | public: 73 | CACHE_TAG(ADDRINT tag = 0) { _tag = tag; } 74 | bool operator==(const CACHE_TAG &right) const { return _tag == right._tag; } 75 | operator ADDRINT() const { return _tag; } 76 | }; 77 | 78 | 79 | /*! 80 | * Everything related to cache sets 81 | */ 82 | namespace CACHE_SET 83 | { 84 | 85 | /*! 86 | * @brief Cache set direct mapped 87 | */ 88 | class DIRECT_MAPPED 89 | { 90 | private: 91 | CACHE_TAG _tag; 92 | INT32 _ASID; 93 | 94 | public: 95 | DIRECT_MAPPED(UINT32 associativity = 1) { ASSERTX(associativity == 1); } 96 | 97 | VOID SetAssociativity(UINT32 associativity) { ASSERTX(associativity == 1); } 98 | UINT32 GetAssociativity(UINT32 associativity) { return 1; } 99 | 100 | UINT32 Find(CACHE_TAG tag, INT32 ASID) { return(_tag == tag && _ASID==ASID); } 101 | VOID Replace(CACHE_TAG tag, INT32 ASID) { _tag = tag; _ASID=ASID;} 102 | VOID Flush() { _tag = 0; _ASID=0;} 103 | }; 104 | 105 | /*! 
106 | * @brief Cache set with round robin replacement 107 | */ 108 | template 109 | class ROUND_ROBIN 110 | { 111 | private: 112 | CACHE_TAG _tags[MAX_ASSOCIATIVITY]; 113 | UINT32 _tagsLastIndex; 114 | UINT32 _nextReplaceIndex; 115 | INT32 _ASID[MAX_ASSOCIATIVITY]; 116 | 117 | public: 118 | ROUND_ROBIN(UINT32 associativity = MAX_ASSOCIATIVITY) 119 | : _tagsLastIndex(associativity - 1) 120 | { 121 | ASSERTX(associativity <= MAX_ASSOCIATIVITY); 122 | _nextReplaceIndex = _tagsLastIndex; 123 | 124 | for (INT32 index = _tagsLastIndex; index >= 0; index--) 125 | { 126 | _tags[index] = CACHE_TAG(0); 127 | _ASID[index] = 0; 128 | } 129 | } 130 | 131 | VOID SetAssociativity(UINT32 associativity) 132 | { 133 | ASSERTX(associativity <= MAX_ASSOCIATIVITY); 134 | _tagsLastIndex = associativity - 1; 135 | _nextReplaceIndex = _tagsLastIndex; 136 | } 137 | UINT32 GetAssociativity(UINT32 associativity) { return _tagsLastIndex + 1; } 138 | 139 | UINT32 Find(CACHE_TAG tag, INT32 ASID) 140 | { 141 | bool result = true; 142 | 143 | for (INT32 index = _tagsLastIndex; index >= 0; index--) 144 | { 145 | // this is an ugly micro-optimization, but it does cause a 146 | // tighter assembly loop for ARM that way ... 147 | if(_tags[index] == tag && _ASID[index] == ASID) goto end; 148 | } 149 | result = false; 150 | 151 | end: return result; 152 | } 153 | 154 | VOID Replace(CACHE_TAG tag, INT32 ASID) 155 | { 156 | // g++ -O3 too dumb to do CSE on following lines?! 157 | const UINT32 index = _nextReplaceIndex; 158 | 159 | _tags[index] = tag; 160 | _ASID[index] = ASID; 161 | // condition typically faster than modulo 162 | _nextReplaceIndex = (index == 0 ? 
_tagsLastIndex : index - 1); 163 | } 164 | VOID Flush() 165 | { 166 | for (INT32 index = _tagsLastIndex; index >= 0; index--) 167 | { 168 | _tags[index] = 0; 169 | _ASID[index] = 0; 170 | } 171 | _nextReplaceIndex=_tagsLastIndex; 172 | } 173 | }; 174 | 175 | } // namespace CACHE_SET 176 | 177 | namespace CACHE_ALLOC 178 | { 179 | typedef enum 180 | { 181 | STORE_ALLOCATE, 182 | STORE_NO_ALLOCATE 183 | } STORE_ALLOCATION; 184 | } 185 | 186 | /*! 187 | * @brief Generic cache base class; no allocate specialization, no cache set specialization 188 | */ 189 | class CACHE_BASE 190 | { 191 | public: 192 | // types, constants 193 | typedef enum 194 | { 195 | ACCESS_TYPE_LOAD, 196 | ACCESS_TYPE_STORE, 197 | ACCESS_TYPE_NUM 198 | } ACCESS_TYPE; 199 | 200 | protected: 201 | static const UINT32 HIT_MISS_NUM = 2; 202 | CACHE_STATS _access[ACCESS_TYPE_NUM][HIT_MISS_NUM]; 203 | 204 | private: 205 | // input params 206 | const std::string _name; 207 | const UINT32 _cacheSize; 208 | const UINT32 _lineSize; 209 | const UINT32 _associativity; 210 | UINT32 _numberOfFlushes; 211 | UINT32 _numberOfResets; 212 | 213 | // computed params 214 | const UINT32 _lineShift; 215 | const UINT32 _setIndexMask; 216 | 217 | CACHE_STATS SumAccess(bool hit) const 218 | { 219 | CACHE_STATS sum = 0; 220 | 221 | for (UINT32 accessType = 0; accessType < ACCESS_TYPE_NUM; accessType++) 222 | { 223 | sum += _access[accessType][hit]; 224 | } 225 | 226 | return sum; 227 | } 228 | 229 | protected: 230 | UINT32 NumSets() const { return _setIndexMask + 1; } 231 | 232 | public: 233 | // constructors/destructors 234 | CACHE_BASE(std::string name, UINT32 cacheSize, UINT32 lineSize, UINT32 associativity); 235 | 236 | // accessors 237 | UINT32 CacheSize() const { return _cacheSize; } 238 | UINT32 LineSize() const { return _lineSize; } 239 | UINT32 Associativity() const { return _associativity; } 240 | // 241 | CACHE_STATS Hits(ACCESS_TYPE accessType) const { return _access[accessType][true];} 242 | CACHE_STATS 
Misses(ACCESS_TYPE accessType) const { return _access[accessType][false];} 243 | CACHE_STATS Accesses(ACCESS_TYPE accessType) const { return Hits(accessType) + Misses(accessType);} 244 | CACHE_STATS Hits() const { return SumAccess(true);} 245 | CACHE_STATS Misses() const { return SumAccess(false);} 246 | CACHE_STATS Accesses() const { return Hits() + Misses();} 247 | 248 | CACHE_STATS Flushes() const { return _numberOfFlushes;} 249 | CACHE_STATS Resets() const { return _numberOfResets;} 250 | 251 | VOID SplitAddress(const ADDRINT addr, CACHE_TAG & tag, UINT32 & setIndex) const 252 | { 253 | tag = addr >> _lineShift; 254 | setIndex = tag & _setIndexMask; 255 | } 256 | 257 | VOID SplitAddress(const ADDRINT addr, CACHE_TAG & tag, UINT32 & setIndex, UINT32 & lineIndex) const 258 | { 259 | const UINT32 lineMask = _lineSize - 1; 260 | lineIndex = addr & lineMask; 261 | SplitAddress(addr, tag, setIndex); 262 | } 263 | 264 | VOID IncFlushCounter() 265 | { 266 | _numberOfFlushes += 1; 267 | } 268 | 269 | VOID IncResetCounter() 270 | { 271 | _numberOfResets += 1; 272 | } 273 | std::string GetName() 274 | { 275 | return _name; 276 | } 277 | 278 | std::ostream & StatsLong(std::ostream & out) const; 279 | }; 280 | 281 | CACHE_BASE::CACHE_BASE(std::string name, UINT32 cacheSize, UINT32 lineSize, UINT32 associativity) 282 | : _name(name), 283 | _cacheSize(cacheSize), 284 | _lineSize(lineSize), 285 | _associativity(associativity), 286 | _lineShift(FloorLog2(lineSize)), 287 | _setIndexMask((cacheSize / (associativity * lineSize)) - 1) 288 | { 289 | 290 | ASSERTX(IsPower2(_lineSize)); 291 | ASSERTX(IsPower2(_setIndexMask + 1)); 292 | 293 | for (UINT32 accessType = 0; accessType < ACCESS_TYPE_NUM; accessType++) 294 | { 295 | _access[accessType][false] = 0; 296 | _access[accessType][true] = 0; 297 | } 298 | } 299 | 300 | /*! 
301 | * @brief Stats output method 302 | */ 303 | std::ostream & CACHE_BASE::StatsLong(std::ostream & out) const 304 | { 305 | const UINT32 headerWidth = 19; 306 | const UINT32 numberWidth = 10; 307 | 308 | out << _name << ":" << std::endl; 309 | 310 | for (UINT32 i = 0; i < ACCESS_TYPE_NUM; i++) 311 | { 312 | const ACCESS_TYPE accessType = ACCESS_TYPE(i); 313 | 314 | std::string type(accessType == ACCESS_TYPE_LOAD ? "Load" : "Store"); 315 | 316 | out << StringString(type + " Hits: ", headerWidth) 317 | << StringInt(Hits(accessType), numberWidth) << std::endl; 318 | out << StringString(type + " Misses: ", headerWidth) 319 | << StringInt(Misses(accessType), numberWidth) << std::endl; 320 | out << StringString(type + " Accesses: ", headerWidth) 321 | << StringInt(Accesses(accessType), numberWidth) << std::endl; 322 | out << StringString(type + " Miss Rate: ", headerWidth) 323 | << StringFlt(100.0 * Misses(accessType) / Accesses(accessType), 2, numberWidth-1) << "%" << std::endl; 324 | out << std::endl; 325 | } 326 | 327 | out << StringString("Total Hits: ", headerWidth, ' ') 328 | << StringInt(Hits(), numberWidth) << std::endl; 329 | out << StringString("Total Misses: ", headerWidth, ' ') 330 | << StringInt(Misses(), numberWidth) << std::endl; 331 | out << StringString("Total Accesses: ", headerWidth, ' ') 332 | << StringInt(Accesses(), numberWidth) << std::endl; 333 | out << StringString("Total Miss Rate: ", headerWidth, ' ') 334 | << StringFlt(100.0 * Misses() / Accesses(), 2, numberWidth-1) << "%" << std::endl; 335 | 336 | out << StringString("Flushes: ", headerWidth, ' ') 337 | << StringInt(Flushes(), numberWidth) << std::endl; 338 | out << StringString("Stat Resets: ", headerWidth, ' ') 339 | << StringInt(Resets(), numberWidth) << std::endl; 340 | 341 | out << std::endl; 342 | 343 | return out; 344 | } 345 | 346 | /// ostream operator for CACHE_BASE 347 | std::ostream & operator<< (std::ostream & out, const CACHE_BASE & cacheBase) 348 | { 349 | return 
cacheBase.StatsLong(out); 350 | } 351 | 352 | /*! 353 | * @brief Templated cache class with specific cache set allocation policies 354 | * 355 | * All that remains to be done here is allocate and deallocate the right 356 | * type of cache sets. 357 | */ 358 | template 359 | class CACHE : public CACHE_BASE 360 | { 361 | private: 362 | SET _sets[MAX_SETS]; 363 | 364 | public: 365 | // constructors/destructors 366 | CACHE(std::string name, UINT32 cacheSize, UINT32 lineSize, UINT32 associativity) 367 | : CACHE_BASE(name, cacheSize, lineSize, associativity) 368 | { 369 | ASSERTX(NumSets() <= MAX_SETS); 370 | 371 | for (UINT32 i = 0; i < NumSets(); i++) 372 | { 373 | _sets[i].SetAssociativity(associativity); 374 | } 375 | } 376 | 377 | // modifiers 378 | /// Cache access from addr to addr+size-1 379 | bool Access(ADDRINT addr, UINT32 size, ACCESS_TYPE accessType, INT32 ASID, int &line_read, bool *linehit); 380 | bool Access(ADDRINT addr, UINT32 size, ACCESS_TYPE accessType, INT32 ASID); 381 | /// Cache access at addr that does not span cache lines 382 | bool AccessSingleLine(ADDRINT addr, ACCESS_TYPE accessType, int ASID); 383 | void Flush(); 384 | void ResetStats(); 385 | }; 386 | 387 | /*! 388 | * @return true if all accessed cache lines hit 389 | */ 390 | 391 | template 392 | bool CACHE::Access(ADDRINT addr, UINT32 size, ACCESS_TYPE accessType, INT32 ASID, int &line_read, bool *linehit) 393 | { 394 | const ADDRINT highAddr = addr + size; 395 | bool allHit = true; 396 | 397 | const ADDRINT lineSize = LineSize(); 398 | const ADDRINT notLineMask = ~(lineSize - 1); 399 | do 400 | { 401 | line_read++; 402 | CACHE_TAG tag; 403 | UINT32 setIndex; 404 | 405 | SplitAddress(addr, tag, setIndex); 406 | 407 | SET & set = _sets[setIndex]; 408 | 409 | bool localHit = set.Find(tag,ASID); 410 | allHit &= localHit; 411 | linehit[line_read-1]=localHit; 412 | 413 | 414 | // on miss, loads always allocate, stores optionally 415 | if ( (! 
localHit) && (accessType == ACCESS_TYPE_LOAD || STORE_ALLOCATION == CACHE_ALLOC::STORE_ALLOCATE)) 416 | { 417 | set.Replace(tag, ASID); 418 | } 419 | addr = (addr & notLineMask) + lineSize; // start of next cache line 420 | } 421 | while (addr < highAddr); 422 | _access[accessType][allHit]++; 423 | 424 | return allHit; 425 | } 426 | 427 | template 428 | bool CACHE::Access(ADDRINT addr, UINT32 size, ACCESS_TYPE accessType, INT32 ASID) 429 | { 430 | const ADDRINT highAddr = addr + size; 431 | bool allHit = true; 432 | 433 | const ADDRINT lineSize = LineSize(); 434 | const ADDRINT notLineMask = ~(lineSize - 1); 435 | do 436 | { 437 | CACHE_TAG tag; 438 | UINT32 setIndex; 439 | 440 | SplitAddress(addr, tag, setIndex); 441 | 442 | SET & set = _sets[setIndex]; 443 | 444 | bool localHit = set.Find(tag,ASID); 445 | allHit &= localHit; 446 | 447 | // on miss, loads always allocate, stores optionally 448 | if ( (! localHit) && (accessType == ACCESS_TYPE_LOAD || STORE_ALLOCATION == CACHE_ALLOC::STORE_ALLOCATE)) 449 | { 450 | set.Replace(tag, ASID); 451 | } 452 | 453 | addr = (addr & notLineMask) + lineSize; // start of next cache line 454 | } 455 | while (addr < highAddr); 456 | _access[accessType][allHit]++; 457 | 458 | return allHit; 459 | } 460 | 461 | /*! 462 | * @return true if accessed cache line hits 463 | */ 464 | template 465 | bool CACHE::AccessSingleLine(ADDRINT addr, ACCESS_TYPE accessType, INT32 ASID) 466 | { 467 | CACHE_TAG tag; 468 | UINT32 setIndex; 469 | 470 | SplitAddress(addr, tag, setIndex); 471 | 472 | SET & set = _sets[setIndex]; 473 | 474 | bool hit = set.Find(tag,ASID); 475 | 476 | // on miss, loads always allocate, stores optionally 477 | if ( (! hit) && (accessType == ACCESS_TYPE_LOAD || STORE_ALLOCATION == CACHE_ALLOC::STORE_ALLOCATE)) 478 | { 479 | set.Replace(tag,ASID); 480 | } 481 | 482 | _access[accessType][hit]++; 483 | 484 | return hit; 485 | } 486 | /*! 
487 | * @return true if accessed cache line hits 488 | */ 489 | template 490 | void CACHE::Flush() 491 | { 492 | for (INT32 index = NumSets(); index >= 0; index--) { 493 | SET & set = _sets[index]; 494 | set.Flush(); 495 | } 496 | IncFlushCounter(); 497 | } 498 | 499 | template 500 | void CACHE::ResetStats() 501 | { 502 | for (UINT32 accessType = 0; accessType < ACCESS_TYPE_NUM; accessType++) 503 | { 504 | _access[accessType][false] = 0; 505 | _access[accessType][true] = 0; 506 | } 507 | IncResetCounter(); 508 | } 509 | 510 | 511 | // define shortcuts 512 | #define CACHE_DIRECT_MAPPED(MAX_SETS, ALLOCATION) CACHE 513 | #define CACHE_ROUND_ROBIN(MAX_SETS, MAX_ASSOCIATIVITY, ALLOCATION) CACHE, MAX_SETS, ALLOCATION> 514 | 515 | #endif // PIN_CACHE_H 516 | -------------------------------------------------------------------------------- /DRackSim-Trace/Trace_Tool/boost.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amit-P89/-DRackSim/7af5c2d6d570317aa8b00ab249597e05870618ad/DRackSim-Trace/Trace_Tool/boost.tar.xz -------------------------------------------------------------------------------- /DRackSim-Trace/Trace_Tool/icount.H: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2002-2019 Intel Corporation. 3 | * 4 | * This software is provided to you as Sample Source Code as defined in the accompanying 5 | * End User License Agreement for the Intel(R) Software Development Products ("Agreement") 6 | * section 1.L. 7 | * 8 | * This software and the related documents are provided as is, with no express or implied 9 | * warranties, other than those that are expressly stated in the License. 10 | */ 11 | 12 | #ifndef ICOUNT_H 13 | #define ICOUNT_H 14 | 15 | namespace INSTLIB 16 | { 17 | 18 | /*! @defgroup ICOUNT 19 | Instrumentation for counting instruction execution 20 | */ 21 | 22 | /*! 
@ingroup ICOUNT 23 | The example below can be found in InstLibExamples/icount.cpp 24 | 25 | \include icount.cpp 26 | */ 27 | class ICOUNT 28 | { 29 | public: 30 | ICOUNT() 31 | { 32 | _mode = ModeInactive; 33 | 34 | /* Allocate 64 byte aligned data for the statistics. */ 35 | _space = new char [(ISIMPOINT_MAX_THREADS+1)*sizeof(threadStats) -1]; 36 | 37 | ADDRINT space = VoidStar2Addrint(_space); 38 | ADDRINT align_1 = static_cast (cacheLineSize-1); 39 | _stats = reinterpret_cast((space+align_1) & ~align_1); 40 | memset (_stats, 0, ISIMPOINT_MAX_THREADS*sizeof(threadStats)); 41 | }; 42 | 43 | ~ICOUNT() 44 | { 45 | delete [] _space; 46 | } 47 | /*! @ingroup ICOUNT 48 | @return Total number of instructions executed. (But see @ref mode for what this means). 49 | */ 50 | 51 | UINT32 MultiThreadCount() const 52 | { 53 | UINT64 multithreadCount=0; 54 | ASSERTX(Mode() == ModeBoth); 55 | for(UINT64 i=0; icount - s->repDuplicateCount; 86 | } 87 | 88 | /*! @ingroup ICOUNT 89 | Set the current count 90 | */ 91 | VOID SetCount(UINT64 count, THREADID tid = 0) 92 | { 93 | ASSERTX(_mode != ModeInactive); 94 | ASSERTX(tid < ISIMPOINT_MAX_THREADS); 95 | _stats[tid].count = count; 96 | _stats[tid].repDuplicateCount = 0; 97 | } 98 | 99 | /*! @ingroup ICOUNT 100 | * The mode used for counting REP prefixed instructions. 101 | */ 102 | enum mode { 103 | ModeInactive = -1, 104 | ModeNormal = 0, /**< Count all instructions, each REP "iteration" adds 1 */ 105 | ModeBoth /**< Provide both the normal count and a count in which REP prefixed 106 | instructions are only counted once. */ 107 | }; 108 | 109 | /*! @ingroup ICOUNT 110 | * @return the mode of the ICOUNT object. 111 | */ 112 | mode Mode() const 113 | { 114 | return _mode; 115 | } 116 | 117 | /*! @ingroup ICOUNT 118 | Activate the counter, must be called before PIN_StartProgram. 119 | @param [in] mode Determine the way in which REP prefixed operations are counted. 
By default (ICOUNT::ModeNormal), 120 | REP prefixed instructions are counted as if REP is an implicit loop. By passing 121 | ICOUNT::ModeRepsCountedOnlyOnce you can have the counter treat each REP as only one dynamic instruction. 122 | */ 123 | VOID Activate(mode m = ModeNormal) 124 | { 125 | ASSERTX(_mode == ModeInactive); 126 | _mode = m; 127 | TRACE_AddInstrumentFunction(Trace, this); 128 | } 129 | 130 | private: 131 | enum { 132 | cacheLineSize = 64 133 | }; 134 | 135 | static VOID Trace(TRACE trace, VOID * icount) 136 | { 137 | #if (defined(TARGET_IA32) || defined(TARGET_IA32E)) 138 | ICOUNT const * ic = reinterpret_cast(icount); 139 | mode m = ic->Mode(); 140 | #endif 141 | for (BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) 142 | { 143 | BBL_InsertCall(bbl, IPOINT_ANYWHERE, 144 | AFUNPTR(Advance), 145 | IARG_FAST_ANALYSIS_CALL, 146 | IARG_ADDRINT, icount, 147 | IARG_ADDRINT, ADDRINT(BBL_NumIns(bbl)), 148 | IARG_THREAD_ID, 149 | IARG_END); 150 | 151 | // REP prefixed instructions are an IA-32 and Intel(R) 64 feature 152 | #if (defined(TARGET_IA32) || defined(TARGET_IA32E)) 153 | if (m == ModeBoth) 154 | { // Check whether there are any REP prefixed instructions in the BBL 155 | // and, if so, subtract out their execution unless it is the first 156 | // iteration. 157 | for (INS ins = BBL_InsHead(bbl); 158 | INS_Valid(ins); 159 | ins = INS_Next(ins)) 160 | { 161 | if (INS_HasRealRep(ins)) 162 | { 163 | INS_InsertCall(ins, IPOINT_BEFORE, 164 | AFUNPTR(CountDuplicates), 165 | IARG_FAST_ANALYSIS_CALL, 166 | IARG_ADDRINT, icount, 167 | IARG_FIRST_REP_ITERATION, 168 | IARG_THREAD_ID, 169 | IARG_END); 170 | 171 | } 172 | } 173 | } 174 | #endif 175 | } 176 | } 177 | 178 | static VOID PIN_FAST_ANALYSIS_CALL Advance(ICOUNT * ic, ADDRINT c, THREADID tid) 179 | { 180 | // ASSERTX(tid < ISIMPOINT_MAX_THREADS); 181 | ic->_stats[tid].count += c; 182 | } 183 | 184 | // Accumulate the count of REP prefixed executions which aren't the first iteration. 
185 | // 186 | // We are assuming that this will be inlined, and is small, so there is no point 187 | // in guarding it with an InsertIf call testing IARG_FIRST_REP_ITERATION. 188 | static VOID PIN_FAST_ANALYSIS_CALL CountDuplicates(ICOUNT * ic, BOOL first, THREADID tid) 189 | { 190 | // ASSERTX(tid < ISIMPOINT_MAX_THREADS); 191 | ic->_stats[tid].repDuplicateCount += !first; 192 | } 193 | 194 | struct threadStats { 195 | UINT64 count; 196 | UINT64 repDuplicateCount; /* Number of REP iterations after the first */ 197 | char padding [cacheLineSize - 2*sizeof(UINT64)]; /* Expand so we can cache align this. 198 | * We want to avoid false sharing of the stats between threads. 199 | */ 200 | }; 201 | 202 | threadStats * _stats; 203 | char * _space; 204 | mode _mode; 205 | }; 206 | } 207 | #endif 208 | -------------------------------------------------------------------------------- /DRackSim-Trace/Trace_Tool/libinst.H: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2002-2020 Intel Corporation. 3 | * 4 | * This software is provided to you as Sample Source Code as defined in the accompanying 5 | * End User License Agreement for the Intel(R) Software Development Products ("Agreement") 6 | * section 1.L. 7 | * 8 | * This software and the related documents are provided as is, with no express or implied 9 | * warranties, other than those that are expressly stated in the License. 
10 | */ 11 | 12 | #ifndef INSTLIB_H 13 | #define INSTLIB_H 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #define ISIMPOINT_MAX_THREADS 160 23 | 24 | #include "filter.H" 25 | #include "skipper.H" 26 | #include "icount.H" 27 | #include "follow_child.H" 28 | 29 | extern "C" 30 | { 31 | #include "xed-interface.h" 32 | } 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /DRackSim-Trace/Trace_Tool/makefile: -------------------------------------------------------------------------------- 1 | ############################################################## 2 | # 3 | # DO NOT EDIT THIS FILE! 4 | # 5 | ############################################################## 6 | 7 | # If the tool is built out of the kit, PIN_ROOT must be specified in the make invocation and point to the kit root. 8 | ifdef PIN_ROOT 9 | CONFIG_ROOT := $(PIN_ROOT)/source/tools/Config 10 | else 11 | CONFIG_ROOT := ../Config 12 | endif 13 | include $(CONFIG_ROOT)/makefile.config 14 | include makefile.rules 15 | include $(TOOLS_ROOT)/Config/makefile.default.rules 16 | 17 | ############################################################## 18 | # 19 | # DO NOT EDIT THIS FILE! 20 | # 21 | ############################################################## 22 | -------------------------------------------------------------------------------- /DRackSim-Trace/Trace_Tool/makefile.rules: -------------------------------------------------------------------------------- 1 | ############################################################## 2 | # 3 | # This file includes all the test targets as well as all the 4 | # non-default build rules and test recipes. 
5 | # 6 | ############################################################## 7 | 8 | 9 | ############################################################## 10 | # 11 | # Test targets 12 | # 13 | ############################################################## 14 | 15 | ###### Place all generic definitions here ###### 16 | 17 | # This defines tests which run tools of the same name. This is simply for convenience to avoid 18 | # defining the test name twice (once in TOOL_ROOTS and again in TEST_ROOTS). 19 | # Tests defined here should not be defined in TOOL_ROOTS and TEST_ROOTS. 20 | TEST_TOOL_ROOTS := MyPinTool 21 | 22 | # This defines the tests to be run that were not already defined in TEST_TOOL_ROOTS. 23 | TEST_ROOTS := 24 | 25 | # This defines the tools which will be run during the tests, and were not already defined in 26 | # TEST_TOOL_ROOTS. 27 | TOOL_ROOTS := 28 | 29 | # This defines the static analysis tools which will be run during the tests. They should not 30 | # be defined in TEST_TOOL_ROOTS. If a test with the same name exists, it should be defined in 31 | # TEST_ROOTS. 32 | # Note: Static analysis tools are in fact executables linked with the Pin Static Analysis Library. 33 | # This library provides a subset of the Pin APIs which allows the tool to perform static analysis 34 | # of an application or dll. Pin itself is not used when this tool runs. 35 | SA_TOOL_ROOTS := 36 | 37 | # This defines all the applications that will be run during the tests. 38 | APP_ROOTS := 39 | 40 | # This defines any additional object files that need to be compiled. 41 | OBJECT_ROOTS := 42 | 43 | # This defines any additional dlls (shared objects), other than the pintools, that need to be compiled. 44 | DLL_ROOTS := 45 | 46 | # This defines any static libraries (archives), that need to be built.
47 | LIB_ROOTS := 48 | 49 | ###### Handle exceptions here (OS/arch related) ###### 50 | 51 | RUNNABLE_TESTS := $(TEST_TOOL_ROOTS) $(TEST_ROOTS) 52 | 53 | ###### Handle exceptions here (bugs related) ###### 54 | 55 | ###### Define the sanity subset ###### 56 | 57 | # This defines the list of tests that should run in sanity. It should include all the tests listed in 58 | # TEST_TOOL_ROOTS and TEST_ROOTS excluding only unstable tests. 59 | SANITY_SUBSET := $(TEST_TOOL_ROOTS) $(TEST_ROOTS) 60 | 61 | 62 | ############################################################## 63 | # 64 | # Test recipes 65 | # 66 | ############################################################## 67 | 68 | # This section contains recipes for tests other than the default. 69 | # See makefile.default.rules for the default test rules. 70 | # All tests in this section should adhere to the naming convention: .test 71 | 72 | 73 | ############################################################## 74 | # 75 | # Build rules 76 | # 77 | ############################################################## 78 | 79 | # This section contains the build rules for all binaries that have special build rules. 80 | # See makefile.default.rules for the default build rules. 
81 | -------------------------------------------------------------------------------- /DRackSim-Trace/Trace_Tool/parse_trace.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | struct Trace 11 | { 12 | int procid; 13 | int threadid; 14 | unsigned long long addr; 15 | char r; 16 | unsigned long penalty; 17 | unsigned long cycle; 18 | int block_to_fetch; 19 | }; 20 | struct mem_stream 21 | { 22 | int procid; 23 | int threadid; 24 | unsigned long long addr; 25 | char r; 26 | unsigned long long cycle; 27 | }; 28 | 29 | struct args 30 | { 31 | int node_id; 32 | char Text_Trace; 33 | }; 34 | 35 | bool min_cycle(const Trace &a, const Trace &b) 36 | { 37 | return a.cycle < b.cycle ; 38 | } 39 | 40 | void *parse(void *t_data) 41 | { 42 | struct args *data=(struct args *)t_data; 43 | cout<node_id; 44 | vector tra; 45 | fstream trace; 46 | ofstream trace1,trace_out; 47 | string in,out; 48 | in="Output/Node"+to_string(data->node_id)+"/TraceFile.trc"; 49 | out="Trace_Node"+to_string(data->node_id)+".trc"; 50 | trace.open(in); 51 | if(data->Text_Trace=='y' || data->Text_Trace=='Y') 52 | trace_out.open(out); 53 | 54 | while(1) 55 | { 56 | Trace temp; 57 | trace.read((char*)&temp,sizeof(temp)); 58 | if(!trace.eof()) 59 | { 60 | tra.push_back(temp); 61 | } 62 | else 63 | break; 64 | } 65 | 66 | remove(in.c_str()); 67 | trace1.open(in); 68 | sort(tra.begin(),tra.end(),min_cycle); 69 | 70 | for(int j=0;jText_Trace=='y' || data->Text_Trace=='Y') 97 | trace_out<Text_Trace=='y' || data->Text_Trace=='Y') 110 | trace_out<>node_id; 130 | struct args t_data; 131 | char Text_Trace; 132 | cout<<"Press 'y' if you want to generate Text Trace\t:"; 133 | cin>>Text_Trace; 134 | 135 | pthread_t threads; 136 | 137 | t_data.Text_Trace=Text_Trace; 138 | t_data.node_id=node_id; 139 | pthread_create(&threads, NULL, parse, &t_data); 140 | 141 | 
pthread_exit(NULL); 142 | 143 | // parse(1); 144 | 145 | 146 | return 0; 147 | } 148 | -------------------------------------------------------------------------------- /DRackSim-Trace/Trace_Tool/readme.md: -------------------------------------------------------------------------------- 1 | ***Prerequisite for running Trace-Based Simulation:*** 2 | * Copy whole of this directory to $pin-path/source/tools/ 3 | * Download boost or use the copy provided and extract it into $pin-path/source/include/pin/ 4 | 5 | ***Running Memory Trace Tool*** 6 | -------------------------------------------------------------------------------- /DRackSim-Trace/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | long int max_cycle_num = 100000000; 15 | long int common_clock=0; 16 | long int result_cycle=10000000; 17 | 18 | //CPU frequency kept at 3.6-GHz, which is 3 times the memory latenct 19 | float CPU_Freq = 3.6; 20 | //DDR4 used with frequency 1200-MHz 21 | float Mem_Freq = 1.2; 22 | //It is important to keep CPU frequency as exact divisor of Mem frequency, Same way it is mapped with interconenct latency (3-times) 23 | float CPU_Freq_Times_Mem_Freq = (float)CPU_Freq/(float)Mem_Freq; 24 | int freq_ratio=ceil(CPU_Freq_Times_Mem_Freq); 25 | const int max_samples=10000; 26 | 27 | pthread_mutex_t lock, lock_mem; 28 | pthread_barrier_t b; 29 | sem_t sem1,sem2; 30 | #include"remote_mem_allocator.cpp" 31 | #include"inter_connect.cpp" 32 | #include"stats.cpp" 33 | 34 | 35 | string mem; 36 | void print_mem_stats(int); 37 | //function to simulate memory access at each node for local memory accesses and 38 | //pass remote accesses to remote_memory_handler 39 | void *node_handler(void *node) 40 | { 41 | vector tra; 42 | long node_id=(long)node; 43 | //cout<<"\n"<update(); 143 | } 144 | 145 
| if(node_id==0) 146 | { 147 | if(common_clock%freq_ratio==(freq_ratio-1)) 148 | for (int i = 0; i < num_mem_pools; i++) 149 | remote_mem[i]->update(); 150 | 151 | simulate_network(); 152 | simulate_network_reverse(); 153 | 154 | if(common_clock == result_cycle) 155 | { 156 | cout<<"\nResults checkpoint created"; 157 | print_mem_stats(common_clock/result_cycle); 158 | } 159 | 160 | common_clock++; 161 | if(common_clock%100000==0) 162 | cout<<"\nCycle completed"<update(); 177 | 178 | if(node_id==0) 179 | { 180 | if(common_clock%freq_ratio==(freq_ratio-1)) 181 | for (int i = 0; i < num_mem_pools; i++) 182 | remote_mem[i]->update(); 183 | 184 | simulate_network(); 185 | simulate_network_reverse(); 186 | 187 | common_clock++; 188 | if(common_clock%100000==0) 189 | cout<<"\nCycle completed"<printStats(true); 290 | 291 | for(int i=0;iprintStats(true); 293 | 294 | print_mem_stats(-1); 295 | status_stat(); 296 | mem_stats.close(); 297 | out.close(); 298 | invalid.close(); 299 | track.close(); 300 | 301 | cout<<"\n====================Simulation_Complete====================\n"; 302 | // pthread_exit(NULL); 303 | 304 | /* L[0].display_mapping(); 305 | R[0].display_mapping(); 306 | 307 | // get a nice summary of this epoch 308 | // mem->printStats(true); 309 | 310 | cout<<"\nFree pages left "< 2 | #include 3 | #include 4 | #include 5 | 6 | ofstream out,mem_stats,invalid,netstats; 7 | 8 | #define Page_Size 4096 //in bytes 9 | 10 | class remote_addr_space; 11 | 12 | 13 | //maintains local-to-remote mapping at each node 14 | class node_remote_map 15 | { 16 | public: 17 | unsigned long local_base; //base address at local node address space 18 | unsigned long remote_base; //remote base address at remote pool address space 19 | unsigned long offset_mask; //unused 20 | int region_size; //size of the region assigned from remote to local 21 | //(multiple enteries can be there for one node to same remote pool) 22 | int mem_pool_no; //remote pool number at which memory is reserved 23 | 
}; 24 | 25 | class local_addr_space 26 | { 27 | long double memory_size; //in Giga-Bytes 28 | long double remote_mem_size; //total remote memory reserved 29 | unsigned long total_pages; //local+remote pages 30 | unsigned long allocated_pages; //used pages 31 | unsigned long local_allocated_pages; //used local pages 32 | unsigned long remote_allocated_pages; //used remote pages 33 | unsigned long free_pages; //free pages in all the memory 34 | unsigned long local_pages; // 35 | unsigned long remote_pages; 36 | int node_no; 37 | node_remote_map *remote_map; //node maintain local-remote mapping table 38 | int remote_map_index; 39 | 40 | public: 41 | local_addr_space(){} 42 | 43 | //add mem_size(GB) amount of local memory into the address space 44 | void add_local_memory(long double mem_size, int node_num) 45 | { 46 | remote_map_index=-1; 47 | node_no=node_num; 48 | memory_size=mem_size; 49 | total_pages=(pow(2.0,30.0) * memory_size) / 4096 ; 50 | local_pages=total_pages; 51 | remote_pages=0; 52 | allocated_pages=0; 53 | local_allocated_pages=0; 54 | remote_allocated_pages=0; 55 | free_pages=total_pages; 56 | remote_mem_size=0; 57 | cout<=0) 208 | { 209 | mem_stats<<"\n\t\t\t\t\t\tNode Remote-Memory Mapping Table Node-"<=0) 243 | { 244 | int last_pool=remote_map[remote_map_index].mem_pool_no; 245 | pools[last_pool]=pools[last_pool] - pages_left_in_last_shared_region; 246 | } 247 | 248 | for(int i=0;i0) 251 | mem_stats<<"\nPages in remote-pool-"<=new_pages) 425 | { 426 | 427 | L.add_remote_memory_entry(L.total_pages,R.allocated_pages,12,new_pages,R.mem_pool_no); 428 | L.total_pages=L.total_pages+new_pages; 429 | L.free_pages=L.free_pages+new_pages; 430 | L.remote_pages=L.remote_pages+new_pages; 431 | //cout<<"\nNew total_pages "< 95 | ``` 96 | 97 | - All the simulation statistics will be saved inside the specified folder. 
98 | 99 | --- 100 | 101 | -------------------------------------------------------------------------------- /DRackSim-Trace/remote_mem_allocator.cpp: -------------------------------------------------------------------------------- 1 | int round_robin_pool_select(); 2 | int per_node_round_robin_pool_select(int); 3 | int min_access_count(); 4 | int smart_idle(); 5 | int Random(); 6 | int uni_distribution(int ); 7 | 8 | #include"mem_defs.cpp" 9 | 10 | 11 | //pool allocation policies 12 | 13 | 14 | static int round_robin_last=1; 15 | //round-robin memory pool allocation 16 | int round_robin_pool_select() 17 | { 18 | if(num_mem_pools==1) 19 | return 0; 20 | out<<"\n\n=======Cycle-"<num_mem_pools) 26 | { 27 | round_robin_last=round_robin_last%num_mem_pools; 28 | } 29 | round_robin_last++; 30 | out<<"\nSelected pool is- "<<(round_robin_last-2); 31 | return (round_robin_last-2); 32 | } 33 | 34 | 35 | 36 | static int node_round_robin_last[num_nodes]; 37 | //node-wise round-robin memory pool allocation 38 | int per_node_round_robin_pool_select(int node_no) 39 | { 40 | out<<"\n\n=======Cycle-"<num_mem_pools) 46 | { 47 | node_round_robin_last[node_no]=node_round_robin_last[node_no]%num_mem_pools; 48 | } 49 | node_round_robin_last[node_no]++; 50 | out<<"\nSelected pool for Node:"<num_mem_pools) 96 | { 97 | round_robin_last=round_robin_last%num_mem_pools; 98 | } 99 | round_robin_last++; 100 | out<<"\nSelected pool is- "<<(round_robin_last-2); 101 | alloc_count[round_robin_last-2]++; 102 | last_alloc=round_robin_last-2; 103 | return (round_robin_last-2); 104 | } 105 | else 106 | { 107 | for(int z=0;z0) 116 | { 117 | for(int j=0;j limit); 164 | 165 | out<<"\n\n=======Cycle-"<udp; 182 | vectorudp_pool_set[num_mem_pools]; 183 | 184 | 185 | bool compare_by_request_rate(const uniform_distribution &a, const uniform_distribution &b) 186 | { 187 | return a.request_rate > b.request_rate; 188 | } 189 | 190 | 191 | long int subset_sum(int pool) 192 | { 193 | long int subset_size=0; 194 | 
for(auto j=0;judp_set; 241 | // sort(udp.begin(),udp.end(),compare_by_request_rate); 242 | 243 | for(int i=0;i0) 246 | { 247 | aggr_request_rate+=udp[i].request_rate; 248 | udp_set.push_back(udp[i]); 249 | // track<<"\nnode: "<DRackSim Simulator 2 |

Hardware Disaggregated Memory Systems Simulation at Scale

3 | 4 |

Usage Instructions

5 | 6 | ***Installing Pin*** 7 | * Download pin-3.20-98437-gf02b61307-gcc-linux.tar.gz from 8 | 9 | [https://www.intel.com/content/www/us/en/developer/articles/tool/pin-a-binary-instrumentation-tool-downloads.html](https://software.intel.com/sites/landingpage/pintool/downloads/pin-3.20-98437-gf02b61307-gcc-linux.tar.gz) 10 | 11 | ***Detailed Cycle-Level Simulation model of DRACKSim:*** 12 | * Go to the DRACKSim-Detailed directory and follow the instructions in its readme.md file 13 | 14 | ***Trace-based Simulation model of DRACKSim:*** 15 | * Go to the DRackSim-Trace directory and follow the instructions in its readme.md file 16 | 17 | 18 | ***For validation of DRACKSim-Detailed with gem5:*** 19 | * Clone a copy of gem5, version 22.1.0.0, to your system 20 | * Apply gem5.patch to the original gem5 code; this will calibrate different latencies and add suitable cache levels to match the DRACKSim simulator to the validation target 21 | * You can use the commands in gem5_commands_to_validate.sh, which are the ones we used for validation 22 | 23 | 24 | ***If you use this tool, please cite it as:*** 25 | ``` 26 | @inproceedings{10.1145/3615979.3656059, 27 | author = {Puri, Amit and Bellamkonda, Kartheek and Narreddy, Kailash and Jose, John and Tamarapalli, Venkatesh and Narayanan, Vijaykrishnan}, 28 | title = {DRackSim: Simulating CXL-enabled Large-Scale Disaggregated Memory Systems}, 29 | year = {2024}, 30 | isbn = {9798400703638}, 31 | publisher = {Association for Computing Machinery}, 32 | address = {New York, NY, USA}, 33 | url = {https://doi.org/10.1145/3615979.3656059}, 34 | doi = {10.1145/3615979.3656059}, 35 | booktitle = {Proceedings of the 38th ACM SIGSIM Conference on Principles of Advanced Discrete Simulation}, 36 | pages = {3–14}, 37 | numpages = {12}, 38 | keywords = {CXL, Data Centers, Performance Evaluation, Simulation}, 39 | location = {, Atlanta, GA, USA, }, 40 | series = {SIGSIM-PADS '24} 41 | } 42 | ``` 43 | 
--------------------------------------------------------------------------------