├── .gitignore
├── README.md
└── inst-sim
    ├── Core.py
    ├── GlobalVars.py
    ├── Main.py
    ├── Memory.py
    ├── NoC.py
    ├── Operations.py
    ├── Profile.py
    ├── Tile.py
    ├── config.py
    ├── data_convert.py
    ├── instrn_proto.py
    ├── result
        └── mlp
        │   └── result.txt
    ├── tile_instrn_proto.py
    └── workload
        ├── LSTM2048.tar.gz
        ├── bigLSTM.tar.gz
        ├── mlp.tar.gz
        ├── nmt.tar.gz
        ├── vgg16_small.tar.gz
        └── vgg19_small.tar.gz


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.cfg
 2 | *.swp
 3 | *.dot
 4 | *.pdf
 5 | *.png
 6 | *.pyc
 7 | !*.py
 8 | !*.pyx
 9 | !*/*.py
10 | !*/*.pyx
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PIM-SW/PIM-Simulator/40699dc06e6154e7c1a794631eb66b6e24737e08/README.md


--------------------------------------------------------------------------------
/inst-sim/Core.py:
--------------------------------------------------------------------------------
  1 | import GlobalVars as gv
  2 | import numpy as np
  3 | import Operations
  4 | from data_convert import *
  5 | import Profile as pf
  6 | import sys
  7 | 
  8 | class Core:
  9 |     def __init__(self, num, tile):
 10 |         self.num = num
 11 |         self.tile = tile
 12 |         self.inst_list = self.load_inst()
 13 |         self.cyc = 0
 14 |         self.pc = 0
 15 |         self.work_cyc = 0 # core is busy for next work_cyc
 16 |         self.is_halted = False
 17 |         self.reg = {}
 18 |         
 19 |     def load_inst(self):
 20 |         filename = "%s/tile%d/core_imem%d.npy" \
 21 |             % (gv.params["foldername"], self.tile.num, self.num)
 22 |         inst_list = np.load(filename, allow_pickle=True)
 23 | 
 24 |         return inst_list
 25 | 
 26 |     def debug(self, inst):
 27 |         if gv.debug_enabled == False: return
 28 |         #if self.cyc > gv.last_debug_cyc: print ("")
 29 |         gv.last_debug_cyc = self.cyc
 30 |         #print ("[%4d] Tile %d ; Core %2d ; pc %2d: %s    //" 
 31 |         #    % (self.cyc, self.tile.num, self.num, self.pc, inst['opcode']), inst)
 32 |         sys.stdout.flush();
 33 | 
 34 | 
 35 |     def advance(self):
 36 |         self.cyc += 1
 37 | 
 38 |         if self.is_halted == True:
 39 |             return
 40 | 
 41 |         if self.work_cyc > 0:
 42 |             self.work_cyc -= 1
 43 | 
 44 |             if self.work_cyc == 0:
 45 |                 self.pc += 1
 46 |             else: return
 47 | 
 48 |         inst = self.inst_list[self.pc]
 49 | 
 50 |         if inst['opcode'] == 'st':
 51 |             if self.tile.mem_wait == 0:
 52 |                 mem_addr = self.reg[inst['d1']]
 53 |                 counter = inst['r2']
 54 |                 width = inst['imm']
 55 |                 vec = inst['vec']
 56 |                 dat = inst['r1']
 57 | 
 58 |                 #assert (not mem_addr in self.tile.shared_mem)\
 59 |                 #    or self.tile.shared_mem[mem_addr] == 0
 60 | 
 61 |                 #self.tile.shared_mem[mem_addr] = counter
 62 |                 data = 0
 63 |                 if dat in self.reg: data = self.reg[dat]
 64 | 
 65 |                 self.tile.memory.allocate(mem_addr, counter, data)
 66 | 
 67 |                 #self.tile.mem_wait = vec # assuming 1cyc/vec-write
 68 |                 #self.work_cyc = vec
 69 |                 self.tile.mem_wait = 1 # assuming 1cyc/vec-write
 70 |                 self.work_cyc = 1
 71 | 
 72 |                 self.debug(inst)
 73 |                 pf.call_stack["Store"] += 1
 74 |                     
 75 |             else: pass # store block
 76 | 
 77 |         elif inst['opcode'] == 'ld':
 78 |             if self.tile.mem_wait == 0:
 79 |                 mem_addr = self.reg[inst['r1']]
 80 |                 vec = inst['vec']
 81 | 
 82 |                 #if mem_addr in self.tile.shared_mem\
 83 |                 #    and self.tile.shared_mem[mem_addr] > 0:
 84 |                 accessed, data = self.tile.memory.access(mem_addr)
 85 |                 if accessed:
 86 |                     self.reg[inst['d1']] = data
 87 | 
 88 |                     #self.tile.mem_wait = vec # assuming 1cyc/vec-read
 89 |                     #self.work_cyc = vec
 90 |                     self.tile.mem_wait = 1 # assuming 1cyc/vec-read
 91 |                     self.work_cyc = 1
 92 |                     #self.tile.shared_mem[mem_addr] -= 1
 93 |                     #
 94 |                     #if self.tile.shared_mem[mem_addr] == 0:
 95 |                     #    del self.tile.shared_mem[mem_addr] # free the space
 96 | 
 97 |                     self.debug(inst)
 98 |                     pf.call_stack["Load"] += 1
 99 | 
100 |             else: pass # load block
101 | 
102 |         elif inst['opcode'] == 'hlt':
103 |             self.is_halted = True
104 |             self.tile.halted_core_num += 1
105 | 
106 |             self.debug(inst)
107 | 
108 |         elif inst['opcode'] == 'alu' or inst['opcode'] == 'alui':
109 |             vec = inst['vec']
110 |             #self.work_cyc = Operations.latency[inst['opcode']] * vec # possible pipelining of multiple vectors / possible parallelization using multiple alu unit
111 |             self.work_cyc = Operations.latency[inst['opcode']] * 1 # possible pipelining of multiple vectors / possible parallelization using multiple alu unit
112 | 
113 |             self.debug(inst)
114 |             if inst['opcode'] == 'alu': pf.call_stack["ALU"] += 1
115 |             else: pf.call_stack["ALUI"] += 1
116 | 
117 |         elif inst['opcode'] == 'mvm':
118 |             self.work_cyc += Operations.latency[inst['opcode']]
119 | 
120 |             self.debug(inst)
121 |             pf.call_stack["MVM"] += 1
122 | 
123 |         elif inst['opcode'] == 'cp':
124 |             self.work_cyc += Operations.latency[inst['opcode']]
125 | 
126 |             self.debug(inst)
127 |             pf.call_stack["Copy"] += 1
128 | 
129 |         elif inst['opcode'] == 'set':
130 |             self.work_cyc += Operations.latency[inst['opcode']]
131 | 
132 |             reg_addr = inst['d1']
133 |             imm = inst['imm']
134 |             vec = inst['vec']
135 |             #assert vec == 1
136 | 
137 |             self.reg[reg_addr] = bin2int(imm, 22) # 22: default compiler config - address bit #
138 |             self.debug(inst)
139 |             pf.call_stack["Set"] += 1
140 | 
141 |         else:
142 |             self.debug(inst)
143 |             raise NotImplementedError
144 | 
145 | 


--------------------------------------------------------------------------------
/inst-sim/GlobalVars.py:
--------------------------------------------------------------------------------
 1 | debug_enabled = True
 2 | last_debug_cyc = 0
 3 | 
 4 | params = {}
 5 | NoC = None
 6 | halted_tile_num = 0
 7 | fifo_num = 0
 8 | total_inst = 0
 9 | 
10 | def ind_to_coord(ind):
11 |     return ind % params["tile_x"], ind // params["tile_x"]
12 | 


--------------------------------------------------------------------------------
/inst-sim/Main.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import math
 4 | import config as cfg
 5 | import numpy as np
 6 | import GlobalVars as gv
 7 | import Profile as pf
 8 | import NoC
 9 | import Tile
10 | from collections import deque
11 | from collections import defaultdict
12 | from queue import PriorityQueue
13 | from cProfile import Profile
14 | from pstats import Stats 
15 | 
16 | def init():
17 |     # workload name
18 |     gv.params["workload"] = sys.argv[1]
19 |     gv.params["foldername"] = "workload/%s" % (sys.argv[1])
20 |     gv.params["result_foldername"] = "result/%s" % (sys.argv[1])
21 |     if not os.path.isdir(gv.params["foldername"]):
22 |         raise("No workload folder exists")
23 |     if not os.path.isdir(gv.params["result_foldername"]):
24 |         os.mkdir(gv.params["result_foldername"])
25 | 
26 |     os.listdir()
27 |     total_cores = 0
28 |     total_tiles = 0
29 | 
30 |     ##
31 |     for _, dirnames, filenames in os.walk(gv.params["foldername"]):
32 |         for file in filenames:
33 |             if "-core" in file and not ".swp" in file:
34 |                 total_cores += 1
35 |         total_tiles += len(dirnames)
36 | 
37 |     gv.params["tile_x"] = int(math.sqrt(total_tiles))
38 |     gv.fifo_num = total_tiles
39 | 
40 |     assert(total_cores % total_tiles == 0)
41 | 
42 |     ##
43 |     gv.params["tile_num"] = int(total_tiles)
44 |     gv.params["core_num"] = int(total_cores / total_tiles)
45 | 
46 |     gv.NoC = NoC.NoC()
47 | 
48 | def simulate():
49 |     pf.cyc = 0
50 |     while gv.halted_tile_num < gv.params["tile_num"]:
51 |         pf.cyc += 1
52 |         gv.NoC.advance()
53 |         
54 | 
55 | def stat():
56 |     result_path = os.path.join(gv.params["result_foldername"], "result.txt")
57 |     result_file = open(result_path, "w")
58 | 
59 |     result_file.write("\n====CPI STACK====\n")
60 |     result_file.write("total cyc: {}\n".format(pf.cyc))
61 |     #print ("\n====CPI STACK====")
62 |     #print ("total cyc: {}".format(pf.cyc))
63 |     sum_v = 0
64 |     for _, v in pf.call_stack.items():
65 |         sum_v += v
66 |     for k, v in pf.call_stack.items():
67 |         result_file.write("{} : {} / {}\n".format(k, v, sum_v))
68 |         #print ("{} : {} / {}".format(k, v, sum_v))
69 | 
70 |     result_file.write("\n====LINK====\n")
71 |     #print ("\n====LINK====")
72 |     result_file.write("busiest link: {}KB\n".format(float(pf.busiest_link_data) / (8*1024)))
73 |     #print ("busiest link: {}KB".format(float(pf.busiest_link_data) / (8*1024)))
74 | 
75 |     result_file.write("\n====MAX MEMORY SIZE====\n")
76 |     #print("\n====MAX MEMORY SIZE====")
77 |     for tile in gv.NoC.tiles:
78 |         result_file.write("tile id: {}\t\t\tphysical size: {}\t\t\tvirtual size: {}\n".format(
79 |             tile.num, tile.memory.max_physical_size, len(tile.memory.virtual_mem) * cfg.xbar_size))
80 |         #print(tile.num, tile.memory.max_physical_size, 
81 |         #    len(tile.memory.virtual_mem) * gv.MVMU_DIM)
82 | 
83 | def testrun():
84 |     init()
85 |     simulate()
86 |     stat()
87 | 
88 | profiler = Profile()
89 | profiler.runcall(testrun)
90 | 
91 | stats = Stats(profiler)
92 | stats.strip_dirs()
93 | stats.sort_stats('tottime')
94 | stats.print_stats()
95 | 


--------------------------------------------------------------------------------
/inst-sim/Memory.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import GlobalVars as gv
 3 | import Profile as pf
 4 | import config as cfg
 5 | import Tile
 6 | import sys
 7 | 
 8 | class Memory:
 9 |     def __init__(self, tile):
10 |         self.tile = tile
11 | 
12 |         # map virtual address to the physical address
13 |         # convert the virtual address 
14 |         # to the physical address
15 |         self.virtual_to_physical = {}
16 |         
17 |         # physical memory (counter (0), virtual addr (1), data(2))
18 |         self.physical_mem = {}
19 |         self.max_physical_size = 0
20 | 
21 |         self.virtual_mem = {}
22 | 
23 |     def allocate(self, virtual_addr, counter, data = 0):
24 |         # check alignment
25 |         assert(virtual_addr % cfg.xbar_size == 0)
26 | 
27 |         self.virtual_mem[virtual_addr] = 0
28 | 
29 |         # first check if the virtual address & count
30 |         assert ((not virtual_addr in self.virtual_to_physical)\
31 |             or (self.physical_mem[self.virtual_to_physical[virtual_addr]][0] == 0))
32 | 
33 |         # search for the empty space & allocate to the physical memory
34 |         target_addr = 0
35 |         while(True):
36 |             # if the physical address has not been allocated
37 |             # or the counter is zero
38 |             if(not target_addr in self.physical_mem):
39 |                 # then add a new entry to the dictionary
40 |                 self.virtual_to_physical[virtual_addr] = target_addr
41 |                 # allocate the new entry to the physical mem
42 |                 self.physical_mem[target_addr] = [counter, virtual_addr, data]
43 |                 break
44 |             target_addr += cfg.xbar_size
45 |         
46 |         # renew the maximum size
47 |         if(target_addr > self.max_physical_size): self.max_physical_size = target_addr
48 | 
49 |     def access(self, virtual_addr):
50 |         # if the virtual address exist & the physical memory's counter is not zero
51 | 
52 |         if ((virtual_addr in self.virtual_to_physical)\
53 |             and (self.physical_mem[self.virtual_to_physical[virtual_addr]][0] != 0)):
54 |             
55 |             physical_addr = self.virtual_to_physical[virtual_addr]
56 |             self.physical_mem[physical_addr][0] -= 1
57 |             data = self.physical_mem[physical_addr][2]
58 | 
59 |             if(self.physical_mem[physical_addr][0] == 0):
60 |                 self.virtual_to_physical.pop(self.physical_mem[physical_addr][1])
61 |                 assert(not self.physical_mem[physical_addr][1] in self.virtual_to_physical)
62 |                 self.physical_mem.pop(physical_addr)
63 |                 assert(not physical_addr in self.physical_mem)
64 |                 #del self.virtual_to_physical[self.physical_mem[physical_addr][1]]
65 |             return True, data
66 | 
67 |         return False, -1
68 |         
69 | 


--------------------------------------------------------------------------------
/inst-sim/NoC.py:
--------------------------------------------------------------------------------
 1 | import GlobalVars as gv
 2 | from queue import PriorityQueue
 3 | import Tile
 4 | import Profile as pf
 5 | 
 6 | class NoC:
 7 | 
 8 |     def __init__(self):
 9 |         self.tiles = [Tile.Tile(i, self) for i in range(gv.params["tile_num"])]
10 |         self.cyc = 0
11 |         self.packet_queue = PriorityQueue()
12 | 
13 |         self.total_inst = 0
14 |         for tile in self.tiles: 
15 |             self.total_inst += tile.total_inst
16 | 
17 |         self.pc = 0
18 |         self.pc_prev = 0
19 | 
20 |     def send_packets(self, src_tile_num, dst_tile_num, vtile_id, packet_num, wait_cyc):
21 |         packets = (self.cyc + wait_cyc + self.getRoutingLatency(src_tile_num, dst_tile_num),
22 |                 dst_tile_num, vtile_id, packet_num)
23 |         self.packet_queue.put(packets)
24 | 
25 |     def advance(self):
26 |         self.cyc += 1
27 |         while True:
28 |             if self.packet_queue.empty() == True: break
29 |             (arrival_cyc, dst_tile_num, vtile_id, packet_num) = self.packet_queue.queue[0]
30 |             if arrival_cyc > self.cyc: break
31 |             self.packet_queue.get()
32 |             self.tiles[dst_tile_num].fifo[vtile_id] += packet_num
33 |             
34 | 
35 |         self.pc = 0
36 |         for tile in self.tiles:
37 |             self.pc += tile.pc
38 |             tile.advance()
39 |             for core in tile.cores:
40 |                 self.pc += core.pc
41 | 
42 |         self.pc_prev = self.pc
43 | 
44 |         pf.progress(self.pc, self.total_inst)
45 | 
46 |     # FIXME: add BW support
47 |     def getRoutingLatency(self, src_tile_num, dst_tile_num):
48 |         src_x, src_y = gv.ind_to_coord(src_tile_num)
49 |         dst_x, dst_y = gv.ind_to_coord(dst_tile_num)
50 | 
51 |         return abs(src_x - dst_x) + abs(src_y - dst_y)
52 | 
53 | 


--------------------------------------------------------------------------------
/inst-sim/Operations.py:
--------------------------------------------------------------------------------
 1 | op_list = ['ld', 'cp', 'st', 'set', 'nop', 'alu', 'alui', 'mvm', 'vvo', 'hlt', 'jmp', 'beq', 'alu_int', 'crs']
 2 | aluop_list = ['add', 'sub', 'sna', 'mul', 'sigmoid'] # sna is also used by mvm isntruction
 3 | op_list_tile = ['send', 'receive', 'halt']
 4 | 
 5 | latency = {"ld": None,
 6 |            "cp": 1,
 7 |            "st": None,
 8 |            "set": 1,
 9 |            "nop": 1,
10 |            "alu": 1,
11 |            "alui": 1,
12 |            "mvm": 1000,
13 |            "vvo": 1,
14 |            "hlt": 1,
15 |            "jmp": 1,
16 |            "beq": 1,
17 |            "alu_int": 1,
18 |            "crs": 1,
19 |            "send": None,
20 |            "receive": None,
21 |            "halt": 1
22 |           }
23 | 
24 | 


--------------------------------------------------------------------------------
/inst-sim/Profile.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | 
 4 | def progress(count, total):
 5 |     bar_len = 60
 6 |     filled_len = int(round(bar_len * count / float(total)))
 7 | 
 8 |     percents = round(100.0 * count / float(total), 1)
 9 |     bar = '=' * filled_len + '-' * (bar_len - filled_len)
10 | 
11 |     sys.stdout.write('progress: [%s] %s%s\r' % (bar, percents, '%'))
12 |     sys.stdout.flush()
13 | 
14 | passed_data = None # key:((src_x, src_y), (dst_x, dst_y)), value:data_bits
15 | busiest_link = None
16 | busiest_link_data = -1
17 | cyc = None
18 | cpi_stack = {"Copy": 0,
19 |            "Load": 0,
20 |            "Store": 0,
21 |            "Send": 0,
22 |            "Receive": 0,
23 |            "MVM": 0,
24 |            "ALU": 0,
25 |            "ALUI": 0,
26 |            "Set": 0,
27 |            "WriteInput": 0,
28 |            "ReadOutput": 0}
29 | 
30 | call_stack = {"Copy": 0,
31 |            "Load": 0,
32 |            "Store": 0,
33 |            "Send": 0,
34 |            "Receive": 0,
35 |            "MVM": 0,
36 |            "ALU": 0,
37 |            "ALUI": 0,
38 |            "Set": 0,
39 |            "WriteInput": 0,
40 |            "ReadOutput": 0}
41 | 


--------------------------------------------------------------------------------
/inst-sim/Tile.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import GlobalVars as gv
  3 | import Profile as pf
  4 | import NoC
  5 | import Core
  6 | import Memory
  7 | import sys
  8 | 
  9 | class Tile:
 10 |     def __init__(self, num, noc):
 11 |         self.num = num
 12 |         self.noc = noc
 13 |         #self.x, self.y = gv.ind_to_coord(num)
 14 |         self.inst_list = self.load_inst()
 15 |         self.cyc = 0
 16 |         self.pc = 0
 17 |         self.send_wait_cyc = 0
 18 |         self.is_halted = False
 19 |         self.halted_core_num = 0
 20 | 
 21 |         self.fifo = [0 for _ in range(gv.fifo_num)] # (# of packets) in the fifo
 22 | 
 23 |         self.mem_wait = 0 # how many cycles left to finish serving current request
 24 | 
 25 |         self.memory = Memory.Memory(self)
 26 |         self.cores = [Core.Core(i, self) for i in range(gv.params["core_num"])]
 27 | 
 28 |         self.total_inst = len(self.inst_list)
 29 |         for core in self.cores: self.total_inst += len(core.inst_list)
 30 | 
 31 | 
 32 |     def load_inst(self):
 33 |         filename = "%s/tile%d/tile_imem.npy" % (gv.params["foldername"], self.num)
 34 |         inst_list = np.load(filename, allow_pickle=True)
 35 |         return inst_list
 36 | 
 37 |     def debug(self, inst):
 38 |         if gv.debug_enabled == False: return
 39 |         #if self.cyc > gv.last_debug_cyc: print ("")
 40 |         gv.last_debug_cyc = self.cyc
 41 |         #print ("[%4d] Tile %d ;         ; pc %2d: %s    //" % (self.cyc, self.num, self.pc, inst['opcode']), inst)
 42 |         sys.stdout.flush();
 43 | 
 44 |     def advance(self):
 45 | 
 46 |         self.cyc += 1
 47 |         if self.is_halted == True:
 48 |             return
 49 | 
 50 |         if self.mem_wait > 0:
 51 |             self.mem_wait -= 1
 52 | 
 53 |         if self.send_wait_cyc > 0:
 54 |             self.send_wait_cyc -= 1
 55 |             
 56 |         inst = self.inst_list[self.pc]
 57 |         
 58 |         if inst['opcode'] == 'send':
 59 |             if self.mem_wait == 0:
 60 |                 mem_addr = inst['mem_addr']
 61 |                 #if self.num == 0 or \
 62 |                 #    (mem_addr in self.shared_mem and self.shared_mem[mem_addr] != 0):
 63 |                 accessed = True
 64 |                 if self.num != 0: accessed, data = self.memory.access(mem_addr)
 65 |                 if accessed:
 66 | 
 67 |                     vtile_id = inst['vtile_id']
 68 |                     send_width = inst['r1']
 69 |                     target_tile_num = inst['r2']
 70 |                     vec = inst['vec']
 71 | 
 72 |                     # send_width: scalar #, 16: bit-precision, 32: packet size
 73 |                     packet_num = ((send_width*16+31) / 32) * vec
 74 |                     self.noc.send_packets(self.num, target_tile_num, vtile_id, packet_num, self.send_wait_cyc)
 75 |                     
 76 |                     # shared mem ==> packet geneartion
 77 |                     #self.mem_wait = vec # assuming 1cyc/vec-read, decoupled packet gen
 78 |                     self.mem_wait = 1 # assuming 1cyc/vec-read, decoupled packet gen
 79 | 
 80 |                     # decoupled packet gen, assuming 1cyc/packet 
 81 |                     #self.send_wait_cyc += packet_num
 82 |                     self.send_wait_cyc += 1
 83 | 
 84 |                     self.debug(inst)
 85 |                     self.pc += 1
 86 |                     pf.call_stack["Send"] += 1
 87 | 
 88 |             else: pass #send block - shared memory contention
 89 | 
 90 |         elif inst['opcode'] == 'receive':
 91 |             if self.mem_wait == 0:
 92 |                 mem_addr = inst['mem_addr']
 93 |                 vtile_id = inst['vtile_id']
 94 |                 receive_width = inst['r1']
 95 |                 counter = inst['r2']
 96 |                 vec = inst['vec']
 97 | 
 98 | 
 99 |                 if self.fifo[vtile_id] > 0:
100 |                     #assert (not mem_addr in self.shared_mem)\
101 |                     #    or self.shared_mem[mem_addr] == 0
102 | 
103 |                     packet_num = ((receive_width*16+31) / 32) * vec
104 |                     assert packet_num <= self.fifo[vtile_id]
105 | 
106 |                     self.fifo[vtile_id] -= packet_num
107 |                     self.memory.allocate(mem_addr, counter)
108 | 
109 |                     # fifo ==> shared mem
110 |                     #self.mem_wait = vec # assuming 1cyc/vec-write
111 |                     self.mem_wait = 1 # assuming 1cyc/vec-write
112 | 
113 |                     self.debug(inst)
114 |                     self.pc += 1
115 |                     pf.call_stack["Receive"] += 1
116 | 
117 |             else: 
118 |                 pass #receive block
119 | 
120 |         elif inst['opcode'] == 'halt':
121 |             if self.halted_core_num == gv.params["core_num"]:
122 |                 self.is_halted = True
123 |                 gv.halted_tile_num += 1
124 | 
125 |                 self.debug(inst)
126 |             
127 |         else:
128 |             raise NotImplementedError
129 | 
130 |         for core in self.cores:
131 |             core.advance()
132 | 


--------------------------------------------------------------------------------
/inst-sim/config.py:
--------------------------------------------------------------------------------
 1 | ## Variable to define the type of MVMU
 2 | # One of "Analog", "Digital" (To be added), "PNM" (To be added)
 3 | MVMU_ver = "Analog"
 4 | 
 5 | ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 6 | num_bits = 16
 7 | 
 8 | ## IMA configurable parameters (permissible values for each parameter provided here)
 9 | ## Instruction generation - affected by xbar_bits, num_xbar, xbar_size.
10 | # xbar_bits: 2, 4, 6
11 | # num_xbar: positive integer
12 | # xbar_size: 32, 64, 128, 256
13 | # dac_res: positive integer <= num_bits
14 | # adc_res: positive integer <= num_bits
15 | # num_adc: positive integer <= num_xbar (doesn't allow more than one ADC per xbar)
16 | # num_ALU: positive integer
17 | # dataMem_size: (in Bytes) - 256, 512, 1024, 2048 (affects instrn width, hence capped)
18 | # instrnMem_size: (in Bytes) - 512, 1024, 2048
19 | 
20 | # Fixed parameters
21 | addr_width = 32 # Added to address larger address space for conv layers (#TODO: Compiler needs to fix shared memory reuse)
22 | data_width = num_bits # (in bits)
23 | xbdata_width = data_width # (in bits)
24 | instrn_width = 48 # (in bits)
25 | 
26 | 
27 | # Input and Weight parameters
28 | input_prec = 16
29 | weight_width = 16
30 | # Change here - Specify the IMA parameters here
31 | xbar_bits = 2
32 | 
33 | #FIXME make num_matrix equal to N_CONSTANT_MVMUS_PER_CORE 
34 | num_matrix = 4 # each matrix is 1-fw logical xbar for inference and 1-fw, 1-bw, and 1 delta logical xbar for training. Each logical xbar for inference is 8-fw physical xbar and for training  8-fw, 8-bw and 16-delta physical xbars.
35 | xbar_size = 128
36 | dac_res = 1
37 | # ADC configuration
38 | adc_res = 8 # around 4 to 8. this value should be
39 | num_adc_per_matrix = 2
40 | num_adc = num_adc_per_matrix * num_matrix
41 | 
42 | # The idea is to have different ADC resolution value for each ADC.
43 | # The number of ADC if defined by num_adc property. Currently it is 2 * num_matrix(2) = 4
44 | # NOTE: Only taking in account indexes 0 and 2, 1 and 3 are ignored, because ADCs 1 and 3 are assumed t be equal to 0 and 2. 
45 | 
46 | num_ALU = num_matrix*2
47 | #dataMem_size = num_matrix*(6*xbar_size) # 4 for 4 input spaces within matrix (1 for f/b each, 2 for d)
48 | dataMem_size = 2048 # 2048 is larger than num_matrix*(6*xbar_size)
49 | instrnMem_size = 512 #in entries
50 | 
51 | ## Tile configurable parameters (permissible values for each parameter provided here)
52 | ## Instruction generation - affected by num_ima
53 | # num_ima: positive integer
54 | # edram buswidth: positive integer <= 16 (actual buswidth - this integer*data_width)
55 | # edram_size: (in KiloBytes) - 64, 128, 256, 512
56 | # receive_buffer_depth: 4, 8, 12, 16, 32 (number of edram buffer entries (each entry maps to a virtual tile)) \
57 | #        puts a cap on the maximum num ber of tiles that can send data to a tile in next layer
58 | # receive_buffer_width: edram_buswidth/data_width (Fixed - in terms of number of neurons)
59 | # tile_instrnMem_size: 256, 512, 1024 (in Bytes)
60 | 
61 | # Fixed parameters
62 | instrn_width = 48 # bits (op-2, vtile_id-6, send/receive_width-8, target_addr/counter-16, vw-8, mem_addr-16)
63 | edram_buswidth = 256 # in bits
64 | #receive_buffer_depth = 16
65 | receive_buffer_depth = 150 #set equal to num_tile_max
66 | receive_buffer_width =  edram_buswidth / num_bits # size of receive buffeer entry (in terms of number of neurons)
67 | 
68 | # Change here - Specify the Tile parameters here
69 | num_ima = 8
70 | edram_size = 64 # in Kilobytes (64 KB - same as issac)
71 | tile_instrnMem_size = 2048 # in entries
72 | 
73 | ## Node configurable parameters (permissible values for each parameter provided here)
74 | ## Instruction generation - affected by num_tile
75 | # num_tile_compute =  positive integer
76 | # inj_rate < 0.2 (depends on the mapping)
77 | # num_port: 4, 8
78 | 
79 | # Fixed parameters
80 | # NOC topology: cmesh (n=2, k=4, c=4) - can fit k*n*c tiles
81 | cmesh_c = 4
82 | num_bits_tileId =32
83 | flit_width = 32
84 | packet_width = edram_buswidth/data_width #in multiples of flits (data considered only - booksim consider address itself)
85 | # (b bit of address = logN, N is the number of nodes)
86 | 
87 | 
88 | ## Node parameters - Our way of simulation just assumes all tile in one actual node
89 | # Change here - Specify the Node parameters here (FIXME to be supported)
90 | num_node = 1
91 | num_tile_max = 138.0 # maximum number of tiles per node
92 | 


--------------------------------------------------------------------------------
/inst-sim/data_convert.py:
--------------------------------------------------------------------------------
  1 | # APIs to convert data from:
  2 | # 1. float to fixed point binary (2s complement) [float to bit-string]
  3 | # 2. fixed point binary (2s complement) to float [bit-string to float]
  4 | # 3. integer to binary (2s complement) [int to bit-string]
  5 | # 4. binary (2s complement) to inetger [bit-string to int]
  6 | import numpy as np
  7 | 
  8 | def bin2int (binary_string, bits):
  9 |     val = int (binary_string,2)
 10 |     if (val & (1 << (bits - 1))) != 0: # if sign bit is set e.g., 8bit: 128-255
 11 |         val = val - (1 << bits)        # compute negative value
 12 |     return val
 13 | 
 14 | def int2bin (int_data, bits):
 15 |     data_str = bin(int_data & (2**bits-1))[2:].zfill(bits)
 16 |     return data_str
 17 | 
 18 | def float2fixed (float_data, int_bits, frac_bits):
 19 |     temp = float_data * (2**frac_bits)
 20 |     temp = int(round (temp))
 21 |     return int2bin (temp, (int_bits+frac_bits))
 22 | 
 23 | def fixed2float (binary_string, int_bits, frac_bits):
 24 |     temp = bin2int (binary_string, (int_bits + frac_bits))
 25 |     return float(temp) / (2**frac_bits)
 26 | 
 27 | # defining float (2d numpy float array) <-> fixed (2d list of strings) conversion
 28 | def float2fixed_2d (float_data_arr, int_bits, frac_bits):
 29 |     (num_row, num_col) = np.shape(float_data_arr)
 30 |     # input type - array, outpt type = 2d list
 31 |     #out_list = [['']*num_col] * num_row
 32 |     out_list = [['' for i in range(num_col)] for j in range(num_row)]
 33 |     for i in range (num_row):
 34 |         for j in range (num_col):
 35 |             float_data = float_data_arr[i,j]
 36 |             out_list[i][j] = float2fixed (float_data, int_bits, frac_bits)
 37 |     return out_list
 38 | 
 39 | # defining fixed (2d list of string) <-> float (2d numpy float array) conversion
 40 | def fixed2float_2d (binary_string_list, int_bits, frac_bits):
 41 |     (num_row, num_col) = np.shape(binary_string_list)
 42 |     # input type - 2d list, outpt type = array
 43 |     out_arr = np.zeros((num_row, num_col), dtype=float)
 44 |     for i in range (num_row):
 45 |         for j in range (num_col):
 46 |             binary_string = binary_string_list[i][j]
 47 |             out_arr[i, j] = fixed2float (binary_string, int_bits, frac_bits)
 48 |     return out_arr
 49 | 
 50 | # defiing a fuction to extract a given num of bits from each element of a 2d binary_string_list
 51 | def getBitsFromList (binary_string_list, start_bit, num_bit):
 52 |     (num_row, num_col) = np.shape(binary_string_list)
 53 |     # input type - 2d list, outpt type = 2d list
 54 |     out_list = [['']*num_col] * num_row
 55 |     for i in range (num_row):
 56 |         for j in range (num_col):
 57 |             out_list[i][j] = binary_string_list[i][j][start_bit:start_bit + num_bit]
 58 |     return out_list
 59 | 
 60 | 
 61 | ## Obsolete - because they were long and less readable
 62 | '''def bin2frac (binary_string):
 63 |     result = 0
 64 |     ex = 2.0
 65 |     for c in binary_string:
 66 |         if c == '1':
 67 |             result += 1/ex
 68 |         ex *= 2
 69 |     return result
 70 | 
 71 | def frac2bin (frac, bits):
 72 |     result = ''
 73 |     ex = 2.0
 74 |     for i in range (bits):
 75 |         frac = frac * ex
 76 |         result += str(int(frac))
 77 |         frac = frac - int(frac)
 78 |     return result
 79 | 
 80 | def float2fixed2 (float_data, int_bits, frac_bits):
 81 |     temp = abs(float_data)
 82 |     int_part = int (temp)
 83 |     bin1 = int2bin (int_part, int_bits)
 84 |     frac_part = temp - int_part
 85 |     bin2 = frac2bin (frac_part, frac_bits)
 86 | 
 87 |     if (float_data >= 0):
 88 |         return bin1 + bin2
 89 |     else:
 90 |         result = -1 * int ((bin1 + bin2), 2)
 91 |         result = int2bin (result, int_bits + frac_bits)
 92 |         return result
 93 | 
 94 | def fixed2float2 (binary_string, int_bits, frac_bits):
 95 |     if (binary_string[0] == 1):
 96 |         print ('aho negative')
 97 |         num = -1 * bin2int (binary_string, int_bits + frac_bits) #negative integer
 98 |         binary_string = int2bin (num, int_bits + frac_bits)
 99 | 
100 |     return bin2int (binary_string[0:int_bits], int_bits) + \
101 |             bin2frac (binary_string[int_bits:])'''
102 | 
103 | # Test the above functions
104 | '''import numpy as np
105 | num = 4
106 | int_bits = 4
107 | frac_bits = 12
108 | 
109 | inp_float = np.random.rand(num)
110 | print ('original: ', inp_float)
111 | 
112 | inp1 = ['']*num
113 | inp2 = ['']*num
114 | for i in range (num):
115 |     inp1[i] = float2fixed (inp_float[i], int_bits, frac_bits)
116 |     inp2[i] = float2fixed2 (inp_float[i], int_bits, frac_bits)
117 | print ('fixed: ', inp1)
118 | print ('fixed2: ', inp2)
119 | 
120 | inp_f1 = [0.0]*num
121 | inp_f2 = [0.0]*num
122 | for i in range (num):
123 |     inp_f1[i] = fixed2float (inp1[i], int_bits, frac_bits)
124 |     inp_f2[i] = fixed2float2 (inp2[i], int_bits, frac_bits)
125 | print ('back2float: ', inp_f1)
126 | print ('back2float2: ', inp_f2)'''
127 | 


--------------------------------------------------------------------------------
/inst-sim/instrn_proto.py:
--------------------------------------------------------------------------------
  1 | # Define the instruction prototypes which will be used by the generate_instrn.py file
  2 | import sys
  3 | 
  4 | import numpy as np
  5 | from data_convert import *
  6 | import config as cfg
  7 | 
  8 | from data_convert import *
  9 | 
 10 | # List of supported opcodes/aluops for IMA - cp will copy data (from data memory of ima to xbarInmem)
 11 | op_list = ['ld', 'cp', 'st', 'set', 'nop', 'alu', 'alui', 'mvm', 'vvo', 'hlt', 'jmp', 'beq', 'alu_int', 'crs']
 12 | aluop_list = ['add', 'sub', 'sna', 'mul', 'sigmoid'] # sna is also used by mvm isntruction
 13 | 
 14 | # Instruction format for IMA
 15 | dummy_instrn = {'opcode' : op_list[0],      # instrn op
 16 |                'aluop'  : aluop_list[0],   # alu function
 17 |                'd1'     : 0,               # destination
 18 |                'r1'     : 0,               # operand1 (stride for mvm)
 19 |                'r2'     : 0,               # operand2
 20 |                'r3'     : 0,               # operand3 (shift)
 21 |                'vec'    : 0,               # vector width
 22 |                'imm'    : 0,               # immediate (scalar) data
 23 |                'xb_nma' : 0 }              # xbar negative-mask, a xbar evaluates if neg-mask = 1
 24 | 
 25 | 
 26 | def i_load (d1, r1, load_width = 1, vec = 1):
 27 |     assert (load_width <= (cfg.edram_buswidth/cfg.data_width)), 'Load width must be smaller than \
 28 |     edram_buswidth/data_width'
 29 |     i_temp = dummy_instrn.copy ()
 30 |     i_temp['opcode'] = 'ld'
 31 |     i_temp['d1'] = d1 # rf addr
 32 |     i_temp['r1'] = r1 # mem addr
 33 |     i_temp['imm'] = load_width
 34 |     i_temp['vec'] = vec
 35 |     return i_temp
 36 | 
 37 | # generate store protoyype - store data from (datamem/sboutmem) to edram
 38 | def i_store (d1, r1, counter = 1, store_width = 1, vec = 1):
 39 |     assert (store_width <= (cfg.edram_buswidth/cfg.data_width)), 'Load width must be smaller than \
 40 |     edram_buswidth/data_width'
 41 |     i_temp = dummy_instrn.copy ()
 42 |     i_temp['opcode'] = 'st'
 43 |     i_temp['d1'] = d1 # mem addr
 44 |     i_temp['r1'] = r1 # rf addr
 45 |     i_temp['r2'] = counter
 46 |     i_temp['imm'] = store_width
 47 |     i_temp['vec'] = vec
 48 |     return i_temp
 49 | 
 50 | # generate cp prototype:
 51 | # src_type = 0: copy data from (datamem/xbInmem) to (datmem/xbInmem)
 52 | # src_type = 1: copy data from (datamem/xbOutmem) to (datmem/xbInmem)
 53 | def i_copy (d1, r1, vec = 1, src_type = 0):
 54 |     i_temp = dummy_instrn.copy ()
 55 |     i_temp['opcode'] = 'cp'
 56 |     i_temp['d1'] = d1
 57 |     i_temp['r1'] = r1
 58 |     i_temp['vec'] = vec
 59 |     return i_temp
 60 | 
 61 | # generate set prototype - set a particular reg value (datamem/xbInmem) to a scalar
 62 | def i_set (d1, imm, vec = 1):
 63 |     i_temp = dummy_instrn.copy ()
 64 |     i_temp['opcode'] = 'set'
 65 |     i_temp['d1'] = d1
 66 |     i_temp['imm'] = imm if (type(imm) == str) else int2bin(imm, cfg.addr_width)
 67 |     i_temp['vec'] = vec
 68 |     return i_temp
 69 | 
 70 | # generate alu prototype - arithmrtic, logical, non-linear opearrions
 71 | def i_alu (aluop, d1, r1, r2=0, imm=0, vec = 1):
 72 |     i_temp = dummy_instrn.copy()
 73 |     i_temp['opcode'] = 'alu'
 74 |     i_temp['aluop'] = aluop
 75 |     i_temp['d1'] = d1
 76 |     i_temp['r1'] = r1
 77 |     i_temp['r2'] = r2
 78 |     i_temp['imm'] = imm # will be used in lsh
 79 |     i_temp['vec'] = vec
 80 |     return i_temp
 81 | 
 82 | # generate alui prototype - arithmrtic, logical, non-linear opearrions with scalars
 83 | def i_alui (aluop, d1, r1, imm, vec = 1):
 84 |     i_temp = dummy_instrn.copy()
 85 |     i_temp['opcode'] = 'alui'
 86 |     i_temp['aluop'] = aluop
 87 |     i_temp['d1'] = d1
 88 |     i_temp['r1'] = r1
 89 |     i_temp['imm'] = float2fixed (imm, cfg.int_bits, cfg.frac_bits)
 90 |     i_temp['vec'] = vec
 91 |     return i_temp
 92 | 
 93 | # generate mvm prototype - xbar isntrn
 94 | def i_mvm (xb_nma = cfg.num_matrix*'0', r1=0, r2=0): # r1 is displacement, r2 is length of a continuum of data
 95 |     xb_nma_str = xb_nma[0]
 96 |     #xb_nma_str = xb_nma
 97 |     xb_nma_list = [xb_nma_str[i]+'00' for i in range(len(xb_nma_str))] # split into list of 3-bit masks
 98 |     assert (len(xb_nma_list) == cfg.num_matrix) # each matrix in a core has a 3-bit mask
 99 |     i_temp = dummy_instrn.copy()
100 |     i_temp['opcode'] = 'mvm'
101 |     i_temp['r1'] = r1
102 |     i_temp['r2'] = r2
103 |     i_temp['xb_nma'] = xb_nma_list
104 |     return i_temp
105 |     
106 | ## Added for COMPILER - i_train, mask as integer
107 | def i_train (xb_nma = cfg.num_matrix*['000'], r1=0, r2=0): # r1 is displacement, r2 is length of a continuum of data
108 |     xb_nma_str = xb_nma[0]
109 |     xb_nma_list = [xb_nma_str[i*3:(i+1)*3] for i in range(len(xb_nma_str)/3)] # split into list of 3-bit masks
110 |     assert (len(xb_nma_list) == cfg.num_matrix) # each matrix in a core has a 3-bit mask
111 |     i_temp = dummy_instrn.copy()
112 |     i_temp['opcode'] = 'mvm'
113 |     i_temp['r1'] = r1
114 |     i_temp['r2'] = r2
115 |     i_temp['xb_nma'] = xb_nma_list
116 |     return i_temp
117 | 
118 | # generate crs instruction
119 | # for each matrix, one bit to specify whether to do crs or not
120 | def i_crs (xb_nma = cfg.num_matrix*['0']):
121 |     assert (len(xb_nma) == cfg.num_matrix) # each matrix in a core has a 1-bit mask
122 |     i_temp = dummy_instrn.copy()
123 |     i_temp['opcode'] = 'crs'
124 |     i_temp['xb_nma'] = xb_nma
125 |     return i_temp
126 | 
127 | # generate halt prototype
128 | def i_hlt ():
129 |     i_temp = dummy_instrn.copy()
130 |     i_temp['opcode'] = 'hlt'
131 |     return i_temp
132 | 
133 | # generate jmp prototype
134 | def i_jmp (imm): # imm is the jump target
135 |     i_temp = dummy_instrn.copy()
136 |     i_temp['opcode'] = 'jmp'
137 |     i_temp['imm'] = imm
138 |     return i_temp
139 | 
140 | # generate beq prototype
141 | def i_beq (r1, r2, imm): # imm is the jump target
142 |     i_temp = dummy_instrn.copy()
143 |     i_temp['opcode'] = 'beq'
144 |     i_temp['r1'] = r1
145 |     i_temp['r2'] = r2
146 |     i_temp['imm'] = imm
147 |     return i_temp
148 | 
149 | # generate alu_int prototype
150 | def i_alu_int (aluop, d1, r1, r2):
151 |     i_temp = dummy_instrn.copy()
152 |     i_temp['opcode'] = 'alu_int'
153 |     i_temp['aluop'] = aluop
154 |     i_temp['d1'] = d1
155 |     i_temp['r1'] = r1
156 |     i_temp['r2'] = r2
157 |     return i_temp
158 | 
159 | 


--------------------------------------------------------------------------------
/inst-sim/result/mlp/result.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | ====CPI STACK====
 3 | total cyc: 10718
 4 | Copy : 2284 / 8818
 5 | Load : 1537 / 8818
 6 | Store : 322 / 8818
 7 | Send : 603 / 8818
 8 | Receive : 603 / 8818
 9 | MVM : 322 / 8818
10 | ALU : 1288 / 8818
11 | ALUI : 0 / 8818
12 | Set : 1859 / 8818
13 | WriteInput : 0 / 8818
14 | ReadOutput : 0 / 8818
15 | 
16 | ====LINK====
17 | busiest link: -0.0001220703125KB
18 | 
19 | ====MAX MEMORY SIZE====
20 | tile id: 0			physical size: 0			virtual size: 0
21 | tile id: 1			physical size: 0			virtual size: 128
22 | tile id: 2			physical size: 896			virtual size: 2560
23 | tile id: 3			physical size: 896			virtual size: 2560
24 | tile id: 4			physical size: 1408			virtual size: 4096
25 | tile id: 5			physical size: 1920			virtual size: 3584
26 | tile id: 6			physical size: 1408			virtual size: 3584
27 | tile id: 7			physical size: 1408			virtual size: 3584
28 | tile id: 8			physical size: 1408			virtual size: 3584
29 | tile id: 9			physical size: 1408			virtual size: 3584
30 | tile id: 10			physical size: 1408			virtual size: 3584
31 | tile id: 11			physical size: 896			virtual size: 3584
32 | tile id: 12			physical size: 1280			virtual size: 5632
33 | tile id: 13			physical size: 2048			virtual size: 4736
34 | tile id: 14			physical size: 1152			virtual size: 4736
35 | tile id: 15			physical size: 2048			virtual size: 4736
36 | tile id: 16			physical size: 1408			virtual size: 4736
37 | tile id: 17			physical size: 2048			virtual size: 4736
38 | tile id: 18			physical size: 1280			virtual size: 4736
39 | tile id: 19			physical size: 2048			virtual size: 4736
40 | tile id: 20			physical size: 1408			virtual size: 4736
41 | tile id: 21			physical size: 2048			virtual size: 4736
42 | tile id: 22			physical size: 1152			virtual size: 4736
43 | tile id: 23			physical size: 2048			virtual size: 4736
44 | tile id: 24			physical size: 2176			virtual size: 6784
45 | tile id: 25			physical size: 1920			virtual size: 4736
46 | tile id: 26			physical size: 1664			virtual size: 4736
47 | tile id: 27			physical size: 1920			virtual size: 4736
48 | tile id: 28			physical size: 3328			virtual size: 5248
49 | 


--------------------------------------------------------------------------------
/inst-sim/tile_instrn_proto.py:
--------------------------------------------------------------------------------
 1 | # Define the instruction prototypes which will be used by the generate_instrn.py file
 2 | import sys
 3 | 
 4 | import numpy as np
 5 | import config as cfg
 6 | 
 7 | # List of supported opcodes for tile
 8 | op_list_tile = ['send', 'receive', 'compute', 'halt']
 9 | 
10 | # Instruction format for Tile
11 | dummy_instrn_tile = {'opcode' : op_list_tile[0],
12 |                      'mem_addr': 0,     # send/receive - edram_addr
13 |                      'r1': 0,     # send-send_width, receive-receive_width
14 |                      'r2': 0,     # send-target_addr, receive-counter
15 |                      'vtile_id': 0, # send/receive-neuron_id
16 |                      'ima_nma': '',      # compute - a bit for each ima
17 |                      'vec': 0} # vector width
18 | 
19 | # Define instruction prototypes
20 | # generate receive prototype
21 | def i_receive (mem_addr, vtile_id, receive_width, counter, vec = 1):
22 |     i_temp = dummy_instrn_tile.copy()
23 |     i_temp['opcode'] = 'receive'
24 |     i_temp['mem_addr'] = mem_addr
25 |     i_temp['vtile_id'] =  vtile_id
26 |     i_temp['r1'] = receive_width
27 |     i_temp['r2'] = counter
28 |     i_temp['vec'] = vec
29 |     return i_temp
30 | 
31 | # generate send prototype
32 | def i_send (mem_addr, vtile_id, send_width, target_addr, vec = 1):
33 |     i_temp = dummy_instrn_tile.copy()
34 |     i_temp['opcode'] = 'send'
35 |     i_temp['mem_addr'] = mem_addr
36 |     i_temp['vtile_id'] = vtile_id
37 |     i_temp['r1'] = send_width
38 |     i_temp['r2'] = target_addr
39 |     i_temp['vec'] = vec
40 |     return i_temp
41 | 
42 | # generate halt prototype
43 | def i_halt ():
44 |     i_temp = dummy_instrn_tile.copy()
45 |     i_temp['opcode'] = 'halt'
46 |     return i_temp
47 | 
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/inst-sim/workload/LSTM2048.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PIM-SW/PIM-Simulator/40699dc06e6154e7c1a794631eb66b6e24737e08/inst-sim/workload/LSTM2048.tar.gz


--------------------------------------------------------------------------------
/inst-sim/workload/bigLSTM.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PIM-SW/PIM-Simulator/40699dc06e6154e7c1a794631eb66b6e24737e08/inst-sim/workload/bigLSTM.tar.gz


--------------------------------------------------------------------------------
/inst-sim/workload/mlp.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PIM-SW/PIM-Simulator/40699dc06e6154e7c1a794631eb66b6e24737e08/inst-sim/workload/mlp.tar.gz


--------------------------------------------------------------------------------
/inst-sim/workload/nmt.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PIM-SW/PIM-Simulator/40699dc06e6154e7c1a794631eb66b6e24737e08/inst-sim/workload/nmt.tar.gz


--------------------------------------------------------------------------------
/inst-sim/workload/vgg16_small.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PIM-SW/PIM-Simulator/40699dc06e6154e7c1a794631eb66b6e24737e08/inst-sim/workload/vgg16_small.tar.gz


--------------------------------------------------------------------------------
/inst-sim/workload/vgg19_small.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PIM-SW/PIM-Simulator/40699dc06e6154e7c1a794631eb66b6e24737e08/inst-sim/workload/vgg19_small.tar.gz


--------------------------------------------------------------------------------