├── .DS_Store
├── LICENSE
├── README.md
├── SATA_mem
├── m_sram.cfg
├── m_sram.cfg.out
├── mem_configs.py
├── s_sram.cfg
├── s_sram.cfg.out
├── u_sram.cfg
├── u_sram.cfg.out
├── w_sram.cfg
├── w_sram.cfg.out
├── w_sram_results.txt
├── z_sram.cfg
└── z_sram.cfg.out
├── cacti
├── 2DDRAM_Samsung2GbDDR2.cfg
├── 2DDRAM_micron1Gb.cfg
├── 2DDRAM_micron1Gb.cfg.out
├── 3DDRAM_Samsung3D8Gb_extened.cfg
├── README
├── TSV.cc
├── TSV.h
├── Ucache.cc
├── Ucache.h
├── arbiter.cc
├── arbiter.h
├── area.cc
├── area.h
├── bank.cc
├── bank.h
├── basic_circuit.cc
├── basic_circuit.h
├── cache.cfg
├── cacti
├── cacti.i
├── cacti.mk
├── cacti_interface.cc
├── cacti_interface.h
├── component.cc
├── component.h
├── const.h
├── contention.dat
├── crossbar.cc
├── crossbar.h
├── ddr3.cfg
├── decoder.cc
├── decoder.h
├── dram-config
│ └── dram-config.cfg
├── dram.cfg
├── dram_read_energy_results.txt
├── dram_results
│ └── ddr3-dram.out
├── extio.cc
├── extio.h
├── extio_technology.cc
├── extio_technology.h
├── htree2.cc
├── htree2.h
├── io.cc
├── io.h
├── lpddr.cfg
├── main.cc
├── makefile
├── mat.cc
├── mat.h
├── memcad.cc
├── memcad.h
├── memcad_parameters.cc
├── memcad_parameters.h
├── memorybus.cc
├── memorybus.h
├── nuca.cc
├── nuca.h
├── obj_dbg
│ ├── TSV.o
│ ├── Ucache.o
│ ├── arbiter.o
│ ├── area.o
│ ├── bank.o
│ ├── basic_circuit.o
│ ├── cacti
│ ├── cacti_interface.o
│ ├── component.o
│ ├── crossbar.o
│ ├── decoder.o
│ ├── extio.o
│ ├── extio_technology.o
│ ├── htree2.o
│ ├── io.o
│ ├── main.o
│ ├── mat.o
│ ├── memcad.o
│ ├── memcad_parameters.o
│ ├── memorybus.o
│ ├── nuca.o
│ ├── parameter.o
│ ├── powergating.o
│ ├── router.o
│ ├── subarray.o
│ ├── technology.o
│ ├── uca.o
│ └── wire.o
├── parameter.cc
├── parameter.h
├── powergating.cc
├── powergating.h
├── regression.test
├── router.cc
├── router.h
├── sample_config_files
│ ├── ddr3_cache.cfg
│ ├── diff_ddr3_cache.cfg
│ ├── lpddr3_cache.cfg
│ └── wideio_cache.cfg
├── sram-config
│ └── sram-config.cfg
├── subarray.cc
├── subarray.h
├── tech_params
│ ├── 16nm.dat
│ ├── 180nm-old.dat
│ ├── 180nm.dat
│ ├── 22nm.dat
│ ├── 32nm.dat
│ ├── 45nm.dat
│ ├── 65nm-old.dat
│ ├── 65nm.dat
│ ├── 90nm-old.dat
│ └── 90nm.dat
├── technology.cc
├── uca.cc
├── uca.h
├── version_cacti.h
├── wire.cc
└── wire.h
├── config
├── sata_config.yaml
└── vgg5_cifar10.yaml
├── har_configs
├── dcl_har.yaml
├── dcl_shar.yaml
├── fcn_har.yaml
├── fcn_shar.yaml
├── sata_ann_watch_config.yaml
└── sata_watch_config.yaml
├── inference-energy-cal
├── .DS_Store
├── __pycache__
│ └── hw_kernels.cpython-39.pyc
├── comp-utils.py
├── cycle-utils.py
├── energy-cal.py
├── hw_kernels.py
├── mem-utils.py
├── related-work-estimate.py
├── results
│ ├── bntt
│ │ ├── comp-stat.yaml
│ │ ├── computation-energy.yaml
│ │ ├── cycle-stat.yaml
│ │ ├── mem-stat.yaml
│ │ └── memory-energy.yaml
│ ├── direct
│ │ ├── comp-stat.yaml
│ │ ├── computation-energy.yaml
│ │ ├── cycle-stat.yaml
│ │ ├── mem-stat.yaml
│ │ └── memory-energy.yaml
│ ├── tdbn
│ │ ├── comp-stat.yaml
│ │ ├── computation-energy.yaml
│ │ ├── cycle-stat.yaml
│ │ ├── mem-stat.yaml
│ │ └── memory-energy.yaml
│ └── tssl
│ │ ├── comp-stat.yaml
│ │ ├── computation-energy.yaml
│ │ ├── cycle-stat.yaml
│ │ ├── mem-stat.yaml
│ │ └── memory-energy.yaml
├── run.py
├── sata-config.yaml
├── workload.yaml
└── workloads
│ ├── workload._bntt.yaml
│ ├── workload_direct.yaml
│ ├── workload_tdbn.yaml
│ └── workload_tssl.yaml
└── training_energy_cal
├── energy_cal.py
├── energy_configs.py
├── get_arch_energy.py
├── get_workload.py
└── get_workload_new.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/.DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Ruokai Yin
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | For the most up-to-date version, please go to the following link: https://github.com/RuokaiYin/SATA_Sim
2 | Normally the updates will be reflected in this repo in 1 or 2 days.
3 |
4 | # SATA_Sim
5 |
6 | ## What's New:
7 |
8 | **2023-Sep-6:**
9 |
10 | A new version of SATA_Sim that supports cycle-accurate energy simulation for SNN inference is online! A more detailed README file will be added soon.
11 |
12 | The new version of SATA_Sim takes into consideration both dynamic energy and leakage energy while counting all the data movement energy.
13 |
14 | The new version of SATA_Sim even lets you modify the hardware architecture if required.
15 |
16 | We use cacti-7.0 and scale-sim-v2 as the backbone to simulate the memory component and to get the cycle statistics.
17 |
18 | For a quick start:
19 |
20 | 1. Clone the project, download all the dependencies, and go to the inference-energy-cal folder.
21 | 2. Modify the workload.yaml for your target workload, and modify the sata-config.yaml if any hardware architecture level changes are needed.
22 | 3. Simply run 'python3 run.py' and find the computation and memory energy results in the results folder. Some of the other related statistics are also provided in the folder.
23 | 4. The simulation might run slowly for large workloads.
24 |
25 | Please do leave a message if any new features are needed. Happy running simulations on SNNs! Go Spike!
26 |
27 |
28 | **2023-Mar-15:**
29 |
30 | SATA_Sim now supports the different operand sizes (weights and membrane potentials) for the forward-stage energy estimation.
31 |
32 | One useful case is to use the tool to estimate the energy cost improvement of the quantized SNN models (both weight and membrane potential quantization are supported).
33 |
34 | To check the energy cost for different operand sizes, simply change the 'fwd_b' variable in the energy_cal.py to the target operand size. Please note that we assume the weights and membrane potentials are always quantized to the same bit-width.
35 |
36 |
37 |
38 | ## Overview
39 |
40 | SATA_Sim is an energy estimation framework for Backpropagation-Through-Time (BPTT) based Spiking Neural Networks (SNNs) training with sparsity awareness.
41 |
42 | ## Prerequisite
43 |
44 | Python (Version >= 3.6)
45 |
46 | ## Citing
47 | If you find SATA_Sim useful for your research, please use the following bibtex to cite us,
48 |
49 | ```
50 | @article{yin2022sata,
51 | title={Sata: Sparsity-aware training accelerator for spiking neural networks},
52 | author={Yin, Ruokai and Moitra, Abhishek and Bhattacharjee, Abhiroop and Kim, Youngeun and Panda, Priyadarshini},
53 | journal={IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
54 | year={2022},
55 | publisher={IEEE}
56 | }
57 | ```
58 |
59 | ## Simple Usage Example
60 | Please first provide the shape information of the network by writing a yaml file like the vgg5_cifar10.yaml.
61 | Then please specify the architecture like sata_config.yaml. You can directly use the sata_config.yaml to use the architecture of SATA.
62 | Then please specify the dynamic energy of the computation components in energy_configs.py. You can directly use the energy_configs.py for SATA.
63 | Then please specify the dynamic energy of the computation components in energy_configs.py. You can directly use the energy_configs.py for SATA.
64 | Please also specify the dynamic energy of memory components in mem_configs.py. This information can be obtained by using CACTI.
65 | Then please specify the timesteps, all three kinds of sparsity, bitwidth of parameters other than spikes in energy_cal.py.
66 | Finally, run the energy_cal.py, and you will get the energy estimation that is normalized with the energy of a single MAC operation in ANNs.
67 |
68 | ## Contribution
69 | Active contributor:
70 | 1. [Ruokai Yin](https://ruokaiyin.github.io/)
71 |
72 | Please contact me (ruokai.yin@yale.edu) if you are interested in contributing to this project!
73 |
74 | ## TODO:
75 |
76 | A more detailed README file will be added for using the new version of SATA_Sim.
77 |
78 | The estimation of backward and weight update computation will be added. :white_check_mark:
79 |
80 | The estimation of memory access energy of forward, backward, and weight update stages will be added. :white_check_mark:
81 |
82 | The instructions for using the codes will be added. :white_check_mark:
83 |
84 | Supporting the configurable bitwidth for internal fwd datapaths. :white_check_mark:
85 |
86 | Supporting the configurable bitwidth for internal bwd & wup datapaths.
87 |
88 | Supporting dataflow mappings other than the one used in SATA. :white_check_mark:
89 |
90 | Supporting the estimation mode that considers the leak energy. :white_check_mark:
91 |
--------------------------------------------------------------------------------
/SATA_mem/m_sram.cfg.out:
--------------------------------------------------------------------------------
1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %,
2 | 32, 8192, 16, 1, 8, 0.394476, 0.236779, N/A, 0.00117638, 0.00112894, 0.142009, 0.0324569, 2, 2, 8, 16, 1, 1, 60.7883, N/A, N/A, N/A, N/A, N/A, N/A, N/A,
3 |
--------------------------------------------------------------------------------
/SATA_mem/mem_configs.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 |
def get_args(argv=None):
    """Parse the dynamic-energy settings of the SATA memory components.

    Args:
        argv: Optional list of command-line tokens to parse. ``None`` (the
            default) makes argparse fall back to ``sys.argv[1:]``, which is
            exactly what the original zero-argument call did. Passing a list
            (e.g. ``[]``) lets other scripts and test runners obtain the
            defaults without their own command line being parsed.

    Returns:
        argparse.Namespace with the float attributes ``dram``, ``sram`` and
        ``spad``.
    """
    parser = argparse.ArgumentParser("SATA_MEM_Energy_Component")

    # Disabled per-buffer options, kept for reference only.
    # parser.add_argument('--ssram', type=float, default=2.750, help='dynamic energy for isram')
    # parser.add_argument('--wsram', type=float, default=6.529, help='dynamic energy for wsram')
    # parser.add_argument('--usram', type=float, default=0.239, help='dynamic energy for osram')
    # parser.add_argument('--dusram', type=float, default=0.239, help='dynamic energy for osram')
    # parser.add_argument('--zsram', type=float, default=0.239, help='dynamic energy for osram')
    # parser.add_argument('--dzsram', type=float, default=0.239, help='dynamic energy for osram')
    # parser.add_argument('--msram', type=float, default=1.176, help='dynamic energy for osram')
    parser.add_argument('--dram', type=float, default=55.58, help='dynamic energy for dram')
    parser.add_argument('--sram', type=float, default=1.95, help='dynamic energy for sram')
    parser.add_argument('--spad', type=float, default=0.2779, help='dynamic energy for spad')
    # parser.add_argument('--ispad', type=float, default=0.239, help='dynamic energy for isram')
    # parser.add_argument('--wspad', type=float, default=0.2152, help='dynamic energy for wsram')

    # argv=None keeps the historical behaviour of reading sys.argv[1:].
    args = parser.parse_args(argv)
    print(args)

    return args
--------------------------------------------------------------------------------
/SATA_mem/s_sram.cfg.out:
--------------------------------------------------------------------------------
1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %,
2 | 32, 32768, 16, 1, 8, 0.563493, 0.297886, N/A, 0.00274961, 0.00250566, 0.500408, 0.122022, 2, 2, 16, 32, 1, 1, 64.6768, N/A, N/A, N/A, N/A, N/A, N/A, N/A,
3 |
--------------------------------------------------------------------------------
/SATA_mem/u_sram.cfg.out:
--------------------------------------------------------------------------------
1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %,
2 | 32, 262144, 16, 1, 8, 1.18734, 0.451046, N/A, 0.00899083, 0.00788884, 3.15182, 0.871484, 2, 4, 32, 16, 4, 1, 72.4466, N/A, N/A, N/A, N/A, N/A, N/A, N/A,
3 |
--------------------------------------------------------------------------------
/SATA_mem/w_sram.cfg.out:
--------------------------------------------------------------------------------
1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %,
2 | 32, 147456, 16, 1, 8, 0.935514, 0.362277, N/A, 0.0065292, 0.00589278, 1.83923, 0.523663, 2, 4, 32, 16, 4, 1, 67.8185, N/A, N/A, N/A, N/A, N/A, N/A, N/A,
3 | 32, 147456, 8, 1, 8, 0.904971, 0.50647, N/A, 0.00676559, 0.00542787, 3.06862, 0.498412, 2, 4, 64, 32, 4, 1, 71.2544, N/A, N/A, N/A, N/A, N/A, N/A, N/A,
4 |
--------------------------------------------------------------------------------
/SATA_mem/w_sram_results.txt:
--------------------------------------------------------------------------------
1 | Cache size : 147456
2 | Block size : 1
3 | Associativity : 1
4 | Read only ports : 1
5 | Write only ports : 1
6 | Read write ports : 0
7 | Single ended read ports : 0
8 | Cache banks (UCA) : 16
9 | Technology : 0.032
10 | Temperature : 360
11 | Tag size : 42
12 | array type : Scratch RAM
13 | Model as memory : 0
14 | Model as 3D memory : 0
15 | Access mode : 0
16 | Data array cell type : 2
17 | Data array peripheral type : 2
18 | Tag array cell type : 2
19 | Tag array peripheral type : 2
20 | Optimization target : 2
21 | Design objective (UCA wt) : 100 20 20 10 10
22 | Design objective (UCA dev) : 10 1000 1000 1000 1000
23 | Cache model : 0
24 | Nuca bank : 0
25 | Wire inside mat : 1
26 | Wire outside mat : 1
27 | Interconnect projection : 1
28 | Wire signaling : 0
29 | Print level : 0
30 | ECC overhead : 0
31 | Page size : 8192
32 | Burst length : 8
33 | Internal prefetch width : 8
34 | Force cache config : 0
35 | Subarray Driver direction : 1
36 | iostate : WRITE
37 | dram_ecc : NO_ECC
38 | io_type : DDR3
39 | dram_dimm : UDIMM
40 | IO Area (sq.mm) = inf
41 | IO Timing Margin (ps) = -14.1667
42 | IO Votlage Margin (V) = 0.155
43 | IO Dynamic Power (mW) = 1506.36 PHY Power (mW) = 232.752 PHY Wakeup Time (us) = 27.503
44 | IO Termination and Bias Power (mW) = 2505.96
45 |
46 | ---------- CACTI (version 7.0.3DD Prerelease of Aug, 2012), Uniform Cache Access SRAM Model ----------
47 |
48 | Cache Parameters:
49 | Total cache size (bytes): 147456
50 | Number of banks: 16
51 | Associativity: direct mapped
52 | Block size (bytes): 1
53 | Read/write Ports: 0
54 | Read ports: 1
55 | Write ports: 1
56 | Technology size (nm): 32
57 |
58 | Access time (ns): 0.935514
59 | Cycle time (ns): 0.362277
60 | Total dynamic read energy per access (nJ): 0.0065292
61 | Total dynamic write energy per access (nJ): 0.00589278
62 | Total leakage power of a bank (mW): 1.81852
63 | Total gate leakage power of a bank (mW): 0.0207095
64 | Cache height x width (mm): 0.853564 x 0.613501
65 |
66 | Best Ndwl : 2
67 | Best Ndbl : 4
68 | Best Nspd : 32
69 | Best Ndcm : 16
70 | Best Ndsam L1 : 4
71 | Best Ndsam L2 : 1
72 |
73 | Data array, H-tree wire type: Global wires with 5% delay penalty
74 | top 3 best memory configurations are:
75 | Memory cap: 80 GB num_bobs: 1 bw: 533 (MHz) cost: $731.2 energy: 32.6101 (nJ)
76 | {
77 | (0) BoB cap: 80 GB num_channels: 1 bw: 533 (MHz) cost: $731.2 energy: 32.6101 (nJ)
78 | ==============
79 | (0) cap: 80 GB bw: 533 (MHz) cost: $731.2 dpc: 3 energy: 32.6101 (nJ) DIMM: RDIMM low power: F [ 0(4GB) 0(8GB) 1(16GB) 2(32GB) 0(64GB) ]
80 | ==============
81 |
82 | }
83 |
84 | =============================================
--------------------------------------------------------------------------------
/SATA_mem/z_sram.cfg.out:
--------------------------------------------------------------------------------
1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %,
2 | 32, 65536, 16, 1, 8, 0.75728, 0.398596, N/A, 0.00393635, 0.00346167, 0.926132, 0.223867, 2, 2, 16, 32, 1, 1, 70.506, N/A, N/A, N/A, N/A, N/A, N/A, N/A,
3 | 32, 65536, 16, 1, 8, 0.75728, 0.398596, N/A, 0.00393635, 0.00346167, 0.926132, 0.223867, 2, 2, 16, 32, 1, 1, 70.506, N/A, N/A, N/A, N/A, N/A, N/A, N/A,
4 |
--------------------------------------------------------------------------------
/cacti/2DDRAM_micron1Gb.cfg.out:
--------------------------------------------------------------------------------
1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %,
2 | 78, 1, 8, 1, 64, 29.4658, 64.5874, N/A, 1.87983, 1.87986, 0.018124, 72.5955, 16, 16, 1, 1, 1, 1, 62.2986, N/A, N/A, N/A, N/A, N/A, N/A, N/A,
3 |
--------------------------------------------------------------------------------
/cacti/README:
--------------------------------------------------------------------------------
1 | -----------------------------------------------------------
2 |
3 |
4 | ____ __ ________ __
5 | /\ _`\ /\ \__ __ /\_____ \ /'__`\
6 | \ \ \/\_\ __ ___\ \ ,_\/\_\ \/___//'/'/\ \/\ \
7 | \ \ \/_/_ /'__`\ /'___\ \ \/\/\ \ /' /' \ \ \ \ \
8 | \ \ \L\ \/\ \L\.\_/\ \__/\ \ \_\ \ \ /' /'__ \ \ \_\ \
9 | \ \____/\ \__/.\_\ \____\\ \__\\ \_\ /\_/ /\_\ \ \____/
10 | \/___/ \/__/\/_/\/____/ \/__/ \/_/ \// \/_/ \/___/
11 |
12 |
13 | A Tool to Model Caches/Memories, 3D stacking, and off-chip IO
14 | -----------------------------------------------------------
15 |
16 | CACTI is an analytical tool that takes a set of cache/memory para-
17 | meters as input and calculates its access time, power, cycle
18 | time, and area.
19 | CACTI was originally developed by Dr. Jouppi and Dr. Wilton
20 | in 1993 and since then it has undergone six major
21 | revisions.
22 |
23 | List of features (version 1-7):
24 | ===============================
25 | The following is the list of features supported by the tool.
26 |
27 | * Power, delay, area, and cycle time model for
28 | direct mapped caches
29 | set-associative caches
30 | fully associative caches
31 | Embedded DRAM memories
32 | Commodity DRAM memories
33 |
34 | * Support for modeling multi-ported uniform cache access (UCA)
35 | and multi-banked, multi-ported non-uniform cache access (NUCA).
36 |
37 | * Leakage power calculation that also considers the operating
38 | temperature of the cache.
39 |
40 | * Router power model.
41 |
42 | * Interconnect model with different delay, power, and area
43 | properties including low-swing wire model.
44 |
45 | * An interface to perform trade-off analysis involving power, delay,
46 | area, and bandwidth.
47 |
48 | * All process specific values used by the tool are obtained
49 | from ITRS and currently, the tool supports 90nm, 65nm, 45nm,
50 | and 32nm technology nodes.
51 |
52 | * Chip IO model to calculate latency and energy for DDR bus. Users can model
53 | different loads (fan-outs) and evaluate the impact on frequency and energy.
54 | This model can be used to study LR-DIMMs, R-DIMMs, etc.
55 |
56 | Version 7.0 is derived from 6.5 and merged with CACTI 3D.
57 | It has many new additions apart from code refinements and
58 | bug fixes: new IO model, 3D memory model, and power gating models.
59 | Ref: CACTI-IO: CACTI With OFF-chip Power-Area-Timing Models
60 | MemCAD: An Interconnect Exploratory Tool for Innovative Memories Beyond DDR4
61 | CACTI-3DD: Architecture-level modeling for 3D die-stacked DRAM main memory
62 |
63 | --------------------------------------------------------------------------
64 | Version 6.5 has a new c++ code base and includes numerous bug fixes.
65 | CACTI 5.3 and 6.0 activate an entire row of mats to read/write a single
66 | block of data. This technique improves reliability at the cost of
67 | power. CACTI 6.5 activates minimum number of mats just enough to retrieve
68 | a block to minimize power.
69 |
70 | How to use the tool?
71 | ====================
72 | Prior versions of CACTI take input parameters such as cache
73 | size and technology node as a set of command line arguments.
74 | To avoid a long list of command line arguments,
75 | CACTI 6.5 and 7.0 let users specify their cache model in a more
76 | detailed manner by using a config file (cache.cfg).
77 |
78 | -> define the cache model using cache.cfg
79 | -> run the "cacti" binary <./cacti -infile cache.cfg>
80 |
81 | CACTI also provides a command line interface similar to earlier versions. The command line interface can be used as
82 |
83 | ./cacti cache_size line_size associativity rw_ports excl_read_ports excl_write_ports
84 | single_ended_read_ports search_ports banks tech_node output_width specific_tag tag_width
85 | access_mode cache main_mem obj_func_delay obj_func_dynamic_power obj_func_leakage_power
86 | obj_func_cycle_time obj_func_area dev_func_delay dev_func_dynamic_power dev_func_leakage_power
87 | dev_func_area dev_func_cycle_time ed_ed2_none temp wt data_arr_ram_cell_tech_flavor_in
88 | data_arr_peri_global_tech_flavor_in tag_arr_ram_cell_tech_flavor_in tag_arr_peri_global_tech_flavor_in
89 | interconnect_projection_type_in wire_inside_mat_type_in wire_outside_mat_type_in
90 | REPEATERS_IN_HTREE_SEGMENTS_in VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in
91 | BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in PAGE_SIZE_BITS_in BURST_LENGTH_in
92 | INTERNAL_PREFETCH_WIDTH_in force_wiretype wiretype force_config ndwl ndbl nspd ndcm
93 | ndsam1 ndsam2 ecc
94 |
95 | For complete documentation of the tool, please refer
96 | to the following publications and reports.
97 |
98 | CACTI-5.3 & 6 reports - Details on Memory/cache organizations and tradeoffs.
99 |
100 | Latency/Energy tradeoffs for large caches and NUCA design:
101 | "Optimizing NUCA Organizations and Wiring Alternatives for Large Caches With CACTI 6.0", that appears in MICRO 2007.
102 |
103 | Memory IO design: CACTI-IO: CACTI With OFF-chip Power-Area-Timing Models,
104 | MemCAD: An Interconnect Exploratory Tool for Innovative Memories Beyond DDR4
105 | CACTI-IO Technical Report - http://www.hpl.hp.com/techreports/2013/HPL-2013-79.pdf
106 |
107 | 3D model:
108 | CACTI-3DD: Architecture-level modeling for 3D die-stacked DRAM main memory
109 |
110 | We are still improving the tool and refining the code. If you
111 | have any comments, questions, or suggestions please write to
112 | us.
113 |
114 | Naveen Muralimanohar
115 | naveen.muralimanohar@hpe.com
116 |
117 | Ali Shafiee
118 | shafiee@cs.utah.edu
119 |
120 | Vaishnav Srinivas
121 | vaishnav.srinivas@gmail.com
122 |
123 |
--------------------------------------------------------------------------------
/cacti/TSV.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 | #ifndef TSV_H_
33 | #define TSV_H_
34 |
35 | #include "basic_circuit.h"
36 | #include "component.h"
37 | #include "parameter.h"
38 | //#include "assert.h"
39 | #include "cacti_interface.h"
40 | #include "const.h"
41 | //#include "area.h"
42 | #include
43 | #include
44 | #include
45 |
46 |
// Component subclass holding the electrical parameters of a TSV, the sizing of
// the gates that drive it, and the resulting metal and buffer areas.
// Only declarations are visible here; the member functions are defined elsewhere.
class TSV : public Component
{
  public:
    // tsv_type selects the kind of TSV; dt defaults to the global peripheral
    // device type (g_tp.peri_global).
    TSV(enum TSV_type tsv_type,
        /*TechnologyParameter::*/DeviceType * dt = &(g_tp.peri_global));//Should change peri_global to TSV in technology.cc
    //TSV():len(20),rad(2.5),pitch(50){}
    ~TSV();

    double res;//TSV resistance
    double cap;//TSV capacitance
    double C_load_TSV;//The intrinsic load plus the load TSV is driving, needs changes?
    double min_area;

    //int num_IO;//number of I/O
    int num_gates;
    int num_gates_min;//Necessary?
    // Per-stage widths, one entry per gate stage up to MAX_NUMBER_GATES_STAGE
    // (presumably NMOS / PMOS widths, going by the _n / _p suffixes -- confirm).
    double w_TSV_n[MAX_NUMBER_GATES_STAGE];
    double w_TSV_p[MAX_NUMBER_GATES_STAGE];

    //double delay_TSV_path;//Delay of TSV path including the parasitics

    // NOTE(review): both flags are declared as double although the comment
    // describes them as true/false values -- confirm whether bool was intended.
    double is_dram;//two external arguments, defaulted to be false in constructor
    double is_wl_tr;

    void compute_buffer_stage();
    void compute_area();
    void compute_delay();
    void print_TSV();

    Area TSV_metal_area;
    Area Buffer_area;

    /*//Inherited from Component
    double delay;
    Area area;
    powerDef power, rt_power;
    double delay;
    double cycle_time;

    int logical_effort();*/

  private:
    double min_w_pmos;
    /*TechnologyParameter::*/DeviceType * deviceType;
    unsigned int tsv_type;

};
94 |
95 |
96 | #endif /* TSV_H_ */
97 |
--------------------------------------------------------------------------------
/cacti/Ucache.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 | #ifndef __UCACHE_H__
34 | #define __UCACHE_H__
35 |
36 | #include
37 | #include "area.h"
38 | #include "router.h"
39 | #include "nuca.h"
40 |
41 |
// Holder for five "minimum seen so far" metrics: delay, dynamic (min_dyn),
// leakage, area and cycle (min_cyc).
class min_values_t
{
  public:
    double min_delay;
    double min_dyn;
    double min_leakage;
    double min_area;
    double min_cyc;

    // Every minimum starts at BIGNUM.
    min_values_t() : min_delay(BIGNUM), min_dyn(BIGNUM), min_leakage(BIGNUM), min_area(BIGNUM), min_cyc(BIGNUM) { }

    // Overloads accepting another min_values_t, a uca_org_t, a nuca_org_t or a
    // mem_array. Defined elsewhere; presumably each folds the argument's
    // metrics into the stored minima -- confirm against the implementation.
    void update_min_values(const min_values_t * val);
    void update_min_values(const uca_org_t & res);
    void update_min_values(const nuca_org_t * res);
    void update_min_values(const mem_array * res);
};
58 |
59 |
60 |
61 | struct solution
62 | {
63 | int tag_array_index;
64 | int data_array_index;
65 | list::iterator tag_array_iter;
66 | list::iterator data_array_iter;
67 | double access_time;
68 | double cycle_time;
69 | double area;
70 | double efficiency;
71 | powerDef total_power;
72 | };
73 |
74 |
75 |
// Free-function prototypes; all are defined outside this header.

// Takes one array organisation (Nspd, Ndwl, Ndbl, Ndcm and the two Ndsam
// levels) plus output pointers (ptr_array, ptr_results, ptr_fin_res) and
// returns a bool. Presumably the bool reports whether the organisation is
// valid -- confirm against the definition.
bool calculate_time(
    bool is_tag,
    int pure_ram,
    bool pure_cam,
    double Nspd,
    unsigned int Ndwl,
    unsigned int Ndbl,
    unsigned int Ndcm,
    unsigned int Ndsam_lev_1,
    unsigned int Ndsam_lev_2,
    mem_array *ptr_array,
    int flag_results_populate,
    results_mem_array *ptr_results,
    uca_org_t *ptr_fin_res,
    Wire_type wtype, // merge from cacti-7 to cacti3d
    bool is_main_mem);
// Operates on the final result structure passed by pointer.
void update(uca_org_t *fin_res);

// Operates on the final result structure passed by pointer.
void solve(uca_org_t *fin_res);
// Takes a technology value and a flag selecting tag vs. non-tag parameters.
void init_tech_params(double tech, bool is_tag);
96 |
97 |
98 | struct calc_time_mt_wrapper_struct
99 | {
100 | uint32_t tid;
101 | bool is_tag;
102 | bool pure_ram;
103 | bool pure_cam;
104 | bool is_main_mem;
105 | double Nspd_min;
106 |
107 | min_values_t * data_res;
108 | min_values_t * tag_res;
109 |
110 | list data_arr;
111 | list tag_arr;
112 | };
113 |
// Takes and returns void *; the struct name suggests the argument is a
// calc_time_mt_wrapper_struct -- confirm in the definition.
void * calc_time_mt_wrapper(void * void_obj);

// Declared here, defined elsewhere.
void print_g_tp();
117 |
118 | #endif
119 |
--------------------------------------------------------------------------------
/cacti/arbiter.cc:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 | #include "arbiter.h"
33 |
34 | Arbiter::Arbiter(
35 | double n_req,
36 | double flit_size_,
37 | double output_len,
38 | /*TechnologyParameter::*/DeviceType *dt
39 | ):R(n_req), flit_size(flit_size_),
40 | o_len (output_len), deviceType(dt)
41 | {
42 | min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
43 | Vdd = dt->Vdd;
44 | double technology = g_ip->F_sz_um;
45 | NTn1 = 13.5*technology/2;
46 | PTn1 = 76*technology/2;
47 | NTn2 = 13.5*technology/2;
48 | PTn2 = 76*technology/2;
49 | NTi = 12.5*technology/2;
50 | PTi = 25*technology/2;
51 | NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/
52 | PTtr = 20*technology/2; /* pmos tr. length*/
53 | }
54 |
55 | Arbiter::~Arbiter(){}
56 |
57 | double
58 | Arbiter::arb_req() {
59 | double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) +
60 | gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
61 | drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
62 | return temp;
63 | }
64 |
65 | double
66 | Arbiter::arb_pri() {
67 | double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance
68 | of flip-flop is ignored */
69 | return temp;
70 | }
71 |
72 |
73 | double
74 | Arbiter::arb_grant() {
75 | double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
76 | return temp;
77 | }
78 |
79 | double
80 | Arbiter::arb_int() {
81 | double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
82 | 2*gate_C(NTn2, 0) + gate_C(PTn2, 0));
83 | return temp;
84 | }
85 |
86 | void
87 | Arbiter::compute_power() {
88 | power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 +
89 | arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd);
90 | double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
91 | double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
92 | double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
93 | double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
94 | double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
95 | double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
96 | power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage
97 | power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd;
98 | }
99 |
100 | double //wire cap with triple spacing
101 | Arbiter::Cw3(double length) {
102 | Wire wc(g_ip->wt, length, 1, 3, 3);
103 | double temp = (wc.wire_cap(length,true));
104 | return temp;
105 | }
106 |
107 | double
108 | Arbiter::crossbar_ctrline() {
109 | double temp = (Cw3(o_len * 1e-6 /* m */) +
110 | drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
111 | gate_C(NTi, 0) + gate_C(PTi, 0));
112 | return temp;
113 | }
114 |
115 | double
116 | Arbiter::transmission_buf_ctrcap() {
117 | double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0);
118 | return temp;
119 | }
120 |
121 |
122 | void Arbiter::print_arbiter()
123 | {
124 | cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
125 | cout << "Flit size : " << flit_size << " bits" << endl;
126 | cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
127 | cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
128 | }
129 |
130 |
131 |
--------------------------------------------------------------------------------
/cacti/arbiter.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 | #ifndef __ARBITER__
33 | #define __ARBITER__
34 |
35 | #include
36 | #include
37 | #include "basic_circuit.h"
38 | #include "cacti_interface.h"
39 | #include "component.h"
40 | #include "parameter.h"
41 | #include "mat.h"
42 | #include "wire.h"
43 |
44 | class Arbiter : public Component
45 | {
46 | public:
47 | Arbiter(
48 | double Req,
49 | double flit_sz,
50 | double output_len,
51 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global));
52 | ~Arbiter();
53 |
54 | void print_arbiter();
55 | double arb_req();
56 | double arb_pri();
57 | double arb_grant();
58 | double arb_int();
59 | void compute_power();
60 | double Cw3(double len);
61 | double crossbar_ctrline();
62 | double transmission_buf_ctrcap();
63 |
64 |
65 |
66 | private:
67 | double NTn1, PTn1, NTn2, PTn2, R, PTi, NTi;
68 | double flit_size;
69 | double NTtr, PTtr;
70 | double o_len;
71 | /*TechnologyParameter::*/DeviceType *deviceType;
72 | double TriS1, TriS2;
73 | double min_w_pmos, Vdd;
74 |
75 | };
76 |
77 | #endif
78 |
--------------------------------------------------------------------------------
/cacti/area.cc:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 |
34 | #include "area.h"
35 | #include "component.h"
36 | #include "decoder.h"
37 | #include "parameter.h"
38 | #include "basic_circuit.h"
39 | #include
40 | #include
41 | #include
42 |
43 | using namespace std;
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/cacti/area.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 |
34 | #ifndef __AREA_H__
35 | #define __AREA_H__
36 |
37 | #include "cacti_interface.h"
38 | #include "basic_circuit.h"
39 |
40 | using namespace std;
41 |
/*
 * Width / height / area record.
 *
 * get_area() returns the separately stored area only while both w and h are
 * zero; as soon as either dimension is non-zero it returns w*h instead.
 */
class Area
{
  public:
    double w;
    double h;

    Area() : w(0), h(0), area(0) { }

    double get_w() const { return w; }
    double get_h() const { return h; }

    double get_area() const
    {
      const bool no_dimensions = (w == 0 && h == 0);
      return no_dimensions ? area : w*h;
    }

    void set_w(double w_)    { w = w_; }
    void set_h(double h_)    { h = h_; }
    void set_area(double a_) { area = a_; }

  private:
    double area;  // only reported while w and h are both zero
};
69 |
70 | #endif
71 |
72 |
--------------------------------------------------------------------------------
/cacti/bank.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 |
34 | #ifndef __BANK_H__
35 | #define __BANK_H__
36 |
37 | #include "component.h"
38 | #include "decoder.h"
39 | #include "mat.h"
40 | #include "htree2.h"
41 |
42 |
43 | class Bank : public Component
44 | {
45 | public:
46 | Bank(const DynamicParameter & dyn_p);
47 | ~Bank();
48 | double compute_delays(double inrisetime); // return outrisetime
49 | void compute_power_energy();
50 |
51 | const DynamicParameter & dp;
52 | Mat mat;
53 | Htree2 *htree_in_add;
54 | Htree2 *htree_in_data;
55 | Htree2 *htree_out_data;
56 | Htree2 *htree_in_search;
57 | Htree2 *htree_out_search;
58 |
59 | int num_addr_b_mat;
60 | int num_mats_hor_dir;
61 | int num_mats_ver_dir;
62 |
63 | int num_addr_b_row_dec;
64 | int num_addr_b_routed_to_mat_for_act;
65 | int num_addr_b_routed_to_mat_for_rd_or_wr;
66 |
67 | double array_leakage;
68 | double wl_leakage;
69 | double cl_leakage;
70 | };
71 |
72 |
73 |
74 | #endif
75 |
--------------------------------------------------------------------------------
/cacti/cacti:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/cacti
--------------------------------------------------------------------------------
/cacti/cacti.i:
--------------------------------------------------------------------------------
/* SWIG interface file: declares the wrapper module named "cacti". */
%module cacti
%{
/* Copied verbatim into the generated wrapper so it can see the declarations */
#include "cacti_interface.h"
%}

/* Parse the header file to generate wrappers for what it declares */
%include "cacti_interface.h"
--------------------------------------------------------------------------------
/cacti/cacti.mk:
--------------------------------------------------------------------------------
# Build rules for the cacti binary.
# Invoked with TAG set (the debug branch below is selected by TAG=dbg);
# objects go to obj_$(TAG)/ and the linked binary is copied to ./$(TARGET).
# NOTE(review): no rule here creates obj_$(TAG)/, so the caller presumably does.
TARGET = cacti
SHELL = /bin/sh
.PHONY: all depend clean
.SUFFIXES: .cc .o

# Thread count compiled into non-debug builds; override with NTHREADS=<n>.
ifndef NTHREADS
NTHREADS = 8
endif


LIBS =
# Despite its name, INCS carries the math-library link flag.
INCS = -lm

ifeq ($(TAG),dbg)
DBG = -Wall
# -gstabs+ removed: GCC 13 and later no longer support STABS debug output and
# reject the option, which broke the debug build. -ggdb -g is kept.
OPT = -ggdb -g -O0 -DNTHREADS=1
else
DBG =
OPT = -g -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS)
endif

#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT)
CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT)
CXX = g++ -m64
CC = gcc -m64

SRCS = area.cc bank.cc mat.cc main.cc Ucache.cc io.cc technology.cc basic_circuit.cc parameter.cc \
       decoder.cc component.cc uca.cc subarray.cc wire.cc htree2.cc extio.cc extio_technology.cc \
       cacti_interface.cc router.cc nuca.cc crossbar.cc arbiter.cc powergating.cc TSV.cc memorybus.cc \
       memcad.cc memcad_parameters.cc


OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS))
PYTHONLIB_SRCS = $(patsubst main.cc, ,$(SRCS)) obj_$(TAG)/cacti_wrap.cc
PYTHONLIB_OBJS = $(patsubst %.cc,%.o,$(PYTHONLIB_SRCS))
INCLUDES = -I /usr/include/python2.4 -I /usr/lib/python2.4/config

all: obj_$(TAG)/$(TARGET)
	cp -f obj_$(TAG)/$(TARGET) $(TARGET)

obj_$(TAG)/$(TARGET) : $(OBJS)
	$(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread

#obj_$(TAG)/%.o : %.cc
#	$(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $<

obj_$(TAG)/%.o : %.cc
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Removes build products in the current directory only; obj_$(TAG)/ is not
# touched by this rule.
clean:
	-rm -f *.o _cacti.so cacti.py $(TARGET)
52 |
53 |
54 |
--------------------------------------------------------------------------------
/cacti/component.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 |
34 | #ifndef __COMPONENT_H__
35 | #define __COMPONENT_H__
36 |
37 | #include "parameter.h"
38 | #include "area.h"
39 |
40 | using namespace std;
41 |
42 | class Crossbar;
43 | class Bank;
44 |
45 | class Component
46 | {
47 | public:
48 | Component();
49 | ~Component();
50 |
51 | Area area;
52 | powerDef power,rt_power;
53 | double delay;
54 | double cycle_time;
55 |
56 | double compute_gate_area(
57 | int gate_type,
58 | int num_inputs,
59 | double w_pmos,
60 | double w_nmos,
61 | double h_gate);
62 |
63 | double compute_tr_width_after_folding(double input_width, double threshold_folding_width);
64 | double height_sense_amplifier(double pitch_sense_amp);
65 |
66 | protected:
67 | int logical_effort(
68 | int num_gates_min,
69 | double g,
70 | double F,
71 | double * w_n,
72 | double * w_p,
73 | double C_load,
74 | double p_to_n_sz_ratio,
75 | bool is_dram_,
76 | bool is_wl_tr_,
77 | double max_w_nmos);
78 |
79 | private:
80 | double compute_diffusion_width(int num_stacked_in, int num_folded_tr);
81 | };
82 |
83 | #endif
84 |
85 |
--------------------------------------------------------------------------------
/cacti/contention.dat:
--------------------------------------------------------------------------------
1 | l34c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
2 | l34c64l2b: 9 11 19 29 43 62 81 102
3 | l34c64l4b: 6 8 12 17 24 29 39 47
4 | l34c64l8b: 7 8 10 14 18 22 25 30
5 | l34c64l16b: 7 7 9 12 14 17 20 24
6 | l34c64l32b: 7 7 9 12 14 17 20 24 -r
7 | l34c64l64b: 7 7 9 12 14 17 20 24 -r
8 | l34c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
9 | l34c128l2b: 4 10 19 30 44 64 82 103
10 | l34c128l4b: 3 6 11 17 24 31 38 47
11 | l34c128l8b: 3 5 9 13 17 21 25 29
12 | l34c128l16b: 4 5 7 10 13 16 19 22
13 | l34c128l32b: 4 5 7 10 13 16 19 22 -r
14 | l34c128l64b: 4 5 7 10 13 16 19 22 -r
15 | l34c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
16 | l34c256l2b: 3 10 19 30 44 63 82 103
17 | l34c256l4b: 3 6 11 17 24 31 38 47
18 | l34c256l8b: 2 5 8 12 16 20 24 29
19 | l34c256l16b: 2 4 7 9 12 15 18 21
20 | l34c256l32b: 2 4 7 9 12 15 18 21 -r
21 | l34c256l64b: 2 4 7 9 12 15 18 21 -r
22 | l38c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
23 | l38c64l2b: 57 59 77 90 137 187 219 245
24 | l38c64l4b: 35 40 48 56 43 61 80 101
25 | l38c64l8b: 18 27 41 45 52 58 58 58 -r
26 | l38c64l16b: 16 17 19 35 40 49 53 53 -r
27 | l38c64l32b: 15 15 17 19 22 25 30 30 -r
28 | l38c64l64b: 15 15 17 19 22 25 30 30 -r
29 | l38c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
30 | l38c128l2b: 38 50 78 93 139 188 220 245
31 | l38c128l4b: 29 37 46 56 43 61 81 102
32 | l38c128l8b: 16 30 39 44 50 57 57 57 -r
33 | l38c128l16b: 14 16 19 33 40 47 52 52 -r
34 | l38c128l32b: 14 15 17 20 23 27 31 31 -r
35 | l38c128l64b: 14 15 17 20 23 27 31 31 -r
36 | l38c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
37 | l38c256l2b: 35 50 78 94 139 188 220 246
38 | l38c256l4b: 28 36 45 55 55 61 81 102
39 | l38c256l8b: 17 30 38 43 50 57 57 57 -r
40 | l38c256l16b: 15 17 21 32 40 47 51 51
41 | l38c256l32b: 15 17 19 21 24 29 33 33
42 | l38c256l64b: 15 17 19 21 24 29 33 33 -r
43 | l316c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
44 | l316c64l2b: 1000 1000 1000 1000 1000 1000 1000 1000
45 | l316c64l4b: 34 35 78 126 178 220 252 274
46 | l316c64l8b: 9 11 23 43 62 87 105 130
47 | l316c64l16b: 7 9 13 23 33 45 56 67
48 | l316c64l32b: 5 6 7 10 13 19 25 30
49 | l316c64l64b: 4 5 6 8 10 14 18 21
50 | l316c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
51 | l316c128l2b: 25 131 243 1000 1000 1000 1000 1000
52 | l316c128l4b: 8 28 79 127 179 221 253 274
53 | l316c128l8b: 4 9 22 43 62 88 106 131
54 | l316c128l16b: 4 6 11 21 32 44 55 67
55 | l316c128l32b: 4 6 11 12 12 18 24 29
56 | l316c128l64b: 2 3 5 7 9 13 17 21
57 | l316c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
58 | l316c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
59 | l316c256l4b: 5 28 80 128 180 221 253 274
60 | l316c256l8b: 3 8 22 43 63 88 107 131
61 | l316c256l16b: 2 5 11 21 32 44 55 67
62 | l316c256l32b: 2 3 5 8 12 18 24 29
63 | l316c256l64b: 2 3 4 6 9 13 17 21
64 | l24c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
65 | l24c64l2b: 10 12 24 41 60 86 105 122
66 | l24c64l4b: 5 7 13 20 29 38 47 56
67 | l24c64l8b: 5 6 9 14 18 24 29 35
68 | l24c64l16b: 4 5 7 10 12 16 19 22
69 | l24c64l32b: 5 5 6 8 10 12 14 17
70 | l24c64l64b: 5 5 6 8 10 12 14 16
71 | l24c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
72 | l24c128l2b: 1000 1000 1000 1000 1000 1000 1000 1000
73 | l24c128l4b: 3 7 13 20 29 38 47 57
74 | l24c128l8b: 3 5 9 13 18 23 29 35
75 | l24c128l16b: 3 4 6 9 12 15 19 22
76 | l24c128l32b: 3 4 5 7 9 11 14 16
77 | l24c128l64b: 1000 1000 1000 1000 1000 1000 1000 1000
78 | l24c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
79 | l24c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
80 | l24c256l4b: 2 6 13 20 29 38 47 57
81 | l24c256l8b: 2 4 8 13 18 23 28 35
82 | l24c256l16b: 2 3 6 8 11 15 18 22
83 | l24c256l32b: 2 3 5 6 8 11 14 16
84 | l24c256l64b: 1000 1000 1000 1000 1000 1000 1000 1000
85 | l28c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
86 | l28c64l2b: 46 52 117 157 188 225 246 261
87 | l28c64l4b: 19 25 39 54 96 107 120 150
88 | l28c64l8b: 9 12 21 30 39 47 58 79
89 | l28c64l16b: 8 9 11 16 25 32 37 42
90 | l28c64l32b: 7 8 9 11 14 19 23 28
91 | l28c64l64b: 7 7 8 10 12 14 18 22
92 | l28c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
93 | l28c128l2b: 1000 1000 1000 1000 1000 1000 1000 1000
94 | l28c128l4b: 12 22 39 54 98 108 130 151
95 | l28c128l8b: 7 12 21 30 39 48 59 80
96 | l28c128l16b: 6 8 11 16 24 31 37 42
97 | l28c128l32b: 6 7 9 11 14 19 24 28
98 | l28c128l64b: 6 7 9 11 14 19 24 28
99 | l28c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
100 | l28c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
101 | l28c256l4b: 12 22 39 54 100 108 130 152
102 | l28c256l8b: 7 12 21 30 39 48 59 81
103 | l28c256l16b: 6 8 11 16 24 31 37 42
104 | l28c256l32b: 6 7 9 11 14 19 24 28
105 | l28c256l64b: 6 7 9 11 14 19 24 28
106 | l216c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
107 | l216c64l2b: 1000 1000 1000 1000 1000 1000 1000 1000
108 | l216c64l4b: 34 35 78 126 178 220 252 274
109 | l216c64l8b: 9 11 23 43 62 87 105 130
110 | l216c64l16b: 7 9 13 23 33 45 56 67
111 | l216c64l32b: 5 6 7 10 13 19 25 30
112 | l216c64l64b: 4 5 6 8 10 14 18 21
113 | l216c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
114 | l216c128l2b: 25 131 243 1000 1000 1000 1000 1000
115 | l216c128l4b: 8 28 79 127 179 221 253 274
116 | l216c128l8b: 4 9 22 43 62 88 106 131
117 | l216c128l16b: 4 6 11 21 32 44 55 67
118 | l216c128l32b: 4 6 11 12 12 18 24 29
119 | l216c128l64b: 2 3 5 7 9 13 17 21
120 | l216c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
121 | l216c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
122 | l216c256l4b: 5 28 80 128 180 221 253 274
123 | l216c256l8b: 3 8 22 43 63 88 107 131
124 | l216c256l16b: 2 5 11 21 32 44 55 67
125 | l216c256l32b: 2 3 5 8 12 18 24 29
126 | l216c256l64b: 2 3 4 6 9 13 17 21
127 |
--------------------------------------------------------------------------------
/cacti/crossbar.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 | #ifndef __CROSSBAR__
34 | #define __CROSSBAR__
35 |
36 | #include
37 | #include
38 | #include "basic_circuit.h"
39 | #include "cacti_interface.h"
40 | #include "component.h"
41 | #include "parameter.h"
42 | #include "mat.h"
43 | #include "wire.h"
44 |
45 | class Crossbar : public Component  // Crossbar model; derives from Component.
46 | {
47 | public:
48 | Crossbar(
49 | double in,       // NOTE(review): presumably the input-port count (cf. n_inp) - confirm in crossbar.cc
50 | double out,      // NOTE(review): presumably the output-port count (cf. n_out) - confirm in crossbar.cc
51 | double flit_sz,  // NOTE(review): presumably the flit size (cf. flit_size); units are not visible here
52 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global));  // device type defaults to &g_tp.peri_global
53 | ~Crossbar();
54 | // Reporting / evaluation entry points; their implementations are outside this header.
55 | void print_crossbar();
56 | double output_buffer();
57 | void compute_power();
58 | // Public parameters. NOTE(review): tri_*_cap look like tri-state buffer capacitances (input, output, control, internal) - confirm.
59 | double n_inp, n_out;
60 | double flit_size;
61 | double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap;
62 |
63 | private:
64 | double CB_ADJ;
65 | /*
66 | * CB_ADJ: adjustment factor for the height of the cross-point (tri-state buffer) cell
67 | * layout. It is tuned so that the aspect ratio of the whole crossbar is close to one.
68 | * While adjusting, the number of wires routed over the tri-state buffers does not change,
69 | * but the effective wiring pitch does. Specifically, as CB_ADJ increases during the
70 | * adjustment, each tri-state buffer becomes taller and thinner and the effective wiring
71 | * pitch increases. As a result, the height of the crossbar (area.h) increases.
72 | */
73 |
74 | /*TechnologyParameter::*/DeviceType *deviceType;
75 | double TriS1, TriS2;
76 | double min_w_pmos, Vdd;
77 |
78 | };
79 |
80 |
81 |
82 |
83 | #endif
84 |
--------------------------------------------------------------------------------
/cacti/dram.cfg:
--------------------------------------------------------------------------------
1 | //-size (bytes) 16777216
2 | //-size (bytes) 33554432
3 | -size (bytes) 134217728
4 | //-size (bytes) 67108864
5 | //-size (bytes) 1073741824
6 |
7 | -block size (bytes) 64
8 | -associativity 1
9 | -read-write port 1
10 | -exclusive read port 0
11 | -exclusive write port 0
12 | -single ended read ports 0
13 | -UCA bank count 1
14 | //-technology (u) 0.032
15 | //-technology (u) 0.045
16 | -technology (u) 0.068
17 | //-technology (u) 0.078
18 |
19 | # following three parameters are meaningful only for main memories
20 | -page size (bits) 8192
21 | -burst length 8
22 | -internal prefetch width 8
23 |
24 | # following parameter can have one of the five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
25 | -Data array cell type - "comm-dram"
26 |
27 | # following parameter can have one of the three values -- (itrs-hp, itrs-lstp, itrs-lop)
28 | -Data array peripheral type - "itrs-hp"
29 |
30 | # following parameter can have one of the five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
31 | -Tag array cell type - "itrs-hp"
32 |
33 | # following parameter can have one of the three values -- (itrs-hp, itrs-lstp, itrs-lop)
34 | -Tag array peripheral type - "itrs-hp"
35 |
36 | # Bus width includes the data bits and address bits required by the decoder
37 | //-output/input bus width 512
38 | -output/input bus width 64
39 |
40 | -operating temperature (K) 350
41 |
42 | -cache type "main memory"
43 |
44 | # to model special structure like branch target buffers, directory, etc.
45 | # change the tag size parameter
46 | # if you want cacti to calculate the tagbits, set the tag size to "default"
47 | -tag size (b) "default"
48 | //-tag size (b) 45
49 |
50 | # fast - data and tag access happen in parallel
51 | # sequential - data array is accessed after accessing the tag array
52 | # normal - data array lookup and tag access happen in parallel
53 | # final data block is broadcast in the data array h-tree
54 | # after getting the signal from the tag array
55 | //-access mode (normal, sequential, fast) - "fast"
56 | -access mode (normal, sequential, fast) - "normal"
57 | //-access mode (normal, sequential, fast) - "sequential"
58 |
59 | # DESIGN OBJECTIVE for UCA (or banks in NUCA)
60 | //-design objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:0
61 | -design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
62 | -deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:1000000
63 | //-deviate (delay, dynamic power, leakage power, cycle time, area) 200:100000:100000:100000:20
64 |
65 | -Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
66 |
67 | -Cache model (NUCA, UCA) - "UCA"
68 |
69 | //-Wire signalling (fullswing, lowswing, default) - "default"
70 | -Wire signalling (fullswing, lowswing, default) - "Global_10"
71 |
72 | -Wire inside mat - "global"
73 | //-Wire inside mat - "semi-global"
74 | -Wire outside mat - "global"
75 |
76 | -Interconnect projection - "conservative"
77 | //-Interconnect projection - "aggressive"
78 |
79 | -Add ECC - "true"
80 |
81 | -Print level (DETAILED, CONCISE) - "DETAILED"
82 |
83 | # for debugging
84 | -Print input parameters - "true"
85 | # force CACTI to model the cache with the
86 | # following Ndbl, Ndwl, Nspd, Ndsam,
87 | # and Ndcm values
88 | //-Force cache config - "true"
89 | -Force cache config - "false"
90 | -Ndwl 1
91 | -Ndbl 1
92 | -Nspd 0
93 | -Ndcm 1
94 | -Ndsam1 0
95 | -Ndsam2 0
96 |
97 | ########### NUCA Params ############
98 |
99 | # Objective for NUCA
100 | -NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
101 | -NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
102 |
103 | # Contention in network (which is a function of core count and cache level) is one of
104 | # the critical factors used for deciding the optimal bank count value
105 | # core count can be 4, 8, or 16
106 | //-Core count 4
107 | -Core count 8
108 | //-Core count 16
109 | -Cache level (L2/L3) - "L3"
110 |
111 | # In order for CACTI to find the optimal NUCA bank value the following
112 | # variable should be assigned 0.
113 | -NUCA bank count 0
114 |
115 |
--------------------------------------------------------------------------------
/cacti/dram_read_energy_results.txt:
--------------------------------------------------------------------------------
1 | DRAM:
2 | - Read energy: 0.468762 nJ
3 | name: dram-config-DRAM-system.log
4 |
--------------------------------------------------------------------------------
/cacti/extio.h:
--------------------------------------------------------------------------------
1 | #ifndef _extio_H_
2 | #define _extio_H_
3 | #include "parameter.h"
4 | #include "component.h"
5 | #include "extio_technology.h"
6 |
7 | class Extio : public Component  // External I/O model; derives from Component.
8 | {
9 | public:
10 | // Built from a pointer to the I/O technology parameters. NOTE(review): presumably kept in io_param - confirm in extio.cc.
11 | Extio(IOTechParam *);
12 | // Evaluation steps for area, eye, and the dynamic / PHY / termination power parts (per the method names).
13 | void extio_area();
14 | void extio_eye();
15 | void extio_power_dynamic();
16 | void extio_power_phy();
17 | void extio_power_term();
18 |
19 | private:
20 | IOTechParam *io_param;
21 | // NOTE(review): units of the results below are not visible in this header - confirm in extio.cc.
22 | double io_area;
23 | // Termination- and bias-related power terms.
24 | double io_power_term;
25 | double power_termination_write;
26 | double power_termination_read;
27 | double power_bias;
28 | double power_clk_bias;
29 | // PHY-related terms.
30 | double phy_power;
31 | double phy_wtime;
32 | double phy_static_power;
33 | double phy_dynamic_power;
34 | // Dynamic I/O power and its per-signal-group read/write parts.
35 | double io_power_dynamic;
36 |
37 | double power_dq_write, power_dqs_write, power_ca_write,
38 | power_dq_read, power_dqs_read, power_ca_read,
39 | power_clk;
40 | // NOTE(review): presumably timing and voltage margins - confirm.
41 | double io_tmargin, io_vmargin;
42 |
43 | };
44 |
45 |
46 | #endif // _extio_H_
47 |
--------------------------------------------------------------------------------
/cacti/htree2.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 | #ifndef __HTREE2_H__
34 | #define __HTREE2_H__
35 |
36 | #include "basic_circuit.h"
37 | #include "component.h"
38 | #include "parameter.h"
39 | #include "assert.h"
40 | #include "subarray.h"
41 | #include "cacti_interface.h"
42 | #include "wire.h"
43 |
44 | // leakage power includes the entire htree in a bank (when uca_tree == false)
45 | // leakage power includes only the part going to one bank when uca_tree == true
46 |
47 | class Htree2 : public Component  // H-tree model; derives from Component.
48 | {
49 | public:
50 | Htree2(enum Wire_type wire_model,
51 | double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,  // NOTE(review): bl/wl presumably map to ndbl/ndwl - confirm in htree2.cc
52 | enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false,  // both flags default to false
53 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global));  // device type defaults to &g_tp.peri_global
54 | ~Htree2() {};
55 | // Input-side and output-side h-tree evaluation.
56 | void in_htree();
57 | void out_htree();
58 |
59 | // repeaters only at h-tree nodes
60 | void limited_in_htree();
61 | void limited_out_htree();
62 | void input_nand(double s1, double s2, double l);
63 | void output_buffer(double s1, double s2, double l);
64 |
65 | double in_rise_time, out_rise_time;
66 | // Setter for in_rise_time.
67 | void set_in_rise_time(double rt)
68 | {
69 | in_rise_time = rt;
70 | }
71 |
72 | double max_unpipelined_link_delay;
73 | powerDef power_bit;
74 |
75 |
76 | private:
77 | double wire_bw;
78 | double init_wire_bw; // bus width at root
79 | enum Htree_type tree_type;
80 | double htree_hnodes;
81 | double htree_vnodes;
82 | double mat_width;
83 | double mat_height;
84 | int add_bits, data_in_bits,search_data_in_bits,data_out_bits, search_data_out_bits;
85 | int ndbl, ndwl;
86 | bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously
87 | bool search_tree;
88 |
89 | enum Wire_type wt;
90 | double min_w_nmos;
91 | double min_w_pmos;
92 |
93 | /*TechnologyParameter::*/DeviceType *deviceType;
94 |
95 | };
96 |
97 | #endif
98 |
--------------------------------------------------------------------------------
/cacti/io.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 | #ifndef __IO_H__
34 | #define __IO_H__
35 |
36 |
37 | #include "const.h"
38 | #include "cacti_interface.h"
39 |
40 | // Result-output entry points. NOTE(review): `string` is unqualified here, so it relies on a using-directive reached via the includes above - confirm.
41 | void output_data_csv(const uca_org_t & fin_res, string fn="out.csv");  // fn defaults to "out.csv"
42 | void output_UCA(uca_org_t * fin_res);
43 | void output_data_csv_3dd(const uca_org_t & fin_res);
44 |
45 | #endif
46 |
--------------------------------------------------------------------------------
/cacti/makefile:
--------------------------------------------------------------------------------
1 | TAR = cacti
2 | # Both build flavours delegate to $(TAR).mk; the flavour is selected through the TAG variable.
3 | .PHONY: dbg opt depend clean clean_dbg clean_opt
4 | # NOTE(review): `depend` is declared phony but no rule for it is defined in this makefile; `all` is not listed as phony.
5 | all: dbg
6 | # dbg: needs $(TAR).mk and the obj_dbg directory, then re-invokes make on $(TAR).mk with TAG=dbg.
7 | dbg: $(TAR).mk obj_dbg
8 | @$(MAKE) TAG=dbg -C . -f $(TAR).mk
9 | # opt: same as dbg, but with the obj_opt directory and TAG=opt.
10 | opt: $(TAR).mk obj_opt
11 | @$(MAKE) TAG=opt -C . -f $(TAR).mk
12 | # Object directories are created on demand ($@ expands to the directory name).
13 | obj_dbg:
14 | mkdir $@
15 |
16 | obj_opt:
17 | mkdir $@
18 | # clean removes both flavours.
19 | clean: clean_dbg clean_opt
20 | # clean_*: the object directory is a prerequisite (so it is created if absent), $(TAR).mk's clean runs, then the directory ($<) is removed.
21 | clean_dbg: obj_dbg
22 | @$(MAKE) TAG=dbg -C . -f $(TAR).mk clean
23 | rm -rf $<
24 |
25 | clean_opt: obj_opt
26 | @$(MAKE) TAG=opt -C . -f $(TAR).mk clean
27 | rm -rf $<
28 |
29 |
--------------------------------------------------------------------------------
/cacti/memcad.h:
--------------------------------------------------------------------------------
1 | #ifndef __MEMCAD_H__
2 | #define __MEMCAD_H__
3 |
4 | #include "memcad_parameters.h"
5 | #include <vector>
6 |
7 | // Global result lists. `vector` is unqualified: it relies on the `using namespace std;` in memcad_parameters.h.
8 | extern vector<channel_conf*> *memcad_all_channels;
9 |
10 | extern vector<bob_conf*> *memcad_all_bobs;
11 |
12 | extern vector<memory_conf*> *memcad_all_memories;
13 |
14 | extern vector<memory_conf*> *memcad_best_results;
15 |
16 | // NOTE(review): the header name and the element types above were missing from this copy and are restored
17 | // from the matching *_conf classes in memcad_parameters.h - confirm against the definitions in memcad.cc.
18 | void find_all_channels(MemCadParameters * memcad_params);
19 |
20 | void find_all_bobs(MemCadParameters * memcad_params);
21 |
22 | bool find_all_memories(MemCadParameters * memcad_params);
23 |
24 | void clean_results();
25 |
26 | void solve_memcad(MemCadParameters * memcad_params);
27 |
28 | #endif
29 |
30 |
31 |
--------------------------------------------------------------------------------
/cacti/memcad_parameters.h:
--------------------------------------------------------------------------------
1 | #ifndef __MEMCAD_PARAMS_H__
2 | #define __MEMCAD_PARAMS_H__
3 |
4 | #include <vector>
5 | #include <iostream>
6 | #include "cacti_interface.h"
7 | #include "const.h"
8 | #include "parameter.h"
9 |
10 | using namespace std;
11 |
12 | ///#define INF 1000000
13 | #define EPS 0.0000001
14 |
15 | #define MAX_DIMM_PER_CHANNEL 3
16 | #define MAX_CAP_PER_DIMM 64
17 | #define MAX_RANKS_PER_DIMM 4
18 | #define MIN_BW_PER_CHANNEL 400
19 | #define MAX_DDR3_CHANNEL_BW 800
20 | #define MAX_DDR4_CHANNEL_BW 1600
21 | #define MAX_NUM_CHANNELS_PER_BOB 2
22 | #define MAX_NUM_BOBS 6
23 | #define DIMM_PER_CHANNEL 3
24 |
25 | /*
26 | enum Mem_IO_type
27 | {
28 | DDR3,
29 | DDR4,
30 | LPDDR2,
31 | WideIO,
32 | Low_Swing_Diff,
33 | Serial
34 | };
35 |
36 | enum Mem_DIMM
37 | {
38 | UDIMM,
39 | RDIMM,
40 | LRDIMM
41 | };
42 | */
43 |
44 |
45 |
46 | class MemCadParameters
47 | {
48 | public:
49 | // Input parameters of a MemCAD run; the constructor below takes an InputParameter.
50 | Mem_IO_type io_type; // DDR3 vs. DDR4
51 |
52 | int capacity; // in GB
53 |
54 | int num_bobs; // default=4me (sic) - NOTE(review): original comment is unclear; confirm the default
55 |
56 | ///int bw_per_channel; // default=1600 MHz;
57 |
58 | ///bool with_bob;
59 |
60 | int num_channels_per_bob; // 1 means no bob
61 |
62 | bool capacity_wise; // true means the load on each channel is proportional to its capacity.
63 |
64 | ///int min_bandwith;
65 | // NOTE(review): presumably the optimization metrics in priority order - confirm in memcad.cc.
66 | MemCad_metrics first_metric;
67 |
68 | MemCad_metrics second_metric;
69 |
70 | MemCad_metrics third_metric;
71 |
72 | DIMM_Model dimm_model;
73 |
74 | bool low_power_permitted; // Not yet implemented. It determines acceptable VDDs.
75 |
76 | double load; // between 0 and 1
77 |
78 | double row_buffer_hit_rate;
79 |
80 | double rd_2_wr_ratio;
81 |
82 | bool same_bw_in_bob; // true if all the channels in the bob have the same bandwidth.
83 |
84 |
85 | bool mirror_in_bob;// true if all the channels in the bob have the same configs
86 |
87 | bool total_power; // false means just considering I/O Power
88 |
89 | bool verbose;
90 |
91 | // Functions
92 | MemCadParameters(InputParameter * g_ip);
93 | void print_inputs();
94 | bool sanity_check();
95 |
96 | };
97 |
98 |
99 | //////////////////////////////////////////////////////////////////////////////////
100 |
101 | class MemoryParameters
102 | {
103 | public:
104 | // Power Parameters
105 | static double VDD[2][2][4];
106 | // NOTE(review): the meaning of each array dimension is not visible in this header - see the definitions in memcad_parameters.cc.
107 | static double IDD0[2][4];
108 |
109 | static double IDD1[2][4];
110 |
111 | static double IDD2P0[2][4];
112 |
113 | static double IDD2P1[2][4];
114 |
115 | static double IDD2N[2][4];
116 |
117 | static double IDD3P[2][4];
118 |
119 | static double IDD3N[2][4];
120 |
121 | static double IDD4R[2][4];
122 |
123 | static double IDD4W[2][4];
124 |
125 | static double IDD5[2][4];
126 |
127 | static double io_energy_read[2][3][3][4];
128 |
129 | static double io_energy_write[2][3][3][4];
130 |
131 | // Timing Parameters
132 | static double T_RAS[2];
133 |
134 | static double T_RC[2];
135 |
136 | static double T_RP[2];
137 |
138 | static double T_RFC[2];
139 |
140 | static double T_REFI[2];
141 |
142 | // Bandwidth Parameters
143 | static int bandwidth_load[2][4];
144 |
145 | // Cost Parameters
146 | static double cost[2][3][5];
147 |
148 |
149 | // Functions
150 | MemoryParameters();
151 |
152 | int bw_index(Mem_IO_type type, int bandwidth);
153 | };
154 |
155 | ///////////////////////////////////////////////////////////////////////////
156 | // Free function with the same name and signature as the MemoryParameters::bw_index member declared above.
157 | int bw_index(Mem_IO_type type, int bandwidth);
158 |
159 |
160 | ///////////////////////////////////////////////////////////////////////////
161 |
162 | class channel_conf  // Configuration and computed figures of one memory channel.
163 | {
164 | public:
165 | MemCadParameters *memcad_params;
166 |
167 | Mem_DIMM type;
168 | int num_dimm_per_channel;
169 | int histogram_capacity[5]; // 0->4GB, 1->8GB, 2->16GB, 3->32GB, 4->64GB
170 | bool low_power;
171 | // Figures for this channel. NOTE(review): units are not visible in this header - confirm in memcad_parameters.cc.
172 | int capacity;
173 | int bandwidth;
174 | double energy_per_read;
175 | double energy_per_write;
176 | double energy_per_access;
177 |
178 | double cost;
179 | double latency;
180 |
181 | bool valid;
182 | // Functions
183 | channel_conf(MemCadParameters * memcad_params, const vector<int>& dimm_cap, int bandwidth, Mem_DIMM type, bool low_power);  // NOTE(review): element type <int> restored (it was missing from this copy) - confirm against memcad_parameters.cc
184 |
185 | void calc_power();
186 |
187 | friend channel_conf* clone(channel_conf*);
188 | friend ostream & operator<<(ostream &os, const channel_conf& ch_cnf);
189 |
190 | };
191 |
192 |
193 | ///////////////////////////////////////////////////////////////////////////
194 |
195 | class bob_conf  // Configuration and computed figures of one BoB, holding up to MAX_NUM_CHANNELS_PER_BOB channels.
196 | {
197 | public:
198 | MemCadParameters *memcad_params;
199 | int num_channels;
200 | channel_conf *channels[MAX_NUM_CHANNELS_PER_BOB];
201 | // Figures for this BoB. NOTE(review): units are not visible in this header - confirm in memcad_parameters.cc.
202 | int capacity;
203 | int bandwidth;
204 | double energy_per_read;
205 | double energy_per_write;
206 | double energy_per_access;
207 |
208 | double cost;
209 | double latency;
210 |
211 | bool valid;
212 | // NOTE(review): element type <channel_conf*> restored (it was missing from this copy) to match the `channels` member - confirm against memcad_parameters.cc.
213 | bob_conf(MemCadParameters * memcad_params, vector<channel_conf*> * channels);
214 |
215 | friend bob_conf* clone(bob_conf*);
216 | friend ostream & operator <<(ostream &os, const bob_conf& bob_cnf);
217 | };
218 |
219 | ///////////////////////////////////////////////////////////////////////////
220 |
221 |
222 | class memory_conf  // Configuration and computed figures of a whole memory, holding up to MAX_NUM_BOBS BoBs.
223 | {
224 | public:
225 | MemCadParameters *memcad_params;
226 | int num_bobs;
227 | bob_conf* bobs[MAX_NUM_BOBS];
228 | // Figures for the whole memory. NOTE(review): units are not visible in this header - confirm in memcad_parameters.cc.
229 | int capacity;
230 | int bandwidth;
231 | double energy_per_read;
232 | double energy_per_write;
233 | double energy_per_access;
234 |
235 | double cost;
236 | double latency;
237 |
238 | bool valid;
239 | // NOTE(review): element type <bob_conf*> restored (it was missing from this copy) to match the `bobs` member - confirm against memcad_parameters.cc.
240 | memory_conf(MemCadParameters * memcad_params, vector<bob_conf*> * bobs);
241 | friend ostream & operator <<(ostream &os, const memory_conf& mem_cnf);
242 | };
243 |
244 |
245 |
246 |
247 |
248 |
249 | #endif
250 |
251 |
252 |
--------------------------------------------------------------------------------
/cacti/nuca.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 | #ifndef __NUCA_H__
34 | #define __NUCA_H__
35 |
36 | #include "basic_circuit.h"
37 | #include "component.h"
38 | #include "parameter.h"
39 | #include "assert.h"
40 | #include "cacti_interface.h"
41 | #include "wire.h"
42 | #include "mat.h"
43 | #include "io.h"
44 | #include "router.h"
45 | #include
46 |
47 |
48 |
49 | class nuca_org_t {  // One NUCA organization: its stats components, wires, router, contention and grid shape.
50 | public:
51 | ~nuca_org_t();  // NOTE(review): presumably releases h_wire / v_wire / router - confirm in nuca.cc
52 | // int size;
53 | /* area, power, access time, and cycle time stats */
54 | Component nuca_pda;
55 | Component bank_pda;
56 | Component wire_pda;
57 | Wire *h_wire;
58 | Wire *v_wire;
59 | Router *router;
60 | /* contention for a particular network configuration,
61 | * calculated from a cycle-accurate simulation.
62 | * Ref: CACTI 6 tech report
63 | */
64 | double contention;
65 |
66 | /* grid network stats */
67 | double avg_hops;
68 | int rows;
69 | int columns;
70 | int bank_count;
71 | };
72 |
73 |
74 |
75 | class Nuca : public Component  // NUCA model; derives from Component.
76 | {
77 | public:
78 | Nuca(
79 | /*TechnologyParameter::*/DeviceType *dt);  // no default device type here: callers must pass one
80 | void print_router();
81 | ~Nuca();
82 | void sim_nuca();
83 | void init_cont();
84 | int calc_cycles(double lat, double oper_freq);
85 | void calculate_nuca_area (nuca_org_t *nuca);
86 | int check_nuca_org (nuca_org_t *n, min_values_t *minval);
87 | nuca_org_t * find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval);  // NOTE(review): element type restored (it was missing from this copy) from the return type - confirm against nuca.cc
88 | void print_nuca(nuca_org_t *n);
89 | void print_cont_stats();
90 |
91 | private:
92 |
93 | /*TechnologyParameter::*/DeviceType *deviceType;
94 | int wt_min, wt_max;
95 | Wire *wire_vertical[WIRE_TYPES],
96 | *wire_horizontal[WIRE_TYPES];
97 |
98 | };
99 |
100 |
101 | #endif
102 |
--------------------------------------------------------------------------------
/cacti/obj_dbg/TSV.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/TSV.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/Ucache.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/Ucache.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/arbiter.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/arbiter.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/area.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/area.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/bank.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/bank.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/basic_circuit.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/basic_circuit.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/cacti:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/cacti
--------------------------------------------------------------------------------
/cacti/obj_dbg/cacti_interface.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/cacti_interface.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/component.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/component.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/crossbar.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/crossbar.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/decoder.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/decoder.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/extio.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/extio.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/extio_technology.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/extio_technology.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/htree2.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/htree2.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/io.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/io.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/main.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/main.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/mat.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/mat.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/memcad.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/memcad.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/memcad_parameters.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/memcad_parameters.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/memorybus.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/memorybus.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/nuca.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/nuca.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/parameter.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/parameter.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/powergating.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/powergating.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/router.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/router.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/subarray.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/subarray.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/technology.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/technology.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/uca.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/uca.o
--------------------------------------------------------------------------------
/cacti/obj_dbg/wire.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/wire.o
--------------------------------------------------------------------------------
/cacti/powergating.cc:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 | #include "area.h"
33 | #include "powergating.h"
34 | #include "parameter.h"
35 | #include
36 | #include
37 | #include
38 |
39 | using namespace std;
40 |
41 | //TODO: although DTSN is used,since for memory array, the number of sleep txs
42 | //is related to the number of rows and cols. so All calculations are still base on
43 | //single sleep tx cases
44 |
45 | Sleep_tx::Sleep_tx(
46 | double _perf_with_sleep_tx,//appears as a divisor in the width expression below
47 | double _active_Isat,//of circuit block, not sleep tx
48 | bool _is_footer,//true: compute_penalty() models an NCH device; false: a PCH device
49 | double _c_circuit_wakeup,//circuit capacitance added to the sleep tx drain cap in compute_penalty()
50 | double _V_delta,//voltage swing used for wake-up delay and energy in compute_penalty()
51 | int _num_sleep_tx,//the total sleep tx width is divided evenly by this count
52 | // double _vt_circuit,
53 | // double _vt_sleep_tx,
54 | // double _mobility,//of sleep tx
55 | // double _c_ox,//of sleep tx
56 | const Area & cell_)//kept as a reference member, so the referenced Area must outlive this object
57 | :perf_with_sleep_tx(_perf_with_sleep_tx),
58 | active_Isat(_active_Isat),
59 | is_footer(_is_footer),
60 | c_circuit_wakeup(_c_circuit_wakeup),
61 | V_delta(_V_delta),
62 | num_sleep_tx(_num_sleep_tx),
63 | // vt_circuit(_vt_circuit),
64 | // vt_sleep_tx(_vt_sleep_tx),
65 | // mobility(_mobility),
66 | // c_ox(_c_ox)
67 | cell(cell_),
68 | is_sleep_tx(true)
69 | {
70 | //Sizes the sleep tx, stores its footprint in `area`, then calls compute_penalty() for the wake-up cost
71 | //a single sleep tx in a network
72 | double raw_area, raw_width, raw_hight;
73 | double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true);
74 | vdd = g_tp.peri_global.Vdd;
75 | vt_circuit = g_tp.peri_global.Vth;
76 | vt_sleep_tx = g_tp.sleep_tx.Vth;
77 | mobility = g_tp.sleep_tx.Mobility_n;
78 | c_ox = g_tp.sleep_tx.C_ox;
79 | //Total width from the expression below, then split evenly across num_sleep_tx devices
80 | width = active_Isat/(perf_with_sleep_tx*mobility*c_ox*(vdd-vt_circuit)*(vdd-vt_sleep_tx))*g_ip->F_sz_um;//W/L uses physical numbers
81 | width /= num_sleep_tx;//NOTE(review): no guard here against num_sleep_tx == 0 (or a zero divisor on the line above)
82 | //Footprint: half of an INV gate area, with the width fixed to cell.w and the height derived as area/width
83 | raw_area = compute_gate_area(INV, 1, width, p_to_n_sz_ratio*width, cell.w*2)/2; //Only a single device, assuming the device is laid on its side
84 | raw_width = cell.w;
85 | raw_hight = raw_area/cell.w;
86 | area.set_h(raw_hight);
87 | area.set_w(raw_width);
88 | //Fills c_intrinsic_sleep, wakeup_delay and wakeup_power.readOp.dynamic; the returned delay is ignored here
89 | compute_penalty();
90 |
91 | }
92 |
93 | double Sleep_tx::compute_penalty()
94 | {
95 | //V_delta = VDD - VCCmin; it has nothing to do with the threshold of the sleep tx, although it might be OK to use the sleep tx to control V_delta
96 | // double c_load;
97 | double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true);
98 | //Computes c_intrinsic_sleep, wakeup_delay and wakeup_power.readOp.dynamic for a footer (NCH) or header (PCH) sleep tx
99 | if (is_footer)
100 | {
101 | c_intrinsic_sleep = drain_C_(width, NCH, 1, 1, area.h, false, false, false,is_sleep_tx);
102 | // V_delta = _V_delta;
103 | wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_nmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio);
104 | wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta;
105 | //no 0.5 factor: half of the energy is spent entering sleep and half waking up, and the two always come in pairs
106 | }
107 | else
108 | {
109 | c_intrinsic_sleep = drain_C_(width*p_to_n_sz_ratio, PCH, 1, 1, area.h, false, false, false,is_sleep_tx);
110 | // V_delta = _V_delta;
111 | wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_pmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio);//NOTE(review): Isat uses width while the drain cap above uses width*p_to_n_sz_ratio - confirm this is intended
112 | wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta;
113 | }
114 | //The value returned is the wake-up delay computed in the branch above
115 | return wakeup_delay;
116 |
117 | /*
118 | The number of cycles in the wake-up latency sets the constraint on the
119 | minimum number of idle clock cycles needed before a processor
120 | can enter the corresponding sleep mode without any wakeup
121 | overhead.
122 |
123 | If the circuit is only half way to sleep when it is woken up, that is still OK;
124 | the wakeup latency will just be shorter than the wakeup time from fully asleep.
125 | So, the sleep time and energy do not matter.
126 | */
127 |
128 | }
129 |
130 |
--------------------------------------------------------------------------------
/cacti/powergating.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 | #ifndef POWERGATING_H_
33 | #define POWERGATING_H_
34 |
35 | #include "component.h"
36 |
37 | class Sleep_tx : public Component
38 | {
39 | public:
40 | Sleep_tx(
41 | double _perf_with_sleep_tx,
42 | double _active_Isat,//of circuit block, not sleep tx
43 | bool _is_footer,
44 | double _c_circuit_wakeup,
45 | double _V_delta,
46 | int _num_sleep_tx,
47 | // double _vt_circuit,
48 | // double _vt_sleep_tx,
49 | // double _mobility,//of sleep tx
50 | // double _c_ox,//of sleep tx
51 | const Area & cell_);
52 | //Copies of the corresponding constructor arguments
53 | double perf_with_sleep_tx;
54 | double active_Isat;
55 | bool is_footer;
56 | //Assigned in the constructor body (powergating.cc) from g_tp; width is computed there as well
57 | double vt_circuit;
58 | double vt_sleep_tx;
59 | double vdd;// of circuit block not sleep tx
60 | double mobility;//of sleep tx
61 | double c_ox;
62 | double width;
63 | double c_circuit_wakeup;
64 | double c_intrinsic_sleep;
65 | double delay, wakeup_delay;
66 | powerDef power, wakeup_power;
67 | // double c_circuit_sleep;
68 | // double sleep_delay;
69 | // powerDef sleep_power;
70 | double V_delta;
71 |
72 | int num_sleep_tx;
73 |
74 | const Area & cell;//reference, not a copy: the Area passed to the constructor must outlive this object
75 | bool is_sleep_tx;//the constructor always initialises this to true
76 |
77 |
78 |
79 | // void compute_area();
80 | double compute_penalty(); // sets c_intrinsic_sleep, wakeup_delay and wakeup_power.readOp.dynamic; returns wakeup_delay
81 |
82 | void leakage_feedback(double temperature){};//empty body: does nothing
83 | ~Sleep_tx(){};
84 | };
85 |
86 | #endif /* POWERGATING_H_ */
87 |
--------------------------------------------------------------------------------
/cacti/regression.test:
--------------------------------------------------------------------------------
1 | cache 4 types
2 | ./cacti -infile test_configs/cache1.cfg #L1 2-way 32K
3 | ./cacti -infile test_configs/cache2.cfg #L2 4-way 256K
4 | ./cacti -infile test_configs/cache3.cfg #L3 8-way 16M
5 | ./cacti -infile test_configs/cache4.cfg #L1 full-asso 4K with single search port
6 | RAM 4 types
7 | ./cacti -infile test_configs/ram1.cfg # 16M
8 | ./cacti -infile test_configs/ram2.cfg # itrs-hp itrs-lstp
9 | ./cacti -infile test_configs/ram3.cfg # two banks no-ecc 128M
10 | ./cacti -infile test_configs/ram4.cfg # 32K 2-way
11 | CAM 4 types
12 | ./cacti -infile test_configs/cam1.cfg # same as ram1 but ram->cam and full-asso
13 | ./cacti -infile test_configs/cam2.cfg # same as cam1 with line size = 128
14 | ./cacti -infile test_configs/cam3.cfg # cam1 for 40nm technology
15 | ./cacti -infile test_configs/cam4.cfg # cam1 with exclusive read and write port
16 | NUCA 4 types
17 | ./cacti -infile test_configs/nuca1.cfg #
18 | ./cacti -infile test_configs/nuca2.cfg
19 | ./cacti -infile test_configs/nuca3.cfg
20 | ./cacti -infile test_configs/nuca4.cfg
21 | eDRAM 4 types
22 | ./cacti -infile test_configs/edram1.cfg #
23 | ./cacti -infile test_configs/edram2.cfg
24 | ./cacti -infile test_configs/edram3.cfg
25 | ./cacti -infile test_configs/edram4.cfg
26 | DRAM 4 types
27 | ./cacti -infile test_configs/dram1.cfg #
28 | ./cacti -infile test_configs/dram2.cfg
29 | ./cacti -infile test_configs/dram3.cfg
30 | ./cacti -infile test_configs/dram4.cfg
31 | IO 4 different parameters
32 | ./cacti -infile test_configs/io1.cfg #
33 | ./cacti -infile test_configs/io2.cfg
34 | ./cacti -infile test_configs/io3.cfg
35 | ./cacti -infile test_configs/io4.cfg
36 | Power gating 4 types
37 | ./cacti -infile test_configs/power_gate1.cfg
38 | ./cacti -infile test_configs/power_gate2.cfg
39 | ./cacti -infile test_configs/power_gate3.cfg
40 | ./cacti -infile test_configs/power_gate4.cfg
41 | 3D 4 types
42 | ./cacti -infile test_configs/3D1.cfg
43 | ./cacti -infile test_configs/3D2.cfg
44 | ./cacti -infile test_configs/3D3.cfg
45 | ./cacti -infile test_configs/3D4.cfg
--------------------------------------------------------------------------------
/cacti/router.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 |
34 | #ifndef __ROUTER_H__
35 | #define __ROUTER_H__
36 |
37 | #include
38 | #include
39 | #include "basic_circuit.h"
40 | #include "cacti_interface.h"
41 | #include "component.h"
42 | #include "mat.h"
43 | #include "parameter.h"
44 | #include "wire.h"
45 | #include "crossbar.h"
46 | #include "arbiter.h"
47 |
48 |
49 |
50 | class Router : public Component //router model exposing arbiter, crossbar and buffer sub-components
51 | {
52 | public:
53 | Router(
54 | double flit_size_,
55 | double vc_buf, /* vc size = vc_buffer_size * flit_size */
56 | double vc_count,
57 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global), //defaults to the global peripheral device type
58 | double I_ = 5, //presumably the number of input ports - TODO confirm against router.cc
59 | double O_ = 5, //presumably the number of output ports - TODO confirm against router.cc
60 | double M_ = 0.6); //presumably the network load stored in M - TODO confirm against router.cc
61 | ~Router();
62 |
63 |
64 | void print_router();
65 |
66 | Component arbiter, crossbar, buffer;
67 |
68 | double cycle_time, max_cyc;
69 | double flit_size;
70 | double vc_count;
71 | double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */
72 |
73 | private:
74 | /*TechnologyParameter::*/DeviceType *deviceType;
75 | double FREQUENCY; // move this to config file --TODO
76 | double Cw3(double len);
77 | double gate_cap(double w);
78 | double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack);
79 | enum Wire_type wtype;
80 | enum Wire_placement wire_placement;
81 | //crossbar
82 | double NTtr, PTtr, wt, ht, I, O, NTi, PTi, NTid, PTid, NTod, PTod, TriS1, TriS2;
83 | double M; //network load
84 | double transmission_buf_inpcap();
85 | double transmission_buf_outcap();
86 | double transmission_buf_ctrcap();
87 | double crossbar_inpline();
88 | double crossbar_outline();
89 | double crossbar_ctrline();
90 | double tr_crossbar_power();
91 | void cb_stats ();
92 | double arb_power();
93 | void arb_stats ();
94 | double buffer_params();
95 | void buffer_stats();
96 |
97 |
98 | //arbiter
99 |
100 | //buffer
101 |
102 | //router params
103 | double Vdd;
104 |
105 | void calc_router_parameters();
106 | void get_router_area();
107 | void get_router_power();
108 | void get_router_delay();
109 |
110 | double min_w_pmos;
111 |
112 |
113 | };
114 |
115 | #endif
116 |
--------------------------------------------------------------------------------
/cacti/subarray.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 |
34 | #ifndef __SUBARRAY_H__
35 | #define __SUBARRAY_H__
36 |
37 | #include "area.h"
38 | #include "component.h"
39 | #include "parameter.h"
40 |
41 | using namespace std;
42 |
43 |
44 | class Subarray : public Component //one memory subarray; compute_C() derives its bitline and wordline capacitance
45 | {
46 | public:
47 | Subarray(const DynamicParameter & dp, bool is_fa_);
48 | ~Subarray();
49 |
50 | const DynamicParameter & dp; //reference member: the DynamicParameter must outlive this object
51 | double get_total_cell_area();
52 | unsigned int num_rows;
53 | unsigned int num_cols;
54 | int32_t num_cols_fa_cam; //presumably the CAM column count of a fully-associative array - TODO confirm
55 | int32_t num_cols_fa_ram; //presumably the RAM column count of a fully-associative array - TODO confirm
56 | Area cell, cam_cell;
57 |
58 | bool is_fa; //presumably "fully associative" - TODO confirm against subarray.cc
59 | double C_wl, C_wl_cam, C_wl_ram; //wordline capacitances (presumably overall / CAM part / RAM part)
60 | double R_wl, R_wl_cam, R_wl_ram; //presumably the matching wordline resistances - TODO confirm
61 | double C_bl, C_bl_cam; //bitline capacitances
62 | private:
63 |
64 | void compute_C(); // compute bitline and wordline capacitance
65 | };
66 |
67 |
68 |
69 | #endif
70 |
71 |
--------------------------------------------------------------------------------
/cacti/tech_params/16nm.dat:
--------------------------------------------------------------------------------
1 | Invalid technology nodes
2 |
--------------------------------------------------------------------------------
/cacti/tech_params/180nm.dat:
--------------------------------------------------------------------------------
1 | parameters (unit) hp lstp lop lp-dram comm-dram
2 | -C_g_ideal (F/um) 1.328e-15 0 0 0 0
3 | -C_fringe (F/um) 1.6e-16 0 0 0 0
4 | -C_junc (F/um^2) 2e-15 0 0 0 0
5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16
6 | -l_phy (um) 0.12 0 0 0 0
7 | -l_elec (um) 0.1 0 0 0 0
8 | -nmos_effective_resistance_multiplier (-) 1.54 0 0 0 0
9 | -Vdd (V) 1.5 0 0 0 0
10 | -Vth (V) 0.4407 0 0 0 0
11 | -Vdsat (V) 0.256 0 0 0 0
12 | -I_on_n (A/um) 0.00075 0 0 0 0
13 | -I_on_p (A/um) 0.00035 0 0 0 0
14 |
15 |
16 |
17 | parameters (unit) temp hp lstp lop lp-dram comm-dram
18 | -I_off_n (A/um) 0 7e-10 0 0 0 0
19 | -I_off_n (A/um) 10 8.26e-10 0 0 0 0
20 | -I_off_n (A/um) 20 9.74e-10 0 0 0 0
21 | -I_off_n (A/um) 30 1.15e-09 0 0 0 0
22 | -I_off_n (A/um) 40 1.35e-09 0 0 0 0
23 | -I_off_n (A/um) 50 1.6e-09 0 0 0 0
24 | -I_off_n (A/um) 60 1.88e-09 0 0 0 0
25 | -I_off_n (A/um) 70 2.29e-09 0 0 0 0
26 | -I_off_n (A/um) 80 2.7e-09 0 0 0 0
27 | -I_off_n (A/um) 90 3.19e-09 0 0 0 0
28 | -I_off_n (A/um) 100 3.76e-09 0 0 0 0
29 | -I_g_on_n (A/um) 0 1.65e-10 0 0 0 0
30 | -I_g_on_n (A/um) 10 1.65e-10 0 0 0 0
31 | -I_g_on_n (A/um) 20 1.65e-10 0 0 0 0
32 | -I_g_on_n (A/um) 30 1.65e-10 0 0 0 0
33 | -I_g_on_n (A/um) 40 1.65e-10 0 0 0 0
34 | -I_g_on_n (A/um) 50 1.65e-10 0 0 0 0
35 | -I_g_on_n (A/um) 60 1.65e-10 0 0 0 0
36 | -I_g_on_n (A/um) 70 1.65e-10 0 0 0 0
37 | -I_g_on_n (A/um) 80 1.65e-10 0 0 0 0
38 | -I_g_on_n (A/um) 90 1.65e-10 0 0 0 0
39 | -I_g_on_n (A/um) 100 1.65e-10 0 0 0 0
40 |
41 |
42 | parameters (unit) hp lstp lop lp-dram comm-dram
43 | -C_ox (F/um^2) 3.58e-14 0 0 0 0
44 | -t_ox (um) 0.0024 0 0 0 0
45 | -n2p_drv_rt (-) 2.45 0 0 0 0
46 | -lch_lk_rdc (-) 1 0 0 0 0
47 | -Mobility_n (um^2/V.sec) 3.0216e+10 0 0 0 0
48 | -gmp_to_gmn_multiplier (-) 1.22 0 0 0 0
49 | -vpp (V) 0 0 0 0 0
50 |
51 | SRAM
52 | parameters cell_type hp lstp lop lp-dram comm-dram
53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31
54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23
55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08
56 | -area_cell (um^2) 0 146 146 146 146 146
57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46
58 |
59 | CAM
60 | parameters cell_type hp lstp lop lp-dram comm-dram
61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31
62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23
63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08
64 | -area_cell (um^2) 1 292 292 292 292 292
65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92
66 |
67 | DRAM
68 | parameters cell_type hp lstp lop lp-dram comm-dram
69 | -vdd_cell (V) 2 0 0 0 0 0
70 | -Wmemcella (um) 2 0 0 0 0 0
71 | -Wmemcellpmos (um) 2 0 0 0 0 0
72 | -Wmemcellnmos (um) 2 0 0 0 0 0
73 | -area_cell (um^2) 2 0 0 0 0 0
74 | -asp_ratio_cell (-) 2 0 0 0 0 0
75 |
76 | parameters hp lstp lop lp-dram comm-dram
77 | -dram_cell_I_on (A/um) 0 0 0 0 0
78 | -dram_cell_Vdd (V) 0 0 0 0 0
79 | -dram_cell_C (F) 0 0 0 0 0
80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 0 0
81 |
82 |
83 | -logic_scaling_co_eff (-) 1.5
84 | -core_tx_density (1/um^2) 0.245
85 | -sckt_co_eff (-) 1.11
86 | -chip_layout_overhead (-) 1
87 | -macro_layout_overhead (-) 1
88 | -sense_delay (sec) 2.8e-10
89 | -sense_dy_power (J) 1.47e-14
90 |
91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3
92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2
93 | -barrier_thickness (um) 0.017 0.017 0.017 0 0.017 0.017 0.017 0
94 | -dishing_thickness (um) 0 0 0 0 0 0 0.1584 0
95 | -alpha_scatter (-) 1 1 1 0 1 1 1 0
96 | -aspect_ratio (-) 2 2.4 2.2 0 2 2 2.2 0
97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0
98 | -horiz_dielectric_constant (-) 2.709 2.709 2.709 0 3.038 3.038 3.038 0
99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0
100 | -ild_thickness (um) 0.75 0.75 1.5 0 0.75 0.75 1.98 0
101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16
102 | -resistivity (u-ohm.m) 0.022 0.022 0.022 0.022 0.022 0.022 0.022 0.022
103 |
104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2
105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 66.6667
106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 6.51042e-16
107 | -tsv_pitch (um) 0 0 0 0 0 0
108 | -tsv_diameter (um) 0 0 0 0 0 0
109 | -tsv_length (um) 0 0 0 0 0 0
110 | -tsv_dielec_thickness (um) 0 0 0 0 0 0
111 | -tsv_contact_resistance (ohm) 0 0 0 0 0 0
112 | -tsv_depletion_width (um) 0 0 0 0 0 0
113 | -tsv_liner_dielectric_cons (-) 0 0 0 0 0 0
114 |
--------------------------------------------------------------------------------
/cacti/tech_params/22nm.dat:
--------------------------------------------------------------------------------
1 | parameters (unit) hp lstp lop lp-dram comm-dram
2 | -C_g_ideal (F/um) 3.27e-16 3.22e-16 3.16e-16 0 1.99e-16
3 | -C_fringe (F/um) 6e-17 8e-17 8e-17 0 5.3e-17
4 | -C_junc (F/um^2) 0 0 0 0 1e-15
5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16
6 | -l_phy (um) 0.009 0.014 0.011 0 0.022
7 | -l_elec (um) 0.00468 0.008 0.00604 0 0.0181
8 | -nmos_effective_resistance_multiplier (-) 1.45 1.99 1.73 0 1.69
9 | -Vdd (V) 0.8 0.8 0.6 0 0.9
10 | -Vth (V) 0.1395 0.40126 0.2315 0 1
11 | -Vdsat (V) 0.0233 0.0664 0.0181 0 0.0972
12 | -I_on_n (A/um) 0.0026264 0.0007276 0.0009161 0 0.0009105
13 | -I_on_p (A/um) 0.0013132 0.0003638 0.00045805 0 0.00045525
14 |
15 |
16 |
17 | parameters (unit) temp hp lstp lop lp-dram comm-dram
18 | -I_off_n (A/um) 0 1.216e-07 2.43e-11 1.31e-08 0 1.1e-13
19 | -I_off_n (A/um) 10 1.24e-07 4.85e-11 2.6e-08 0 2.11e-13
20 | -I_off_n (A/um) 20 1.272e-07 9.68e-11 5.14e-08 0 3.88e-13
21 | -I_off_n (A/um) 30 1.344e-07 1.94e-10 1.02e-07 0 6.9e-13
22 | -I_off_n (A/um) 40 1.52e-07 3.87e-10 2.02e-07 0 1.19e-12
23 | -I_off_n (A/um) 50 2.152e-07 7.73e-10 3.99e-07 0 1.98e-12
24 | -I_off_n (A/um) 60 4.256e-07 3.55e-10 7.91e-07 0 3.22e-12
25 | -I_off_n (A/um) 70 8.16e-07 3.09e-09 1.09e-06 0 5.09e-12
26 | -I_off_n (A/um) 80 1.296e-06 6.19e-09 2.09e-06 0 7.85e-12
27 | -I_off_n (A/um) 90 2.184e-06 1.24e-08 4.04e-06 0 1.18e-11
28 | -I_off_n (A/um) 100 4.88e-06 2.48e-08 4.48e-06 0 1.72e-11
29 | -I_g_on_n (A/um) 0 1.81e-09 4.51e-10 2.74e-09 0 0
30 | -I_g_on_n (A/um) 10 1.81e-09 4.51e-10 2.74e-09 0 0
31 | -I_g_on_n (A/um) 20 1.81e-09 4.51e-10 2.74e-09 0 0
32 | -I_g_on_n (A/um) 30 1.81e-09 4.51e-10 2.74e-09 0 0
33 | -I_g_on_n (A/um) 40 1.81e-09 4.51e-10 2.74e-09 0 0
34 | -I_g_on_n (A/um) 50 1.81e-09 4.51e-10 2.74e-09 0 0
35 | -I_g_on_n (A/um) 60 1.81e-09 4.51e-10 2.74e-09 0 0
36 | -I_g_on_n (A/um) 70 1.81e-09 4.51e-10 2.74e-09 0 0
37 | -I_g_on_n (A/um) 80 1.81e-09 4.51e-10 2.74e-09 0 0
38 | -I_g_on_n (A/um) 90 1.81e-09 4.51e-10 2.74e-09 0 0
39 | -I_g_on_n (A/um) 100 1.81e-09 4.51e-10 2.74e-09 0 0
40 |
41 |
42 | parameters (unit) hp lstp lop lp-dram comm-dram
43 | -C_ox (F/um^2) 3.63e-14 2.3e-14 2.87e-14 0 9.06e-15
44 | -t_ox (um) 0.00055 0.0011 0.0008 0 0.0035
45 | -n2p_drv_rt (-) 2 2 2 0 1.95
46 | -lch_lk_rdc (-) 0.305437 0.529101 0.420168 0 1
47 | -Mobility_n (um^2/V.sec) 4.2607e+10 7.3809e+10 6.9837e+10 0 3.6729e+10
48 | -gmp_to_gmn_multiplier (-) 1.38 0.99 1.11 0 0.9
49 | -vpp (V) 0 0 0 0 2.3
50 |
51 | SRAM
52 | parameters cell_type hp lstp lop lp-dram comm-dram
53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31
54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23
55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08
56 | -area_cell (um^2) 0 146 146 146 146 146
57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46
58 |
59 | CAM
60 | parameters cell_type hp lstp lop lp-dram comm-dram
61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31
62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23
63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08
64 | -area_cell (um^2) 1 292 292 292 292 292
65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92
66 |
67 | DRAM
68 | parameters cell_type hp lstp lop lp-dram comm-dram
69 | -vdd_cell (V) 2 0 0 0 0 0
70 | -Wmemcella (um) 2 0 0 0 0 0.022
71 | -Wmemcellpmos (um) 2 0 0 0 0 0
72 | -Wmemcellnmos (um) 2 0 0 0 0 0
73 | -area_cell (um^2) 2 0 0 0 0 0.001936
74 | -asp_ratio_cell (-) 2 0 0 0 0 1
75 |
76 | parameters hp lstp lop lp-dram comm-dram
77 | -dram_cell_I_on (A/um) 0 0 0 0 2e-05
78 | -dram_cell_Vdd (V) 0 0 0 0 0.9
79 | -dram_cell_C (F) 0 0 0 0 3e-14
80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 0 1e-15
81 |
82 |
83 | -logic_scaling_co_eff (-) 0.2401
84 | -core_tx_density (1/um^2) 2.55102
85 | -sckt_co_eff (-) 1.1296
86 | -chip_layout_overhead (-) 1.2
87 | -macro_layout_overhead (-) 1.1
88 | -sense_delay (sec) 3e-11
89 | -sense_dy_power (J) 2.16e-15
90 |
91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3
92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2
93 | -barrier_thickness (um) 0 0 0 0 0.003 0.003 0.003 0
94 | -dishing_thickness (um) 0 0 0 0 0 0 0.01936 0
95 | -alpha_scatter (-) 1 1 1 0 1.05 1.05 1.05 0
96 | -aspect_ratio (-) 3 3 3 0 2 2 2.2 0
97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0
98 | -horiz_dielectric_constant (-) 1.414 1.414 1.414 0 2.104 2.104 2.104 0
99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0
100 | -ild_thickness (um) 0.15 0.15 0.3 0 0.15 0.15 0.275 0
101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16
102 | -resistivity (u-ohm.m) 0.018 0.018 0.018 0.018 0.022 0.022 0.022 0.022
103 |
104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2
105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 545.455
106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 2.75213e-15
107 | -tsv_pitch (um) 0.8 40 0 1.5 9 0
108 | -tsv_diameter (um) 0.4 7.5 0 0.8 4.5 0
109 | -tsv_length (um) 4 50 0 10 25 0
110 | -tsv_dielec_thickness (um) 0.1 0.2 0 0.1 0.1 0
111 | -tsv_contact_resistance (ohm) 0.1 0.2 0 0.1 0.1 0
112 | -tsv_depletion_width (um) 0.6 0.6 0 0.6 0.6 0
113 | -tsv_liner_dielectric_cons (-) 1.414 1.414 0 2.104 2.104 0
114 |
--------------------------------------------------------------------------------
/cacti/tech_params/32nm.dat:
--------------------------------------------------------------------------------
1 | parameters (unit) hp lstp lop lp-dram comm-dram
2 | -C_g_ideal (F/um) 5.34e-16 4.58e-16 4.54e-16 7.45e-16 2.56e-16
3 | -C_fringe (F/um) 4e-17 5.3e-17 5.7e-17 5.3e-17 5.3e-17
4 | -C_junc (F/um^2) 1e-15 1e-15 1e-15 1e-15 1e-15
5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16
6 | -l_phy (um) 0.013 0.02 0.016 0.056 0.032
7 | -l_elec (um) 0.01013 0.0173 0.01232 0.0419 0.0205
8 | -nmos_effective_resistance_multiplier (-) 1.49 1.99 1.73 1.65 1.69
9 | -Vdd (V) 0.9 1 0.6 1 1
10 | -Vth (V) 0.21835 0.513 0.24227 0.44467 1
11 | -Vdsat (V) 0.0509 0.0864 0.0464 0.174 0.129
12 | -I_on_n (A/um) 0.0022117 0.0006836 0.0008278 0.0010554 0.0010245
13 | -I_on_p (A/um) 0.00110585 0.0003418 0.0004139 0.0005277 0.00051225
14 |
15 |
16 |
17 | parameters (unit) temp hp lstp lop lp-dram comm-dram
18 | -I_off_n (A/um) 0 1.52e-07 2.06e-11 5.94e-08 3.57e-11 3.63e-14
19 | -I_off_n (A/um) 10 1.55e-07 3.3e-11 7.23e-08 5.51e-11 7.18e-14
20 | -I_off_n (A/um) 20 1.59e-07 5.15e-11 8.7e-08 8.27e-11 1.36e-13
21 | -I_off_n (A/um) 30 1.68e-07 7.83e-11 1.04e-07 1.21e-10 2.49e-13
22 | -I_off_n (A/um) 40 1.9e-07 1.16e-10 1.22e-07 1.74e-10 4.41e-13
23 | -I_off_n (A/um) 50 2.69e-07 1.69e-10 1.43e-07 2.45e-10 7.55e-13
24 | -I_off_n (A/um) 60 5.32e-07 2.4e-10 1.65e-07 3.38e-10 1.26e-12
25 | -I_off_n (A/um) 70 1.02e-06 3.34e-10 1.9e-07 4.53e-10 2.03e-12
26 | -I_off_n (A/um) 80 1.62e-06 4.54e-10 2.15e-07 5.87e-10 3.19e-12
27 | -I_off_n (A/um) 90 2.73e-06 5.96e-10 2.39e-07 7.29e-10 4.87e-12
28 | -I_off_n (A/um) 100 6.1e-06 7.44e-10 2.63e-07 8.87e-10 7.16e-12
29 | -I_g_on_n (A/um) 0 6.55e-08 3.73e-11 2.93e-09 0 0
30 | -I_g_on_n (A/um) 10 6.55e-08 3.73e-11 2.93e-09 0 0
31 | -I_g_on_n (A/um) 20 6.55e-08 3.73e-11 2.93e-09 0 0
32 | -I_g_on_n (A/um) 30 6.55e-08 3.73e-11 2.93e-09 0 0
33 | -I_g_on_n (A/um) 40 6.55e-08 3.73e-11 2.93e-09 0 0
34 | -I_g_on_n (A/um) 50 6.55e-08 3.73e-11 2.93e-09 0 0
35 | -I_g_on_n (A/um) 60 6.55e-08 3.73e-11 2.93e-09 0 0
36 | -I_g_on_n (A/um) 70 6.55e-08 3.73e-11 2.93e-09 0 0
37 | -I_g_on_n (A/um) 80 6.55e-08 3.73e-11 2.93e-09 0 0
38 | -I_g_on_n (A/um) 90 6.55e-08 3.73e-11 2.93e-09 0 0
39 | -I_g_on_n (A/um) 100 6.55e-08 3.73e-11 2.93e-09 0 0
40 |
41 |
42 | parameters (unit) hp lstp lop lp-dram comm-dram
43 | -C_ox (F/um^2) 4.11e-14 2.29e-14 2.84e-14 1.48e-14 7.99e-15
44 | -t_ox (um) 0.0005 0.0012 0.0009 0.002 0.004
45 | -n2p_drv_rt (-) 2.41 2.23 2.28 2.05 1.95
46 | -lch_lk_rdc (-) 0.269833 0.518135 0.529101 1 1
47 | -Mobility_n (um^2/V.sec) 3.6184e+10 3.4746e+10 5.1352e+10 4.0812e+10 3.8076e+10
48 | -gmp_to_gmn_multiplier (-) 1.38 0.99 1.11 0.9 0.9
49 | -vpp (V) 0 0 0 1.5 2.6
50 |
51 | SRAM
52 | parameters cell_type hp lstp lop lp-dram comm-dram
53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31
54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23
55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08
56 | -area_cell (um^2) 0 146 146 146 146 146
57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46
58 |
59 | CAM
60 | parameters cell_type hp lstp lop lp-dram comm-dram
61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31
62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23
63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08
64 | -area_cell (um^2) 1 292 292 292 292 292
65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92
66 |
67 | DRAM
68 | parameters cell_type hp lstp lop lp-dram comm-dram
69 | -vdd_cell (V) 2 0 0 0 1 1
70 | -Wmemcella (um) 2 0 0 0 0.056 0.032
71 | -Wmemcellpmos (um) 2 0 0 0 0 0
72 | -Wmemcellnmos (um) 2 0 0 0 0 0
73 | -area_cell (um^2) 2 0 0 0 0.03136 0.006144
74 | -asp_ratio_cell (-) 2 0 0 0 1.46 1.5
75 |
76 | parameters hp lstp lop lp-dram comm-dram
77 | -dram_cell_I_on (A/um) 0 0 0 3.6e-05 2e-05
78 | -dram_cell_Vdd (V) 0 0 0 1 1
79 | -dram_cell_C (F) 0 0 0 2e-14 3e-14
80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 1.89e-11 1e-15
81 |
82 |
83 | -logic_scaling_co_eff (-) 0.343
84 | -core_tx_density (1/um^2) 1.78571
85 | -sckt_co_eff (-) 1.1111
86 | -chip_layout_overhead (-) 1.2
87 | -macro_layout_overhead (-) 1.1
88 | -sense_delay (sec) 3e-11
89 | -sense_dy_power (J) 2.16e-15
90 |
91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3
92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2
93 | -barrier_thickness (um) 0 0 0 0 0.003 0.003 0.003 0
94 | -dishing_thickness (um) 0 0 0 0 0 0 0.02816 0
95 | -alpha_scatter (-) 1 1 1 0 1 1 1 0
96 | -aspect_ratio (-) 3 3 3 0 2 2 2.2 0
97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0
98 | -horiz_dielectric_constant (-) 1.664 1.664 1.664 0 2.214 2.214 2.214 0
99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0
100 | -ild_thickness (um) 0.21 0.21 0.42 0 0.21 0.21 0.385 0
101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16
102 | -resistivity (u-ohm.m) 0.018 0.018 0.018 0.018 0.022 0.022 0.022 0.022
103 |
104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2
105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 375
106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 1.89209e-15
107 | -tsv_pitch (um) 1.4 15 0 4 30 0
108 | -tsv_diameter (um) 0.7 2.3 0 2 3.8 0
109 | -tsv_length (um) 5 30 0 15 37.5 0
110 | -tsv_dielec_thickness (um) 0.1 0.2 0 0.1 0.5 0
111 | -tsv_contact_resistance (ohm) 0.1 0.2 0 0.1 0.2 0
112 | -tsv_depletion_width (um) 0.6 0.6 0 0.6 0.6 0
113 | -tsv_liner_dielectric_cons (-) 1.664 1.664 0 2.214 2.214 0
114 |
--------------------------------------------------------------------------------
/cacti/tech_params/45nm.dat:
--------------------------------------------------------------------------------
1 | parameters (unit) hp lstp lop lp-dram comm-dram
2 | -C_g_ideal (F/um) 6.78e-16 5.18e-16 6.2e-16 1.1e-15 3.59e-16
3 | -C_fringe (F/um) 5e-17 8e-17 7.3e-17 8e-17 8e-17
4 | -C_junc (F/um^2) 1e-15 1e-15 1e-15 1e-15 1e-15
5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16
6 | -l_phy (um) 0.018 0.028 0.022 0.078 0.045
7 | -l_elec (um) 0.01345 0.0212 0.016 0.0504 0.0298
8 | -nmos_effective_resistance_multiplier (-) 1.51 1.99 1.76 1.65 1.69
9 | -Vdd (V) 1 1.1 0.7 1.1 1.1
10 | -Vth (V) 0.18035 0.50245 0.22599 0.44559 1
11 | -Vdsat (V) 0.0938 0.0912 0.0571 0.181 0.147
12 | -I_on_n (A/um) 0.0020466 0.0006662 0.0007489 0.000456 0.0009994
13 | -I_on_p (A/um) 0.0010233 0.0003331 0.00037445 0.000228 0.0004997
14 |
15 |
16 |
17 | parameters (unit) temp hp lstp lop lp-dram comm-dram
18 | -I_off_n (A/um) 0 2.8e-07 1.01e-11 4.03e-09 2.54e-11 1.31e-14
19 | -I_off_n (A/um) 10 3.28e-07 1.65e-11 5.02e-09 3.94e-11 2.68e-14
20 | -I_off_n (A/um) 20 3.81e-07 2.62e-11 6.18e-09 5.95e-11 5.25e-14
21 | -I_off_n (A/um) 30 4.39e-07 4.06e-11 7.51e-09 8.79e-11 9.88e-14
22 | -I_off_n (A/um) 40 5.02e-07 6.12e-11 9.04e-09 1.27e-10 1.79e-13
23 | -I_off_n (A/um) 50 5.69e-07 9.02e-11 1.08e-08 1.79e-10 3.15e-13
24 | -I_off_n (A/um) 60 6.42e-07 1.3e-10 1.27e-08 2.47e-10 5.36e-13
25 | -I_off_n (A/um) 70 7.2e-07 1.83e-10 1.47e-08 3.31e-10 8.86e-13
26 | -I_off_n (A/um) 80 8.03e-07 2.51e-10 1.66e-08 4.26e-10 1.42e-12
27 | -I_off_n (A/um) 90 8.91e-07 3.29e-10 1.84e-08 5.27e-10 2.2e-12
28 | -I_off_n (A/um) 100 9.84e-07 4.1e-10 2.03e-08 6.46e-10 3.29e-12
29 | -I_g_on_n (A/um) 0 3.59e-08 9.47e-12 3.24e-08 0 0
30 | -I_g_on_n (A/um) 10 3.59e-08 9.47e-12 4.01e-08 0 0
31 | -I_g_on_n (A/um) 20 3.59e-08 9.47e-12 4.9e-08 0 0
32 | -I_g_on_n (A/um) 30 3.59e-08 9.47e-12 5.92e-08 0 0
33 | -I_g_on_n (A/um) 40 3.59e-08 9.47e-12 7.08e-08 0 0
34 | -I_g_on_n (A/um) 50 3.59e-08 9.47e-12 8.38e-08 0 0
35 | -I_g_on_n (A/um) 60 3.59e-08 9.47e-12 9.82e-08 0 0
36 | -I_g_on_n (A/um) 70 3.59e-08 9.47e-12 1.14e-07 0 0
37 | -I_g_on_n (A/um) 80 3.59e-08 9.47e-12 1.29e-07 0 0
38 | -I_g_on_n (A/um) 90 3.59e-08 9.47e-12 1.43e-07 0 0
39 | -I_g_on_n (A/um) 100 3.59e-08 9.47e-12 1.54e-07 0 0
40 |
41 |
42 | parameters (unit) hp lstp lop lp-dram comm-dram
43 | -C_ox (F/um^2) 3.77e-14 2.01e-14 2.82e-14 1.41e-14 7.98e-15
44 | -t_ox (um) 0.00065 0.0014 0.0009 0.0021 0.004
45 | -n2p_drv_rt (-) 2.41 2.23 2.28 2.05 1.95
46 | -lch_lk_rdc (-) 0.282008 0.480769 0.520833 1 1
47 | -Mobility_n (um^2/V.sec) 2.6668e+10 3.6396e+10 5.089e+10 4.263e+10 3.6858e+10
48 | -gmp_to_gmn_multiplier (-) 1.38 0.99 1.11 0.9 0.9
49 | -vpp (V) 0 0 0 1.5 2.7
50 |
51 | SRAM
52 | parameters cell_type hp lstp lop lp-dram comm-dram
53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31
54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23
55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08
56 | -area_cell (um^2) 0 146 146 146 146 146
57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46
58 |
59 | CAM
60 | parameters cell_type hp lstp lop lp-dram comm-dram
61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31
62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23
63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08
64 | -area_cell (um^2) 1 292 292 292 292 292
65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92
66 |
67 | DRAM
68 | parameters cell_type hp lstp lop lp-dram comm-dram
69 | -vdd_cell (V) 2 0 0 0 1.1 1.1
70 | -Wmemcella (um) 2 0 0 0 0.079 0.045
71 | -Wmemcellpmos (um) 2 0 0 0 0 0
72 | -Wmemcellnmos (um) 2 0 0 0 0 0
73 | -area_cell (um^2) 2 0 0 0 0.06162 0.01215
74 | -asp_ratio_cell (-) 2 0 0 0 1.46 1.5
75 |
76 | parameters hp lstp lop lp-dram comm-dram
77 | -dram_cell_I_on (A/um) 0 0 0 3.6e-05 2e-05
78 | -dram_cell_Vdd (V) 0 0 0 1.1 1.1
79 | -dram_cell_C (F) 0 0 0 2e-14 3e-14
80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 1.95e-11 1e-15
81 |
82 |
83 | -logic_scaling_co_eff (-) 0.49
84 | -core_tx_density (1/um^2) 1.25
85 | -sckt_co_eff (-) 1.1387
86 | -chip_layout_overhead (-) 1.2
87 | -macro_layout_overhead (-) 1.1
88 | -sense_delay (sec) 4e-11
89 | -sense_dy_power (J) 2.7e-15
90 |
91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3
92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2
93 | -barrier_thickness (um) 0 0 0 0 0.004 0.004 0.004 0
94 | -dishing_thickness (um) 0 0 0 0 0 0 0.0396 0
95 | -alpha_scatter (-) 1 1 1 0 1 1 1 0
96 | -aspect_ratio (-) 3 3 3 0 2 2 2.2 0
97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0
98 | -horiz_dielectric_constant (-) 1.958 1.958 1.958 0 2.46 2.46 2.46 0
99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0
100 | -ild_thickness (um) 0.315 0.315 0.63 0 0.315 0.315 0.55 0
101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16
102 | -resistivity (u-ohm.m) 0.018 0.018 0.018 0.018 0.022 0.022 0.022 0.022
103 |
104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2
105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 266.667
106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 1.6276e-15
107 | -tsv_pitch (um) 2.2 20 0 3.4 40 0
108 | -tsv_diameter (um) 1.1 3.1 0 1.7 5 0
109 | -tsv_length (um) 6 40 0 20 50 0
110 | -tsv_dielec_thickness (um) 0.1 0.2 0 0.1 0.5 0
111 | -tsv_contact_resistance (ohm) 0.1 0.2 0 0.1 0.2 0
112 | -tsv_depletion_width (um) 0.6 0.6 0 0.6 0.6 0
113 | -tsv_liner_dielectric_cons (-) 1.958 1.958 0 2.46 2.46 0
114 |
--------------------------------------------------------------------------------
/cacti/tech_params/65nm.dat:
--------------------------------------------------------------------------------
1 | parameters (unit) hp lstp lop lp-dram comm-dram
2 | -C_g_ideal (F/um) 4.69e-16 6.14e-16 6e-16 1.46e-15 4e-16
3 | -C_fringe (F/um) 7.7e-17 8e-17 8e-17 8e-17 8e-17
4 | -C_junc (F/um^2) 1e-15 1e-15 1e-15 1e-15 1e-15
5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16
6 | -l_phy (um) 0.025 0.045 0.032 0.12 0.065
7 | -l_elec (um) 0.019 0.0298 0.0216 0.0756 0.0426
8 | -nmos_effective_resistance_multiplier (-) 1.5 1.96 1.82 1.65 1.69
9 | -Vdd (V) 1.1 1.2 0.8 1.2 1.3
10 | -Vth (V) 0.19491 0.52354 0.28512 0.43806 1
11 | -Vdsat (V) 0.0771 0.128 0.292 0.43806 0.385
12 | -I_on_n (A/um) 0.0011972 0.0005192 0.0005731 0.0003998 0.001031
13 | -I_on_p (A/um) 0.0008708 0.000266 0.0003406 0.0002434 0.0005155
14 |
15 |
16 |
17 | parameters (unit) temp hp lstp lop lp-dram comm-dram
18 | -I_off_n (A/um) 0 1.96e-07 9.12e-12 4.9e-09 2.23e-11 1.8e-14
19 | -I_off_n (A/um) 10 2.29e-07 1.49e-11 6.49e-09 3.46e-11 3.64e-14
20 | -I_off_n (A/um) 20 2.66e-07 2.36e-11 8.45e-09 5.24e-11 7.03e-14
21 | -I_off_n (A/um) 30 3.05e-07 3.64e-11 1.08e-08 7.75e-11 1.31e-13
22 | -I_off_n (A/um) 40 3.49e-07 5.48e-11 1.37e-08 1.12e-10 2.35e-13
23 | -I_off_n (A/um) 50 3.95e-07 8.05e-11 1.71e-08 1.58e-10 4.09e-13
24 | -I_off_n (A/um) 60 4.45e-07 1.15e-10 2.09e-08 2.18e-10 6.89e-13
25 | -I_off_n (A/um) 70 4.97e-07 1.59e-10 2.48e-08 2.88e-10 1.13e-12
26 | -I_off_n (A/um) 80 5.48e-07 2.1e-10 2.84e-08 3.63e-10 1.78e-12
27 | -I_off_n (A/um) 90 5.94e-07 2.62e-10 3.13e-08 4.41e-10 2.71e-12
28 | -I_off_n (A/um) 100 6.3e-07 3.21e-10 3.42e-08 5.36e-10 3.99e-12
29 | -I_g_on_n (A/um) 0 4.09e-08 1.09e-10 9.61e-09 0 0
30 | -I_g_on_n (A/um) 10 4.09e-08 1.09e-10 9.61e-09 0 0
31 | -I_g_on_n (A/um) 20 4.09e-08 1.09e-10 9.61e-09 0 0
32 | -I_g_on_n (A/um) 30 4.09e-08 1.09e-10 9.61e-09 0 0
33 | -I_g_on_n (A/um) 40 4.09e-08 1.09e-10 9.61e-09 0 0
34 | -I_g_on_n (A/um) 50 4.09e-08 1.09e-10 9.61e-09 0 0
35 | -I_g_on_n (A/um) 60 4.09e-08 1.09e-10 9.61e-09 0 0
36 | -I_g_on_n (A/um) 70 4.09e-08 1.09e-10 9.61e-09 0 0
37 | -I_g_on_n (A/um) 80 4.09e-08 1.09e-10 9.61e-09 0 0
38 | -I_g_on_n (A/um) 90 4.09e-08 1.09e-10 9.61e-09 0 0
39 | -I_g_on_n (A/um) 100 4.09e-08 1.09e-10 9.61e-09 0 0
40 |
41 |
42 | parameters (unit) hp lstp lop lp-dram comm-dram
43 | -C_ox (F/um^2) 1.88e-14 1.36e-14 1.87e-14 1.22e-14 6.16e-15
44 | -t_ox (um) 0.0011 0.0019 0.0012 0.0022 0.005
45 | -n2p_drv_rt (-) 2.41 2.23 2.28 2.05 2.39
46 | -lch_lk_rdc (-) 0.26738 0.35461 0.487805 1 1
47 | -Mobility_n (um^2/V.sec) 4.3624e+10 3.4121e+10 4.9519e+10 3.2832e+10 3.0344e+10
48 | -gmp_to_gmn_multiplier (-) 1.38 0.99 1.11 0.9 0.9
49 | -vpp (V) 0 0 0 1.6 3.3
50 |
51 | SRAM
52 | parameters cell_type hp lstp lop lp-dram comm-dram
53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31
54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23
55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08
56 | -area_cell (um^2) 0 146 146 146 146 146
57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46
58 |
59 | CAM
60 | parameters cell_type hp lstp lop lp-dram comm-dram
61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31
62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23
63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08
64 | -area_cell (um^2) 1 292 292 292 292 292
65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92
66 |
67 | DRAM
68 | parameters cell_type hp lstp lop lp-dram comm-dram
69 | -vdd_cell (V) 2 0 0 0 0 1.2
70 | -Wmemcella (um) 2 0 0 0 0.09 0.065
71 | -Wmemcellpmos (um) 2 0 0 0 0 0
72 | -Wmemcellnmos (um) 2 0 0 0 0 0
73 | -area_cell (um^2) 2 0 0 0 0.11 0.02535
74 | -asp_ratio_cell (-) 2 0 0 0 1.46 1.5
75 |
76 | parameters hp lstp lop lp-dram comm-dram
77 | -dram_cell_I_on (A/um) 0 0 0 3.6e-05 2e-05
78 | -dram_cell_Vdd (V) 0 0 0 1.2 1.3
79 | -dram_cell_C (F) 0 0 0 2e-14 3e-14
80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 1.96e-11 1e-15
81 |
82 |
83 | -logic_scaling_co_eff (-) 0.7
84 | -core_tx_density (1/um^2) 0.875
85 | -sckt_co_eff (-) 1.1359
86 | -chip_layout_overhead (-) 1.2
87 | -macro_layout_overhead (-) 1.1
88 | -sense_delay (sec) 2e-10
89 | -sense_dy_power (J) 5.7e-15
90 |
91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3
92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2
93 | -barrier_thickness (um) 0 0 0 0 0.006 0.006 0.006 0
94 | -dishing_thickness (um) 0 0 0 0 0 0 0.0572 0
95 | -alpha_scatter (-) 1 1 1 0 1 1 1 0
96 | -aspect_ratio (-) 2.7 2.7 2.8 0 2 2 2.2 0
97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0
98 | -horiz_dielectric_constant (-) 2.303 2.303 2.303 0 2.734 2.734 2.734 0
99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0
100 | -ild_thickness (um) 0.405 0.405 0.81 0 0.405 0.405 0.77 0
101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16
102 | -resistivity (u-ohm.m) 0.018 0.018 0.018 0.018 0.022 0.022 0.022 0.022
103 |
104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2
105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 184.615
106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 1.57752e-15
107 | -tsv_pitch (um) 3.2 30 0 5 60 0
108 | -tsv_diameter (um) 1.6 4.6 0 2.5 7.5 0
109 | -tsv_length (um) 7 50 0 25 62.5 0
110 | -tsv_dielec_thickness (um) 0.1 0.2 0 0.1 0.5 0
111 | -tsv_contact_resistance (ohm) 0.1 0.2 0 0.1 0.2 0
112 | -tsv_depletion_width (um) 0.6 0.6 0 0.6 0.6 0
113 | -tsv_liner_dielectric_cons (-) 2.303 2.303 0 2.734 2.734 0
114 |
--------------------------------------------------------------------------------
/cacti/tech_params/90nm.dat:
--------------------------------------------------------------------------------
1 | parameters (unit) hp lstp lop lp-dram comm-dram
2 | -C_g_ideal (F/um) 6.64e-16 9.15e-16 8.45e-16 1.47e-15 5.08e-16
3 | -C_fringe (F/um) 8e-17 8e-17 8e-17 8e-17 8e-17
4 | -C_junc (F/um^2) 1e-15 1e-15 1e-15 1e-15 1e-15
5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16
6 | -l_phy (um) 0.037 0.075 0.053 0.12 0.09
7 | -l_elec (um) 0.0266 0.0486 0.0354 0.0756 0.0576
8 | -nmos_effective_resistance_multiplier (-) 1.54 1.92 1.77 1.65 1.62
9 | -Vdd (V) 1.2 1.3 0.9 1.2 1.6
10 | -Vth (V) 0.23707 0.48203 0.30764 0.4545 1
11 | -Vdsat (V) 0.128 0.373 0.113 0.3 0.32
12 | -I_on_n (A/um) 0.0010769 0.0005036 0.0003866 0.0003216 0.0010943
13 | -I_on_p (A/um) 0.0007126 0.0002351 0.0002097 0.0002033 0.00054715
14 |
15 |
16 |
17 | parameters (unit) temp hp lstp lop lp-dram comm-dram
18 | -I_off_n (A/um) 0 3.24e-08 2.81e-12 2.14e-09 1.42e-11 5.8e-15
19 | -I_off_n (A/um) 10 4.01e-08 4.76e-12 2.9e-09 2.25e-11 1.21e-14
20 | -I_off_n (A/um) 20 4.9e-08 7.82e-12 3.87e-09 3.46e-11 2.42e-14
21 | -I_off_n (A/um) 30 5.92e-08 1.25e-11 5.07e-09 5.18e-11 4.65e-14
22 | -I_off_n (A/um) 40 7.08e-08 1.94e-11 6.54e-09 7.58e-11 8.6e-14
23 | -I_off_n (A/um) 50 8.38e-08 2.94e-11 8.27e-08 1.08e-10 1.54e-13
24 | -I_off_n (A/um) 60 9.82e-08 4.36e-11 1.02e-07 1.51e-10 2.66e-13
25 | -I_off_n (A/um) 70 1.14e-07 6.32e-11 1.2e-07 2.02e-10 4.45e-13
26 | -I_off_n (A/um) 80 1.29e-07 8.95e-11 1.36e-08 2.57e-10 7.17e-13
27 | -I_off_n (A/um) 90 1.43e-07 1.25e-10 1.52e-08 3.14e-10 1.11e-12
28 | -I_off_n (A/um) 100 1.54e-07 1.7e-10 1.73e-08 3.85e-10 1.67e-12
29 | -I_g_on_n (A/um) 0 1.65e-08 3.87e-11 4.31e-08 0 0
30 | -I_g_on_n (A/um) 10 1.65e-08 3.87e-11 4.31e-08 0 0
31 | -I_g_on_n (A/um) 20 1.65e-08 3.87e-11 4.31e-08 0 0
32 | -I_g_on_n (A/um) 30 1.65e-08 3.87e-11 4.31e-08 0 0
33 | -I_g_on_n (A/um) 40 1.65e-08 3.87e-11 4.31e-08 0 0
34 | -I_g_on_n (A/um) 50 1.65e-08 3.87e-11 4.31e-08 0 0
35 | -I_g_on_n (A/um) 60 1.65e-08 3.87e-11 4.31e-08 0 0
36 | -I_g_on_n (A/um) 70 1.65e-08 3.87e-11 4.31e-08 0 0
37 | -I_g_on_n (A/um) 80 1.65e-08 3.87e-11 4.31e-08 0 0
38 | -I_g_on_n (A/um) 90 1.65e-08 3.87e-11 4.31e-08 0 0
39 | -I_g_on_n (A/um) 100 1.65e-08 3.87e-11 4.31e-08 0 0
40 |
41 |
42 | parameters (unit) hp lstp lop lp-dram comm-dram
43 | -C_ox (F/um^2) 1.79e-14 1.22e-14 1.59e-14 1.22e-14 5.65e-15
44 | -t_ox (um) 0.0012 0.0022 0.0015 0.0022 0.0055
45 | -n2p_drv_rt (-) 2.45 2.44 2.54 1.95 2.05
46 | -lch_lk_rdc (-) 1 1 1 1 1
47 | -Mobility_n (um^2/V.sec) 3.4216e+10 3.5676e+10 4.6039e+10 3.2395e+10 3.022e+10
48 | -gmp_to_gmn_multiplier (-) 1.22 0.88 0.98 0.9 0.9
49 | -vpp (V) 0 0 0 1.6 3.7
50 |
51 | SRAM
52 | parameters cell_type hp lstp lop lp-dram comm-dram
53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31
54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23
55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08
56 | -area_cell (um^2) 0 146 146 146 146 146
57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46
58 |
59 | CAM
60 | parameters cell_type hp lstp lop lp-dram comm-dram
61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31
62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23
63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08
64 | -area_cell (um^2) 1 292 292 292 292 292
65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92
66 |
67 | DRAM
68 | parameters cell_type hp lstp lop lp-dram comm-dram
69 | -vdd_cell (V) 2 0 0 0 1.2 1.6
70 | -Wmemcella (um) 2 0 0 0 0.14 0.09
71 | -Wmemcellpmos (um) 2 0 0 0 0 0
72 | -Wmemcellnmos (um) 2 0 0 0 0 0
73 | -area_cell (um^2) 2 0 0 0 0.168 0.0486
74 | -asp_ratio_cell (-) 2 0 0 0 1.46 1.5
75 |
76 | parameters hp lstp lop lp-dram comm-dram
77 | -dram_cell_I_on (A/um) 0 0 0 4.5e-05 2e-05
78 | -dram_cell_Vdd (V) 0 0 0 1.2 1.6
79 | -dram_cell_C (F) 0 0 0 2e-14 3e-14
80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 2.11e-11 1e-15
81 |
82 |
83 | -logic_scaling_co_eff (-) 1
84 | -core_tx_density (1/um^2) 0.6125
85 | -sckt_co_eff (-) 1.1539
86 | -chip_layout_overhead (-) 1.2
87 | -macro_layout_overhead (-) 1.1
88 | -sense_delay (sec) 2.8e-10
89 | -sense_dy_power (J) 1.47e-14
90 |
91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3
92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2
93 | -barrier_thickness (um) 0.01 0.01 0.01 0 0.008 0.008 0.008 0
94 | -dishing_thickness (um) 0 0 0 0 0 0 0.0792 0
95 | -alpha_scatter (-) 1 1 1 0 1 1 1 0
96 | -aspect_ratio (-) 2.4 2.4 2.7 0 2 2 2.2 0
97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0
98 | -horiz_dielectric_constant (-) 2.709 2.709 2.709 0 3.038 3.038 3.038 0
99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0
100 | -ild_thickness (um) 0.48 0.48 0.96 0 0.48 0.48 1.1 0
101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16
102 | -resistivity (u-ohm.m) 0.022 0.022 0.022 0.022 0.022 0.022 0.022 0.022
103 |
104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2
105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 133.333
106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 1.30208e-15
107 | -tsv_pitch (um) 4 45 0 6.9 90 0
108 | -tsv_diameter (um) 2 6.9 0 3.5 11.3 0
109 | -tsv_length (um) 8 60 0 30 75 0
110 | -tsv_dielec_thickness (um) 0.1 0.2 0 0.1 0.5 0
111 | -tsv_contact_resistance (ohm) 0.1 0.2 0 0.1 0.2 0
112 | -tsv_depletion_width (um) 0.6 0.6 0 0.6 0.6 0
113 | -tsv_liner_dielectric_cons (-) 2.709 2.709 0 3.038 3.038 0
114 |
--------------------------------------------------------------------------------
/cacti/uca.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 |
34 | #ifndef __UCA_H__
35 | #define __UCA_H__
36 |
37 | #include "area.h"
38 | #include "bank.h"
39 | #include "component.h"
40 | #include "parameter.h"
41 | #include "htree2.h"
42 | #include "memorybus.h"
43 | #include "basic_circuit.h"
44 | #include "cacti_interface.h"
45 |
46 |
47 |
48 | class UCA : public Component // array model built from one DynamicParameter; NOTE(review): "UCA" presumably = uniform cache access — confirm
49 | {
50 | public:
51 | UCA(const DynamicParameter & dyn_p); // construct from a parameter set; the definition is not in this header
52 | ~UCA(); // user-declared destructor; NOTE(review): presumably releases the raw pointers below — confirm in uca.cc
53 | double compute_delays(double inrisetime); // returns outrisetime
54 | void compute_power_energy(); // no arguments, no return value; NOTE(review): presumably fills the power/energy members below — confirm
55 | // Every member below is public: no other access specifier follows `public:`.
56 | DynamicParameter dp; // stored by value; NOTE(review): presumably a copy of the constructor argument — confirm
57 | Bank bank; // stored by value
58 | // Htree2 objects, one pointer per direction/kind; raw pointers, ownership is not visible from this header.
59 | Htree2 * htree_in_add;
60 | Htree2 * htree_in_data;
61 | Htree2 * htree_out_data;
62 | Htree2 * htree_in_search;
63 | Htree2 * htree_out_search;
64 | // Memorybus objects (RAS / CAS / data by name); raw pointers, ownership is not visible from this header.
65 | Memorybus * membus_RAS;
66 | Memorybus * membus_CAS;
67 | Memorybus * membus_data;
68 |
69 | powerDef power_routing_to_bank;
70 |
71 | uint32_t nbanks;
72 | // Per-bank signal counts and port counts; NOTE(review): meanings are inferred from the names only — confirm in uca.cc.
73 | int num_addr_b_bank;
74 | int num_di_b_bank;
75 | int num_do_b_bank;
76 | int num_si_b_bank;
77 | int num_so_b_bank;
78 | int RWP, ERP, EWP,SCHP;
79 | double area_all_dataramcells;
80 | double total_area_per_die;
81 | // Energy / power results; units are not stated in this header.
82 | double dyn_read_energy_from_closed_page;
83 | double dyn_read_energy_from_open_page;
84 | double dyn_read_energy_remaining_words_in_burst;
85 |
86 | double refresh_power; // only for DRAM
87 | double activate_energy;
88 | double read_energy;
89 | double write_energy;
90 | double precharge_energy;
91 | double leak_power_subbank_closed_page;
92 | double leak_power_subbank_open_page;
93 | double leak_power_request_and_reply_networks;
94 | // Delay / timing results; units are not stated in this header.
95 | double delay_array_to_sa_mux_lev_1_decoder;
96 | double delay_array_to_sa_mux_lev_2_decoder;
97 | double delay_before_subarray_output_driver;
98 | double delay_from_subarray_out_drv_to_out;
99 | double access_time;
100 | double precharge_delay;
101 | double multisubbank_interleave_cycle_time;
102 | // NOTE(review): the names below match DRAM timing parameters (tRAS, tCAS, ...) — confirm how they are derived.
103 | double t_RAS, t_CAS, t_RCD, t_RC, t_RP, t_RRD;
104 | double activate_power, read_power, write_power;
105 | // TSV totals, address/data bit counts and an area breakdown; NOTE(review): TSV fields presumably matter only for stacked (3D) configurations — confirm.
106 | double delay_TSV_tot, area_TSV_tot, dyn_pow_TSV_tot, dyn_pow_TSV_per_access;
107 | unsigned int num_TSV_tot;
108 | unsigned int comm_bits, row_add_bits, col_add_bits, data_bits;
109 | double area_lwl_drv, area_row_predec_dec, area_col_predec_dec,
110 | area_subarray, area_bus, area_address_bus, area_data_bus, area_data_drv, area_IOSA, area_sense_amp,
111 | area_per_bank;
112 |
113 | };
114 |
115 | #endif
116 |
117 |
--------------------------------------------------------------------------------
/cacti/version_cacti.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * McPAT
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 | #ifndef VERSION_H_
33 | #define VERSION_H_
34 |
35 | #define VER_MAJOR_CACTI 7 /* 3dd */
36 | #define VER_MINOR_CACTI 0
37 | #define VER_COMMENT_CACTI "3DD Prerelease"
38 | #define VER_UPDATE_CACTI "Aug, 2012"
39 |
40 | #endif /* VERSION_H_ */
41 |
--------------------------------------------------------------------------------
/cacti/wire.h:
--------------------------------------------------------------------------------
1 | /*****************************************************************************
2 | * CACTI 7.0
3 | * SOFTWARE LICENSE AGREEMENT
4 | * Copyright 2015 Hewlett-Packard Development Company, L.P.
5 | * All Rights Reserved
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met: redistributions of source code must retain the above copyright
10 | * notice, this list of conditions and the following disclaimer;
11 | * redistributions in binary form must reproduce the above copyright
12 | * notice, this list of conditions and the following disclaimer in the
13 | * documentation and/or other materials provided with the distribution;
14 | * neither the name of the copyright holders nor the names of its
15 | * contributors may be used to endorse or promote products derived from
16 | * this software without specific prior written permission.
17 |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 | *
30 | ***************************************************************************/
31 |
32 |
33 |
34 | #ifndef __WIRE_H__
35 | #define __WIRE_H__
36 |
37 | #include "basic_circuit.h"
38 | #include "component.h"
39 | #include "parameter.h"
40 | #include "assert.h"
41 | #include "cacti_interface.h"
42 | #include <iostream>
43 | #include <list>
44 |
45 | class Wire : public Component // wire model object; derives from Component
46 | {
47 | public:
48 | Wire(enum Wire_type wire_model, double len /* in u*/,
49 | int nsense = 1/* no. of sense amps connected to the low-swing wire */,
50 | double width_scaling = 1,
51 | double spacing_scaling = 1,
52 | enum Wire_placement wire_placement = outside_mat,
53 | double resistivity = CU_RESISTIVITY,
54 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global));
55 | ~Wire();
56 | // Every parameter of the overload below is defaulted, so it also serves as the default constructor.
57 | Wire( double width_scaling = 1,
58 | double spacing_scaling = 1,
59 | enum Wire_placement wire_placement = outside_mat,
60 | double resistivity = CU_RESISTIVITY,
61 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global)
62 | ); // should be used only once for initializing static members
63 | void init_wire();
64 |
65 | void calculate_wire_stats();
66 | void delay_optimal_wire();
67 | double wire_cap(double len, bool call_from_outside=false);
68 | double wire_res(double len);
69 | void low_swing_model();
70 | double signal_fall_time();
71 | double signal_rise_time();
72 | double sense_amp_input_cap();
73 | // Public per-instance state; units are not stated in this header.
74 | enum Wire_type wt;
75 | double wire_spacing;
76 | double wire_width;
77 | enum Wire_placement wire_placement;
78 | double repeater_size;
79 | double repeater_spacing;
80 | double wire_length;
81 | double in_rise_time, out_rise_time;
82 |
83 | void set_in_rise_time(double rt) // plain setter for in_rise_time
84 | {
85 | in_rise_time = rt;
86 | }
87 | static Component global; // static members shared by all Wire objects; see the constructor note above about one-time initialization
88 | static Component global_5;
89 | static Component global_10;
90 | static Component global_20;
91 | static Component global_30;
92 | static Component low_swing;
93 | static double wire_width_init;
94 | static double wire_spacing_init;
95 | void print_wire();
96 |
97 | private:
98 |
99 | int nsense; // no. of sense amps connected to a low-swing wire if it
100 | // is broadcasting data to multiple destinations
101 | // width and spacing scaling factor can be used
102 | // to model low level wires or special
103 | // fat wires
104 | double w_scale, s_scale;
105 | double resistivity;
106 | powerDef wire_model (double space, double size, double *delay);
107 | std::list<Component> repeated_wire; // template argument restored (it had been stripped, leaving an ill-formed `list repeated_wire;`); needs <list>
108 | void update_fullswing();
109 | static int initialized;
110 |
111 |
112 | //low-swing
113 | Component transmitter;
114 | Component l_wire;
115 | Component sense_amp;
116 |
117 | double min_w_pmos;
118 |
119 | /*TechnologyParameter::*/DeviceType *deviceType; // raw pointer; NOTE(review): presumably the `dt` constructor argument — confirm in wire.cc
120 |
121 | };
122 |
123 | #endif
124 |
--------------------------------------------------------------------------------
/config/sata_config.yaml:
--------------------------------------------------------------------------------
1 | # Define your yaml config for a single PE here.
2 | arch:
3 | n_pe: 128
4 |
5 | pe:
6 | mul: 0
7 | acc: 1
8 | add: 1
9 | and: 1
10 | comp: 1
11 | mux: 2
12 | reg: 1
13 |
14 |
15 | # Define the operations in forward stage
16 | fwd:
17 |
18 | # Define the number of operations in lif operation
19 | # reg: 1 to write the residual u, 1 to accumulate u with the mac results, 1 to reset the acc reg; 3 in total
20 | lif:
21 | mul: 0
22 | acc: 0
23 | add: 1
24 | and: 0
25 | comp: 1
26 | mux: 2
27 | reg: 3
28 | sft: 1
29 |
30 | # Define the number of operations in mac operation
31 | mac:
32 | mul: 0
33 | acc: 1
34 | add: 0
35 | and: 1
36 | comp: 0
37 | mux: 0
38 | reg: 0
39 | sft: 0
40 |
41 |
42 | # Define the number of operations in backward stage
43 | bwd:
44 |
45 | # Define the number of operations in pgu
46 | #
47 | pgu:
48 | mul: 1
49 | acc: 0
50 | add: 2
51 | and: 0
52 | comp: 0
53 | mux: 1
54 | reg: 0
55 | sft: 2
56 |
57 | # Define the number of operations in the mac operation
58 | mac:
59 | mul: 1
60 | acc: 1
61 | add: 0
62 | and: 0
63 | comp: 0
64 | mux: 0
65 | reg: 0
66 | sft: 0
67 |
68 |
69 | # Define the operations in weight update stage
70 | wup:
71 |
72 | # Define the number of operations in the mac operation
73 | mac:
74 | mul: 0
75 | acc: 1
76 | add: 0
77 | and: 1
78 | comp: 0
79 | mux: 0
80 | reg: 0
81 | sft: 0
--------------------------------------------------------------------------------
/config/vgg5_cifar10.yaml:
--------------------------------------------------------------------------------
1 | # Network config for the VGG5 network on CIFAR-10 (2D convolution and linear layers)
2 |
3 | conv1:
4 | type: 2dconv
5 | H_h: 32
6 | H_w: 32
7 | C: 3
8 | R_h: 3
9 | R_w: 3
10 | K: 64
11 | E_h: 32
12 | E_w: 32
13 |
14 | conv2:
15 | type: 2dconv
16 | H_h: 16
17 | H_w: 16
18 | C: 64
19 | R_h: 3
20 | R_w: 3
21 | K: 128
22 | E_h: 16
23 | E_w: 16
24 |
25 | conv3:
26 | type: 2dconv
27 | H_h: 16
28 | H_w: 16
29 | C: 128
30 | R_h: 3
31 | R_w: 3
32 | K: 128
33 | E_h: 16
34 | E_w: 16
35 |
36 | lin4:
37 | type: linear
38 | in: 8192
39 | out: 1024
40 |
41 | lin5:
42 | type: out_linear
43 | in: 1024
44 | out: 10
--------------------------------------------------------------------------------
/har_configs/dcl_har.yaml:
--------------------------------------------------------------------------------
1 | # Network config for the DCL network on HAR (2D convolution layers)
2 |
3 | 2dconv1:
4 | w_kt: 5
5 | w_ks: 1
6 | w_cin: 1
7 | w_cout: 64
8 | out_s: 9
9 | out_t: 124
10 |
11 | 2dconv2:
12 | w_kt: 5
13 | w_ks: 1
14 | w_cin: 64
15 | w_cout: 64
16 | out_s: 9
17 | out_t: 120
18 |
19 | 2dconv3:
20 | w_kt: 5
21 | w_ks: 1
22 | w_cin: 64
23 | w_cout: 64
24 | out_s: 9
25 | out_t: 116
26 |
27 | 2dconv4:
28 | w_kt: 5
29 | w_ks: 1
30 | w_cin: 64
31 | w_cout: 64
32 | out_s: 9
33 | out_t: 112
--------------------------------------------------------------------------------
/har_configs/dcl_shar.yaml:
--------------------------------------------------------------------------------
1 | # Network config for the DCL network on SHAR (2D convolution layers)
2 |
3 | 2dconv1:
4 | w_kt: 5
5 | w_ks: 1
6 | w_cin: 1
7 | w_cout: 64
8 | out_s: 3
9 | out_t: 147
10 |
11 | 2dconv2:
12 | w_kt: 5
13 | w_ks: 1
14 | w_cin: 64
15 | w_cout: 64
16 | out_s: 3
17 | out_t: 143
18 |
19 | 2dconv3:
20 | w_kt: 5
21 | w_ks: 1
22 | w_cin: 64
23 | w_cout: 64
24 | out_s: 3
25 | out_t: 139
26 |
27 | 2dconv4:
28 | w_kt: 5
29 | w_ks: 1
30 | w_cin: 64
31 | w_cout: 64
32 | out_s: 3
33 | out_t: 135
--------------------------------------------------------------------------------
/har_configs/fcn_har.yaml:
--------------------------------------------------------------------------------
1 | # Network config for 1D network fcn
2 |
3 | conv1:
4 | w_k: 8
5 | w_cin: 9
6 | w_cout: 32
7 | in: 128
8 | out: 129
9 |
10 | conv2:
11 | w_k: 8
12 | w_cin: 32
13 | w_cout: 64
14 | in: 65
15 | out: 66
16 |
17 | conv3:
18 | w_k: 8
19 | w_cin: 64
20 | w_cout: 128
21 | in: 34
22 | out: 35
23 |
24 | fc1:
25 | in: 2304
26 | out: 6
27 |
--------------------------------------------------------------------------------
/har_configs/fcn_shar.yaml:
--------------------------------------------------------------------------------
1 | # Network config for 1D network fcn
2 |
3 | conv1:
4 | w_k: 8
5 | w_cin: 3
6 | w_cout: 32
7 | in: 151
8 | out: 152
9 |
10 | conv2:
11 | w_k: 8
12 | w_cin: 32
13 | w_cout: 64
14 | in: 77
15 | out: 78
16 |
17 | conv3:
18 | w_k: 8
19 | w_cin: 64
20 | w_cout: 128
21 | in: 40
22 | out: 41
23 |
24 | fc1:
25 | in: 2688
26 | out: 17
27 |
--------------------------------------------------------------------------------
/har_configs/sata_ann_watch_config.yaml:
--------------------------------------------------------------------------------
1 | # Define your yaml config for a single PE here.
2 | arch:
3 | n_pe: 128
4 |
5 | pe:
6 | mul: 0
7 | acc: 1
8 | add: 1
9 | and: 1
10 | comp: 1
11 | mux: 2
12 | reg: 1
13 |
14 |
15 | # Define the number of operations in one lif operation
16 | # reg: 1 for writing residual u, 1 for accumulate u with mac results, 1 for reset acc reg, total 3
17 | lif:
18 | mul: 0
19 | acc: 0
20 | add: 0
21 | and: 0
22 | comp: 0
23 | mux: 0
24 | reg: 0
25 |
26 | # Define the number of operations in one relu operation
27 | # reg: 1 for resetting the acc
28 | relu:
29 | mul: 0
30 | acc: 0
31 | add: 0
32 | and: 0
33 | comp: 1
34 | mux: 1
35 | reg: 1
36 |
37 |
38 | # Define the number of operations in one mac operation
39 | mac:
40 | mul: 1
41 | acc: 1
42 | add: 0
43 | and: 0
44 | comp: 0
45 | mux: 0
46 | reg: 0
--------------------------------------------------------------------------------
/har_configs/sata_watch_config.yaml:
--------------------------------------------------------------------------------
1 | # Define your yaml config for a single PE here.
2 | arch:
3 | n_pe: 128
4 |
5 | pe:
6 | mul: 0
7 | acc: 1
8 | add: 1
9 | and: 1
10 | comp: 1
11 | mux: 2
12 | reg: 1
13 |
14 | # Define the number of operations in one lif operation
15 | # reg: 1 for writing residual u, 1 for accumulate u with mac results, 1 for reset acc reg, total 3
16 | lif:
17 | mul: 0
18 | acc: 0
19 | add: 1
20 | and: 0
21 | comp: 1
22 | mux: 2
23 | reg: 3
24 |
25 | # Define the number of operations in one mac operation
26 | mac:
27 | mul: 0
28 | acc: 1
29 | add: 0
30 | and: 1
31 | comp: 0
32 | mux: 0
33 | reg: 0
--------------------------------------------------------------------------------
/inference-energy-cal/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/inference-energy-cal/.DS_Store
--------------------------------------------------------------------------------
/inference-energy-cal/__pycache__/hw_kernels.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/inference-energy-cal/__pycache__/hw_kernels.cpython-39.pyc
--------------------------------------------------------------------------------
/inference-energy-cal/comp-utils.py:
--------------------------------------------------------------------------------
1 | import yaml
2 | import hw_kernels # Assuming hw-kernels.py is in the same directory
3 |
4 | # Function Definitions
5 |
def extract_act_dict_from_yaml(filename):
    """Collect the hardware kernels of every PE leaf in an architecture YAML.

    The file is parsed with ``yaml.safe_load`` and the
    ``architecture -> subtree`` hierarchy is walked recursively.  Nodes whose
    ``class`` is ``pe-compute`` or ``pe-mem`` are treated as leaves: every
    entry of their ``local`` list contributes one
    ``(kernel_obj, count, gated)`` tuple to the list stored under the entry's
    ``act-tag``.  Any other node is only descended into through its own
    ``subtree`` list.

    ``kernel_obj`` is an instance of the ``hw_kernels`` class whose name
    equals the entry's ``kernel`` attribute, constructed with the entry's
    ``width`` (``pe-compute``) or ``size-bytes`` (``pe-mem``).  When no such
    class exists -- including when the ``kernel`` attribute is missing -- a
    warning is printed and the raw kernel name is stored instead.

    Args:
        filename: Path of the architecture YAML file.

    Returns:
        dict mapping each act-tag to a list of ``(kernel_obj, count, gated)``
        tuples, in the order the entries were encountered.
    """
    with open(filename, 'r') as file:
        data = yaml.safe_load(file)

    # Leaf class -> (attribute handed to the kernel constructor,
    #                noun used in the "not found" warning).
    leaf_classes = {
        'pe-compute': ('width', 'class'),
        'pe-mem': ('size-bytes', 'register-files'),
    }
    act_dict = {}

    def build_kernel(kernel_name, ctor_arg, noun):
        # getattr() raises TypeError for a non-string name, so a missing
        # 'kernel' attribute must be filtered out before the lookup.
        kernel_cls = None
        if isinstance(kernel_name, str):
            kernel_cls = getattr(hw_kernels, kernel_name, None)
        if kernel_cls:
            return kernel_cls(ctor_arg)
        print(f"Warning: No matching {noun} found for kernel: {kernel_name}")
        return kernel_name  # Use the name as a fallback

    def recursive_search(subtrees):
        for subtree in subtrees:
            leaf = leaf_classes.get(subtree.get('class'))
            if leaf is None:
                # Not a PE leaf: keep descending.
                recursive_search(subtree.get('subtree') or [])
                continue

            ctor_attribute, noun = leaf
            for local in subtree.get('local') or []:
                attributes = local.get('attributes') or {}
                kernel_obj = build_kernel(
                    attributes.get('kernel'),
                    attributes.get(ctor_attribute),
                    noun,
                )
                act_dict.setdefault(local.get('act-tag'), []).append(
                    (kernel_obj, attributes.get('count'), attributes.get('gated'))
                )

    architecture = (data or {}).get('architecture') or {}
    recursive_search(architecture.get('subtree') or [])
    return act_dict
65 |
def aggregate_act_data(act_dict):
    """Sum area, leakage power and dynamic power per act-tag.

    Args:
        act_dict: Mapping from act-tag to a list of
            ``(kernel_obj, count, gated)`` tuples.  Each ``kernel_obj`` must
            provide ``get_area()``, ``get_lpower()`` and ``get_dpower()``;
            ``gated`` must be ``'n'`` or ``'y'``.

    Returns:
        dict mapping each act-tag to
        ``{'area': ..., 'lpower': ..., 'dpower': {'n': ..., 'y': ...}}``,
        where every figure is the count-weighted sum over the tag's kernels
        rounded to 6 decimal places.  Dynamic power is accumulated
        separately for each ``gated`` value.
    """
    summary = {}

    for tag, entries in act_dict.items():
        area = 0
        leakage = 0
        dynamic = {'n': 0, 'y': 0}

        for kernel, instances, gate_flag in entries:
            area += kernel.get_area() * instances
            leakage += kernel.get_lpower() * instances
            dynamic[gate_flag] += kernel.get_dpower() * instances

        # Round only the final sums so that no per-kernel rounding error
        # accumulates.
        summary[tag] = {
            'area': round(area, 6),
            'lpower': round(leakage, 6),
            'dpower': {flag: round(power, 6) for flag, power in dynamic.items()},
        }

    return summary
95 |
96 | # Main Execution
97 |
if __name__ == "__main__":
    # Extract the PE kernels from the architecture description and reduce
    # them to per-act-tag area / power figures.
    aggregated_act_data = aggregate_act_data(
        extract_act_dict_from_yaml('sata-config.yaml')
    )
    print(aggregated_act_data)

    # Store the aggregated figures as block-style YAML.
    output_filename = 'results/comp-stat.yaml'
    with open(output_filename, 'w') as outfile:
        yaml.dump(aggregated_act_data, outfile, default_flow_style=False)
    print(f"Computation components written to {output_filename}")
--------------------------------------------------------------------------------
/inference-energy-cal/cycle-utils.py:
--------------------------------------------------------------------------------
1 | import yaml
2 | import os
3 | import shutil
4 | import subprocess
5 | import csv
6 |
7 | # Function Definitions
8 |
def extract_scalesim_dict_from_yaml(filename):
    """Derive the ScaleSim settings from an architecture YAML file.

    The ``architecture -> subtree`` hierarchy is walked recursively:

    * ``glbs`` nodes: each ``local`` entry's ``size-bytes`` attribute is
      converted to kB and stored under ``FilterSramSzkB``, ``IfmapSramSzkB``
      or ``OfmapSramSzkB`` depending on whether its ``act-tag`` contains
      ``weight``, ``ifmap`` or ``ofmap`` (checked in that order).
    * ``pe-array`` nodes: the ``width`` / ``height`` attributes become
      ``ArrayWidth`` / ``ArrayHeight``.
    * any other node is only descended into through its ``subtree`` list.

    If no ofmap SRAM was found, the ifmap SRAM size is reused for it.

    Args:
        filename: Path of the architecture YAML file.

    Returns:
        dict with the keys above plus ``run_name`` and ``dataflow``, taken
        from the ``name`` and ``dataflow`` entries of ``architecture``.
    """
    with open(filename, 'r') as file:
        data = yaml.safe_load(file)

    # (substring searched for in the act-tag, ScaleSim key); first hit wins.
    sram_keys = (
        ("weight", "FilterSramSzkB"),
        ("ifmap", "IfmapSramSzkB"),
        ("ofmap", "OfmapSramSzkB"),
    )
    scalesim_dict = {}

    def record_glb(entry):
        act_tag = entry.get('act-tag')
        attributes = entry.get('attributes', {})
        size = int(attributes.get('size-bytes')/1024)

        #! Adding support for multiple srams for one operand (e.g., 2 sram for ifmap)
        for needle, key in sram_keys:
            if needle in act_tag:
                scalesim_dict[key] = size
                return
        print(f"Warning: No matching sram class found for scalesim: {act_tag}")

    def recursive_search(subtrees):
        for node in subtrees:
            node_class = node.get('class')
            if node_class == 'glbs':
                for entry in node.get('local', []):
                    record_glb(entry)
            elif node_class == 'pe-array':
                array_attributes = node.get('attributes', {})
                scalesim_dict["ArrayWidth"] = array_attributes.get("width")
                scalesim_dict["ArrayHeight"] = array_attributes.get("height")
            else:
                recursive_search(node.get('subtree', []))

    recursive_search(data.get('architecture', {}).get('subtree', []))

    # Fall back to the ifmap buffer size when no ofmap buffer is described.
    if "OfmapSramSzkB" not in scalesim_dict:
        scalesim_dict["OfmapSramSzkB"] = scalesim_dict["IfmapSramSzkB"]

    arch = data.get('architecture')
    scalesim_dict['run_name'] = arch.get('name')
    scalesim_dict['dataflow'] = arch.get('dataflow')

    return scalesim_dict
52 |
def update_config(yaml_filename, default_config_path, output_config_path):
    """Instantiate a ScaleSim config from a template with ``x0``..``x6`` tokens.

    The template at ``default_config_path`` is copied to
    ``output_config_path`` and every placeholder token in the copy is
    replaced by the matching value from the YAML file:

    ========  ================
    token     YAML key
    ========  ================
    ``x0``    ``run_name``
    ``x1``    ``ArrayHeight``
    ``x2``    ``ArrayWidth``
    ``x3``    ``IfmapSramSzkB``
    ``x4``    ``FilterSramSzkB``
    ``x5``    ``OfmapSramSzkB``
    ``x6``    ``dataflow``
    ========  ================

    A token whose key is absent from the YAML file is left untouched.

    All tokens are substituted in a single pass over the template, so a
    substituted value that itself contains a token (for example a run name
    containing ``x2``) is not rewritten by a later replacement.

    Args:
        yaml_filename: Path of the YAML file holding the values.
        default_config_path: Path of the template config.
        output_config_path: Path the filled-in config is written to.
    """
    import re  # local import: this module has no file-level ``re`` import

    # Load the YAML file (an empty file yields None, treated as "no values").
    with open(yaml_filename, 'r') as yaml_file:
        yaml_data = yaml.safe_load(yaml_file) or {}

    # Define a mapping from the placeholders to the YAML keys
    mapping = {
        "x0": "run_name",
        "x1": "ArrayHeight",
        "x2": "ArrayWidth",
        "x3": "IfmapSramSzkB",
        "x4": "FilterSramSzkB",
        "x5": "OfmapSramSzkB",
        "x6": "dataflow"
    }

    # Copy the original default.cfg to a new file
    shutil.copy(default_config_path, output_config_path)

    # Load the copied file
    with open(output_config_path, 'r') as config_file:
        config_content = config_file.read()

    placeholder_pattern = re.compile('|'.join(re.escape(token) for token in mapping))

    def substitute(match):
        token = match.group(0)
        # Use the placeholder itself as default if the key is not found.
        return str(yaml_data.get(mapping[token], token))

    # A callable replacement keeps backslashes in values literal and, because
    # re.sub never rescans inserted text, prevents chained replacements.
    config_content = placeholder_pattern.sub(substitute, config_content)

    # Write the updated content back to the copied file
    with open(output_config_path, 'w') as config_file:
        config_file.write(config_content)

    print(f"{output_config_path} updated successfully!")
87 |
88 |
def generate_temp_workload(workload_path='./workload.yaml',
                           csv_file_path='../scale-sim-v2/temp_workload.csv'):
    """Convert a workload YAML file into a layer-per-row CSV file.

    Each entry of the top-level ``Layers`` list becomes one CSV row holding
    the layer ``name`` followed by the values of its ``attributes`` mapping
    for the columns listed in ``attribute_columns``.  Missing values are
    written as ``N/A``.  The header row and every data row end with an empty
    field (presumably the trailing comma ScaleSim expects in its topology
    files -- confirm against the simulator).

    Args:
        workload_path: YAML file to read.  Defaults to ``./workload.yaml``.
        csv_file_path: CSV file to write.  Defaults to
            ``../scale-sim-v2/temp_workload.csv``.
    """
    # Single source of truth for both the header and the per-layer lookups.
    attribute_columns = [
        'IFMAP Height',
        'IFMAP Width',
        'Filter Height',
        'Filter Width',
        'Channels',
        'Num Filter',
        'Strides',
    ]

    with open(workload_path, 'r') as file:
        data = yaml.safe_load(file) or {}

    # Convert YAML data to CSV rows
    csv_rows = []
    for layer in data.get('Layers') or []:
        attributes = layer.get('attributes') or {}
        csv_rows.append([
            layer.get('name', 'N/A'),
            *(attributes.get(column, 'N/A') for column in attribute_columns),
            '',
        ])

    # Write the CSV data to a file
    with open(csv_file_path, 'w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['Layer name', *attribute_columns, ''])
        csv_writer.writerows(csv_rows)

    print("CSV file written successfully.")
118 |
119 | # Main Execution
120 |
if __name__ == "__main__":
    filename = 'sata-config.yaml'
    scalesim_dict = extract_scalesim_dict_from_yaml(filename)
    scalesim_path = '../scale-sim-v2'

    # update_config() reads its values from a file, so the extracted settings
    # are round-tripped through a temporary YAML file.
    output_filename = 'cycle-stat-temp.yaml'
    default_config_path = os.path.join('..', 'scale-sim-v2', 'configs', 'default.cfg')
    output_config_path = os.path.join('..', 'scale-sim-v2', 'configs', 'running.cfg')

    try:
        with open(output_filename, 'w') as outfile:
            yaml.dump(scalesim_dict, outfile, default_flow_style=False)
        print(f"Cycle stats written to {output_filename}")

        # Update the copied file with values from the YAML file
        update_config(output_filename, default_config_path, output_config_path)
    finally:
        # Remove the temporary file even when the config update fails.
        if os.path.exists(output_filename):
            os.remove(output_filename)

    generate_temp_workload()

    # Run the simulator from its own directory.  An argument list avoids
    # going through a shell, and check=True surfaces a failed run instead of
    # silently ignoring its exit status.
    subprocess.run(['python3', 'run.py'], cwd=scalesim_path, check=True)
142 |
--------------------------------------------------------------------------------
/inference-energy-cal/related-work-estimate.py:
--------------------------------------------------------------------------------
1 | import hw_kernels
2 |
# Dynamic power of the 8-bit building blocks, as reported by hw_kernels.
adder = hw_kernels.adder(8)
mul = hw_kernels.multiplier(8)
reg = hw_kernels.register(8)
mac_8_bit = adder.get_dpower() + mul.get_dpower() + reg.get_dpower()
acc_8_bit = adder.get_dpower() + reg.get_dpower()
# NOTE(review): the origin of the 1/250 factor is not documented in this
# script -- confirm what it normalises against.
scale = 1/250


def _count_accumulates(conv_cfg, input_channel=3, kernel_size=3*3, img_size=32):
    """Return the accumulate count of one forward pass through a network.

    ``conv_cfg`` lists the convolution layers in order: an int is the output
    channel count of a convolution (same spatial size in and out), any other
    entry halves the feature-map side length.  The convolutions are followed
    by two fully connected layers whose sizes are hard-coded below.
    """
    n_acc = 0
    for c in conv_cfg:
        if isinstance(c, int):
            n_acc += input_channel*kernel_size*c*img_size*img_size
            input_channel = c
        else:
            img_size = img_size/2
    # NOTE(review): the 512 input channels of the first fully connected layer
    # are hard-coded for every network, including configurations whose last
    # convolution has a different channel count -- confirm.
    return n_acc + (512*img_size*img_size*1024) + 1024*512


def _spiking_energy(conv_cfg, timesteps, sparsity):
    """Return the scaled accumulate energy over all timesteps.

    Only the non-sparse fraction ``1 - sparsity`` of the accumulates is
    charged, each at the cost of one 8-bit accumulate (adder + register).
    """
    return (_count_accumulates(conv_cfg) * acc_8_bit * timesteps * (1-sparsity)) * scale


######################### TDBN ###################################

# The work provides the estimated numbers of additions and multiplications.
add_n = 1.8e9
mul_n = 3.4e7

# Assuming 8 bits, 300MHz
tdbn_total_estimated_energy = (add_n*adder.get_dpower() + mul_n*mul.get_dpower()) * scale

print('TDBN estimated energy in (/8-bit int MAC): ', round(tdbn_total_estimated_energy,2))


######################### TSSL ###################################

tssl_total_estimated_energy = _spiking_energy(
    [128, 256, 'm', 512, 'm', 1024, 'm', 512], timesteps=5, sparsity=0.901)

print('TSSL estimated energy in (/8-bit int MAC): ', round(tssl_total_estimated_energy,2))


######################### Direct #################################

direct_total_estimated_energy = _spiking_energy(
    [128, 256, 'm', 512, 'm', 1024, 'm', 512], timesteps=10, sparsity=0.90)

print('Direct estimated energy in (/8-bit int MAC): ', round(direct_total_estimated_energy,2))


######################### BNTT #################################

bntt_total_estimated_energy = _spiking_energy(
    [64, 64, "M", 128, 128, "M", 256, 256, 256, "M"], timesteps=20, sparsity=0.91)

print('BNTT estimated energy in (/8-bit int MAC): ', round(bntt_total_estimated_energy,2))
--------------------------------------------------------------------------------
/inference-energy-cal/results/bntt/comp-stat.yaml:
--------------------------------------------------------------------------------
1 | lif:
2 | area: 0.000129
3 | dpower:
4 | n: 0.05625
5 | y: 0.01886
6 | lpower: 8.4e-05
7 | spad:
8 | area: 0.000891
9 | dpower:
10 | n: 0.21233
11 | y: 0.11241
12 | lpower: 0.000523
13 | spike-mac:
14 | area: 9.7e-05
15 | dpower:
16 | n: 0.01924
17 | y: 0.02511
18 | lpower: 6.2e-05
19 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/bntt/computation-energy.yaml:
--------------------------------------------------------------------------------
1 | lif:
2 | energy-leakage: 503.36732544000006
3 | energy-operation: 186.84979199999998
4 | energy-ungated: 337076.334
5 | total: 337766.55111744
6 | spad:
7 | energy-leakage: 3134.0608476800003
8 | energy-operation: 53406.3540843
9 | energy-ungated: 1272380.7644128
10 | total: 1328921.1793447803
11 | spike-mac:
12 | energy-leakage: 371.53302592
13 | energy-operation: 11929.8421053
14 | energy-ungated: 115295.0873984
15 | total: 127596.46252962001
16 | total: 1794284.1929918402
17 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/bntt/cycle-stat.yaml:
--------------------------------------------------------------------------------
1 | Layer 0:
2 | DRAM Filter Reads: 1728.0
3 | DRAM IFMAP Reads: 3072.0
4 | DRAM OFMAP Writes: 57607.0
5 | SRAM Filter Cycles: 14085.0
6 | SRAM Filter Reads: 1728.0
7 | SRAM IFMAP Cycles: 22324.0
8 | SRAM IFMAP Reads: 194400.0
9 | SRAM OFMAP Cycles: 22317.0
10 | SRAM OFMAP Start Cycle: 26.0
11 | SRAM OFMAP Writes: 68544.0
12 | Layer 1:
13 | DRAM Filter Reads: 36864.0
14 | DRAM IFMAP Reads: 649682.0
15 | DRAM OFMAP Writes: 57607.0
16 | SRAM Filter Cycles: 269221.0
17 | SRAM Filter Reads: 36864.0
18 | SRAM IFMAP Cycles: 272668.0
19 | SRAM IFMAP Reads: 4147200.0
20 | SRAM OFMAP Cycles: 272112.0
21 | SRAM OFMAP Start Cycle: 575.0
22 | SRAM OFMAP Writes: 68544.0
23 | Layer 2:
24 | DRAM Filter Reads: 73728.0
25 | DRAM IFMAP Reads: 276832.0
26 | DRAM OFMAP Writes: 25095.0
27 | SRAM Filter Cycles: 134565.0
28 | SRAM Filter Reads: 73728.0
29 | SRAM IFMAP Cycles: 124364.0
30 | SRAM IFMAP Reads: 1806336.0
31 | SRAM OFMAP Cycles: 123808.0
32 | SRAM OFMAP Start Cycle: 575.0
33 | SRAM OFMAP Writes: 30080.0
34 | Layer 3:
35 | DRAM Filter Reads: 147456.0
36 | DRAM IFMAP Reads: 8192.0
37 | DRAM OFMAP Writes: 4608.0
38 | SRAM Filter Cycles: 85189.0
39 | SRAM Filter Reads: 147456.0
40 | SRAM IFMAP Cycles: 56332.0
41 | SRAM IFMAP Reads: 663552.0
42 | SRAM OFMAP Cycles: 55200.0
43 | SRAM OFMAP Start Cycle: 1151.0
44 | SRAM OFMAP Writes: 5760.0
45 | Layer 4:
46 | DRAM Filter Reads: 294912.0
47 | DRAM IFMAP Reads: 2048.0
48 | DRAM OFMAP Writes: 1024.0
49 | SRAM Filter Cycles: 96741.0
50 | SRAM Filter Reads: 294912.0
51 | SRAM IFMAP Cycles: 37548.0
52 | SRAM IFMAP Reads: 147456.0
53 | SRAM OFMAP Cycles: 36404.0
54 | SRAM OFMAP Start Cycle: 1163.0
55 | SRAM OFMAP Writes: 1792.0
56 | Layer 5:
57 | DRAM Filter Reads: 589824.0
58 | DRAM IFMAP Reads: 4096.0
59 | DRAM OFMAP Writes: 1024.0
60 | SRAM Filter Cycles: 193509.0
61 | SRAM Filter Reads: 589824.0
62 | SRAM IFMAP Cycles: 74412.0
63 | SRAM IFMAP Reads: 294912.0
64 | SRAM OFMAP Cycles: 72116.0
65 | SRAM OFMAP Start Cycle: 2315.0
66 | SRAM OFMAP Writes: 1792.0
67 | Layer 6:
68 | DRAM Filter Reads: 589824.0
69 | DRAM IFMAP Reads: 4096.0
70 | DRAM OFMAP Writes: 1024.0
71 | SRAM Filter Cycles: 193509.0
72 | SRAM Filter Reads: 589824.0
73 | SRAM IFMAP Cycles: 74412.0
74 | SRAM IFMAP Reads: 294912.0
75 | SRAM OFMAP Cycles: 72116.0
76 | SRAM OFMAP Start Cycle: 2315.0
77 | SRAM OFMAP Writes: 1792.0
78 | Layer 7:
79 | DRAM Filter Reads: 1048576.0
80 | DRAM IFMAP Reads: 1024.0
81 | DRAM OFMAP Writes: 1024.0
82 | SRAM Filter Cycles: 319464.0
83 | SRAM Filter Reads: 1048576.0
84 | SRAM IFMAP Cycles: 133865.0
85 | SRAM IFMAP Reads: 131072.0
86 | SRAM OFMAP Cycles: 132849.0
87 | SRAM OFMAP Start Cycle: 1038.0
88 | SRAM OFMAP Writes: 4096.0
89 | Layer 8:
90 | DRAM Filter Reads: 524288.0
91 | DRAM IFMAP Reads: 1024.0
92 | DRAM OFMAP Writes: 512.0
93 | SRAM Filter Cycles: 159720.0
94 | SRAM Filter Reads: 524288.0
95 | SRAM IFMAP Cycles: 66921.0
96 | SRAM IFMAP Reads: 65536.0
97 | SRAM OFMAP Cycles: 65905.0
98 | SRAM OFMAP Start Cycle: 1038.0
99 | SRAM OFMAP Writes: 2048.0
100 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/bntt/mem-stat.yaml:
--------------------------------------------------------------------------------
1 | DRAM:
2 | - name: DRAM-system
3 | read energy: 0.468762
4 | SRAM:
5 | - name: GLB-ifmap
6 | read dynamic energy: 0.0110295
7 | write dynamic energy: 0.012772
8 | leakage power: 6.25862
9 | - name: GLB-ofmap
10 | read dynamic energy: 0.0110295
11 | write dynamic energy: 0.012772
12 | leakage power: 6.25862
13 | - name: GLB-weight
14 | read dynamic energy: 0.0286315
15 | write dynamic energy: 0.0282197
16 | leakage power: 24.9086
17 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/bntt/memory-energy.yaml:
--------------------------------------------------------------------------------
1 | DRAM-system:
2 | ifmap: 445354.838292
3 | ofmap: 70091.63805000001
4 | total: 2065736.1627419998
5 | weight: 1550289.6864
6 | GLB-ifmap:
7 | ifmap-dynamic: 85427.624592
8 | ifmap-leakage: 293004.53965265
9 | ifmap-total: 378432.16424465
10 | GLB-ofmap:
11 | ofmap-dynamic: 2355.769856
12 | ofmap-leakage: 293004.53965265
13 | ofmap-total: 295360.30950865004
14 | GLB-weight:
15 | weight-dynamic: 94690.0968
16 | weight-leakage: 1166124.9407045
17 | weight-total: 1260815.0375045
18 | dram_total: 2065736.1627419998
19 | sram_total: 1934607.5112578
20 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/direct/comp-stat.yaml:
--------------------------------------------------------------------------------
1 | lif:
2 | area: 0.000129
3 | dpower:
4 | n: 0.05625
5 | y: 0.01886
6 | lpower: 8.4e-05
7 | spad:
8 | area: 0.000891
9 | dpower:
10 | n: 0.21233
11 | y: 0.11241
12 | lpower: 0.000523
13 | spike-mac:
14 | area: 9.7e-05
15 | dpower:
16 | n: 0.01924
17 | y: 0.02511
18 | lpower: 6.2e-05
19 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/direct/computation-energy.yaml:
--------------------------------------------------------------------------------
1 | lif:
2 | energy-leakage: 2349.9881664
3 | energy-operation: 288.48256
4 | energy-ungated: 1573652.79
5 | total: 1576291.2607264
6 | spad:
7 | energy-leakage: 14631.4739408
8 | energy-operation: 299354.052736575
9 | energy-ungated: 5940154.611568
10 | total: 6254140.138245375
11 | spike-mac:
12 | energy-leakage: 1734.5150752000002
13 | energy-operation: 66869.32002682501
14 | energy-ungated: 538259.194304
15 | total: 606863.0294060251
16 | total: 8437294.4283778
17 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/direct/cycle-stat.yaml:
--------------------------------------------------------------------------------
1 | Layer 0:
2 | DRAM Filter Reads: 3456.0
3 | DRAM IFMAP Reads: 3072.0
4 | DRAM OFMAP Writes: 115207.0
5 | SRAM Filter Cycles: 29093.0
6 | SRAM Filter Reads: 3456.0
7 | SRAM IFMAP Cycles: 44668.0
8 | SRAM IFMAP Reads: 388800.0
9 | SRAM OFMAP Cycles: 44661.0
10 | SRAM OFMAP Start Cycle: 26.0
11 | SRAM OFMAP Writes: 137088.0
12 | Layer 1:
13 | DRAM Filter Reads: 294912.0
14 | DRAM IFMAP Reads: 7501612.0
15 | DRAM OFMAP Writes: 230407.0
16 | SRAM Filter Cycles: 2160229.0
17 | SRAM Filter Reads: 294912.0
18 | SRAM IFMAP Cycles: 2141356.0
19 | SRAM IFMAP Reads: 33177600.0
20 | SRAM OFMAP Cycles: 2140224.0
21 | SRAM OFMAP Start Cycle: 1151.0
22 | SRAM OFMAP Writes: 274176.0
23 | Layer 2:
24 | DRAM Filter Reads: 1179648.0
25 | DRAM IFMAP Reads: 6799586.0
26 | DRAM OFMAP Writes: 100359.0
27 | SRAM Filter Cycles: 2156325.0
28 | SRAM Filter Reads: 1179648.0
29 | SRAM IFMAP Cycles: 1935212.0
30 | SRAM IFMAP Reads: 28901376.0
31 | SRAM OFMAP Cycles: 1932928.0
32 | SRAM OFMAP Start Cycle: 2303.0
33 | SRAM OFMAP Writes: 120320.0
34 | Layer 3:
35 | DRAM Filter Reads: 4718592.0
36 | DRAM IFMAP Reads: 7320866.0
37 | DRAM OFMAP Writes: 36871.0
38 | SRAM Filter Cycles: 2727877.0
39 | SRAM Filter Reads: 4718592.0
40 | SRAM IFMAP Cycles: 1777900.0
41 | SRAM IFMAP Reads: 21233664.0
42 | SRAM OFMAP Cycles: 1773312.0
43 | SRAM OFMAP Start Cycle: 4607.0
44 | SRAM OFMAP Writes: 46080.0
45 | Layer 4:
46 | DRAM Filter Reads: 4718592.0
47 | DRAM IFMAP Reads: 1058891.0
48 | DRAM OFMAP Writes: 2048.0
49 | SRAM Filter Cycles: 1548261.0
50 | SRAM Filter Reads: 4718592.0
51 | SRAM IFMAP Cycles: 591212.0
52 | SRAM IFMAP Reads: 2359296.0
53 | SRAM OFMAP Cycles: 582004.0
54 | SRAM OFMAP Start Cycle: 9227.0
55 | SRAM OFMAP Writes: 3584.0
56 | Layer 5:
57 | DRAM Filter Reads: 8388608.0
58 | DRAM IFMAP Reads: 8192.0
59 | DRAM OFMAP Writes: 1024.0
60 | SRAM Filter Cycles: 2555880.0
61 | SRAM Filter Reads: 8388608.0
62 | SRAM IFMAP Cycles: 1051369.0
63 | SRAM IFMAP Reads: 1048576.0
64 | SRAM OFMAP Cycles: 1043185.0
65 | SRAM OFMAP Start Cycle: 8206.0
66 | SRAM OFMAP Writes: 4096.0
67 | Layer 6:
68 | DRAM Filter Reads: 524288.0
69 | DRAM IFMAP Reads: 1024.0
70 | DRAM OFMAP Writes: 512.0
71 | SRAM Filter Cycles: 159720.0
72 | SRAM Filter Reads: 524288.0
73 | SRAM IFMAP Cycles: 66921.0
74 | SRAM IFMAP Reads: 65536.0
75 | SRAM OFMAP Cycles: 65905.0
76 | SRAM OFMAP Start Cycle: 1038.0
77 | SRAM OFMAP Writes: 2048.0
78 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/direct/mem-stat.yaml:
--------------------------------------------------------------------------------
1 | DRAM:
2 | - name: DRAM-system
3 | read energy: 0.468762
4 | SRAM:
5 | - name: GLB-ifmap
6 | read dynamic energy: 0.0110295
7 | write dynamic energy: 0.012772
8 | leakage power: 6.25862
9 | - name: GLB-ofmap
10 | read dynamic energy: 0.0110295
11 | write dynamic energy: 0.012772
12 | leakage power: 6.25862
13 | - name: GLB-weight
14 | read dynamic energy: 0.0286315
15 | write dynamic energy: 0.0282197
16 | leakage power: 24.9086
17 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/direct/memory-energy.yaml:
--------------------------------------------------------------------------------
1 | DRAM-system:
2 | ifmap: 10637729.975166
3 | ofmap: 228018.96213600002
4 | total: 20160406.874454
5 | weight: 9294657.937152
6 | GLB-ifmap:
7 | ifmap-dynamic: 961494.9860159999
8 | ifmap-leakage: 1367902.05896525
9 | ifmap-total: 2329397.0449812496
10 | GLB-ofmap:
11 | ofmap-dynamic: 7502.170624
12 | ofmap-leakage: 1367902.05896525
13 | ofmap-total: 1375404.22958925
14 | GLB-weight:
15 | weight-dynamic: 567708.130624
16 | weight-leakage: 5444095.5395825
17 | weight-total: 6011803.6702065
18 | dram_total: 20160406.874454
19 | sram_total: 9716604.944777
20 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/tdbn/comp-stat.yaml:
--------------------------------------------------------------------------------
1 | lif:
2 | area: 0.000129
3 | dpower:
4 | n: 0.05625
5 | y: 0.01886
6 | lpower: 8.4e-05
7 | spad:
8 | area: 0.000891
9 | dpower:
10 | n: 0.21233
11 | y: 0.11241
12 | lpower: 0.000523
13 | spike-mac:
14 | area: 9.7e-05
15 | dpower:
16 | n: 0.01924
17 | y: 0.02511
18 | lpower: 6.2e-05
19 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/tdbn/computation-energy.yaml:
--------------------------------------------------------------------------------
1 | lif:
2 | energy-leakage: 2149.7978073599998
3 | energy-operation: 272.3565056
4 | energy-ungated: 1439596.7459999998
5 | total: 1442018.90031296
6 | spad:
7 | energy-leakage: 13385.05063392
8 | energy-operation: 385679.407785075
9 | energy-ungated: 5434125.8147232
10 | total: 5833190.273142195
11 | spike-mac:
12 | energy-leakage: 1586.75552448
13 | energy-operation: 86152.56587032501
14 | energy-ungated: 492406.0692096
15 | total: 580145.390604405
16 | total: 7855354.56405956
17 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/tdbn/mem-stat.yaml:
--------------------------------------------------------------------------------
1 | DRAM:
2 | - name: DRAM-system
3 | read energy: 0.468762
4 | SRAM:
5 | - name: GLB-ifmap
6 | read dynamic energy: 0.0110295
7 | write dynamic energy: 0.012772
8 | leakage power: 6.25862
9 | - name: GLB-ofmap
10 | read dynamic energy: 0.0110295
11 | write dynamic energy: 0.012772
12 | leakage power: 6.25862
13 | - name: GLB-weight
14 | read dynamic energy: 0.0286315
15 | write dynamic energy: 0.0282197
16 | leakage power: 24.9086
17 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/tdbn/memory-energy.yaml:
--------------------------------------------------------------------------------
1 | DRAM-system:
2 | ifmap: 28812870.246846
3 | ofmap: 567790.31631
4 | total: 36171574.406100005
5 | weight: 6790913.842944
6 | GLB-ifmap:
7 | ifmap-dynamic: 2461365.8084159996
8 | ifmap-leakage: 1251373.4703403502
9 | ifmap-total: 3712739.27875635
10 | GLB-ofmap:
11 | ofmap-dynamic: 18484.864512
12 | ofmap-leakage: 1251373.4703403502
13 | ofmap-total: 1269858.3348523502
14 | GLB-weight:
15 | weight-dynamic: 414782.020928
16 | weight-leakage: 4980324.931585501
17 | weight-total: 5395106.952513501
18 | dram_total: 36171574.406100005
19 | sram_total: 10377704.5661222
20 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/tssl/comp-stat.yaml:
--------------------------------------------------------------------------------
1 | lif:
2 | area: 0.000129
3 | dpower:
4 | n: 0.05625
5 | y: 0.01886
6 | lpower: 8.4e-05
7 | spad:
8 | area: 0.000891
9 | dpower:
10 | n: 0.21233
11 | y: 0.11241
12 | lpower: 0.000523
13 | spike-mac:
14 | area: 9.7e-05
15 | dpower:
16 | n: 0.01924
17 | y: 0.02511
18 | lpower: 6.2e-05
19 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/tssl/computation-energy.yaml:
--------------------------------------------------------------------------------
1 | lif:
2 | energy-leakage: 1327.3685376
3 | energy-operation: 144.24128
4 | energy-ungated: 888862.86
5 | total: 890334.4698176
6 | spad:
7 | energy-leakage: 8264.4493472
8 | energy-operation: 149677.02622777497
9 | energy-ungated: 3355240.018912
10 | total: 3513181.494486975
11 | spike-mac:
12 | energy-leakage: 979.7243968000001
13 | energy-operation: 33434.659982025005
14 | energy-ungated: 304030.603136
15 | total: 338444.98751482496
16 | total: 4741960.951819399
17 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/tssl/cycle-stat.yaml:
--------------------------------------------------------------------------------
1 | Layer 0:
2 | DRAM Filter Reads: 3456.0
3 | DRAM IFMAP Reads: 3072.0
4 | DRAM OFMAP Writes: 115207.0
5 | SRAM Filter Cycles: 29093.0
6 | SRAM Filter Reads: 3456.0
7 | SRAM IFMAP Cycles: 44668.0
8 | SRAM IFMAP Reads: 388800.0
9 | SRAM OFMAP Cycles: 44661.0
10 | SRAM OFMAP Start Cycle: 26.0
11 | SRAM OFMAP Writes: 137088.0
12 | Layer 1:
13 | DRAM Filter Reads: 294912.0
14 | DRAM IFMAP Reads: 7501612.0
15 | DRAM OFMAP Writes: 230407.0
16 | SRAM Filter Cycles: 2160229.0
17 | SRAM Filter Reads: 294912.0
18 | SRAM IFMAP Cycles: 2141356.0
19 | SRAM IFMAP Reads: 33177600.0
20 | SRAM OFMAP Cycles: 2140224.0
21 | SRAM OFMAP Start Cycle: 1151.0
22 | SRAM OFMAP Writes: 274176.0
23 | Layer 2:
24 | DRAM Filter Reads: 1179648.0
25 | DRAM IFMAP Reads: 6799586.0
26 | DRAM OFMAP Writes: 100359.0
27 | SRAM Filter Cycles: 2156325.0
28 | SRAM Filter Reads: 1179648.0
29 | SRAM IFMAP Cycles: 1935212.0
30 | SRAM IFMAP Reads: 28901376.0
31 | SRAM OFMAP Cycles: 1932928.0
32 | SRAM OFMAP Start Cycle: 2303.0
33 | SRAM OFMAP Writes: 120320.0
34 | Layer 3:
35 | DRAM Filter Reads: 4718592.0
36 | DRAM IFMAP Reads: 7320866.0
37 | DRAM OFMAP Writes: 36871.0
38 | SRAM Filter Cycles: 2727877.0
39 | SRAM Filter Reads: 4718592.0
40 | SRAM IFMAP Cycles: 1777900.0
41 | SRAM IFMAP Reads: 21233664.0
42 | SRAM OFMAP Cycles: 1773312.0
43 | SRAM OFMAP Start Cycle: 4607.0
44 | SRAM OFMAP Writes: 46080.0
45 | Layer 4:
46 | DRAM Filter Reads: 4718592.0
47 | DRAM IFMAP Reads: 1058891.0
48 | DRAM OFMAP Writes: 2048.0
49 | SRAM Filter Cycles: 1548261.0
50 | SRAM Filter Reads: 4718592.0
51 | SRAM IFMAP Cycles: 591212.0
52 | SRAM IFMAP Reads: 2359296.0
53 | SRAM OFMAP Cycles: 582004.0
54 | SRAM OFMAP Start Cycle: 9227.0
55 | SRAM OFMAP Writes: 3584.0
56 | Layer 5:
57 | DRAM Filter Reads: 8388608.0
58 | DRAM IFMAP Reads: 8192.0
59 | DRAM OFMAP Writes: 1024.0
60 | SRAM Filter Cycles: 2555880.0
61 | SRAM Filter Reads: 8388608.0
62 | SRAM IFMAP Cycles: 1051369.0
63 | SRAM IFMAP Reads: 1048576.0
64 | SRAM OFMAP Cycles: 1043185.0
65 | SRAM OFMAP Start Cycle: 8206.0
66 | SRAM OFMAP Writes: 4096.0
67 | Layer 6:
68 | DRAM Filter Reads: 524288.0
69 | DRAM IFMAP Reads: 1024.0
70 | DRAM OFMAP Writes: 512.0
71 | SRAM Filter Cycles: 159720.0
72 | SRAM Filter Reads: 524288.0
73 | SRAM IFMAP Cycles: 66921.0
74 | SRAM IFMAP Reads: 65536.0
75 | SRAM OFMAP Cycles: 65905.0
76 | SRAM OFMAP Start Cycle: 1038.0
77 | SRAM OFMAP Writes: 2048.0
78 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/tssl/mem-stat.yaml:
--------------------------------------------------------------------------------
1 | DRAM:
2 | - name: DRAM-system
3 | read energy: 0.468762
4 | SRAM:
5 | - name: GLB-ifmap
6 | read dynamic energy: 0.0110295
7 | write dynamic energy: 0.012772
8 | leakage power: 6.25862
9 | - name: GLB-ofmap
10 | read dynamic energy: 0.0110295
11 | write dynamic energy: 0.012772
12 | leakage power: 6.25862
13 | - name: GLB-weight
14 | read dynamic energy: 0.0286315
15 | write dynamic energy: 0.0282197
16 | leakage power: 24.9086
17 |
--------------------------------------------------------------------------------
/inference-energy-cal/results/tssl/memory-energy.yaml:
--------------------------------------------------------------------------------
1 | DRAM-system:
2 | ifmap: 10637729.975166
3 | ofmap: 228018.96213600002
4 | total: 20160406.874454
5 | weight: 9294657.937152
6 | GLB-ifmap:
7 | ifmap-dynamic: 961494.9860159999
8 | ifmap-leakage: 772646.5101185001
9 | ifmap-total: 1734141.4961345
10 | GLB-ofmap:
11 | ofmap-dynamic: 7502.170624
12 | ofmap-leakage: 772646.5101185001
13 | ofmap-total: 780148.6807425001
14 | GLB-weight:
15 | weight-dynamic: 567708.130624
16 | weight-leakage: 3075045.754805
17 | weight-total: 3642753.885429
18 | dram_total: 20160406.874454
19 | sram_total: 6157044.062306
20 |
--------------------------------------------------------------------------------
/inference-energy-cal/run.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 |
def print_sata_sim_banner():
    """Print the ASCII-art start-up banner to stdout."""
    # Raw string so the backslashes in the art are emitted literally.
    banner = r"""
_____ _______ _____ _____ __ __
/ ____| /\|__ __|/\ / ____|_ _| \/ |
| (___ / \ | | / \ ______ | (___ | | | \ / |
\___ \ / /\ \ | | / /\ \ |______| \___ \ | | | |\/| |
____) / ____ \| |/ ____ \ ____) |_| |_| | | |
|_____/_/ \_\_/_/ \_\ |_____/|_____|_| |_|

"""
    print(banner)
15 |
if __name__ == "__main__":
    # Entry point: show the banner, then run each analysis script as its own
    # python3 process, one after another.
    print_sata_sim_banner()

    # NOTE(review): the order is kept from the original script; presumably
    # later stages read files written by earlier ones -- confirm.
    pipeline_scripts = (
        'comp-utils.py',
        'mem-utils.py',
        'cycle-utils.py',
        'energy-cal.py',
    )
    for script in pipeline_scripts:
        # An argv list (no shell=True) runs the interpreter directly without
        # an intermediate shell.  check=True raises CalledProcessError as soon
        # as a stage exits non-zero, so the remaining stages are not run after
        # a failure.
        subprocess.run(['python3', script], check=True)
24 |
--------------------------------------------------------------------------------
/inference-energy-cal/sata-config.yaml:
--------------------------------------------------------------------------------
1 | architecture:
2 | name: SATA-inference
3 | dataflow: sata
4 | # timestep: 8
5 | clock-frequency: 400
6 |
7 | subtree:
8 | - name: PE-array
9 | class: pe-array
10 | attributes:
11 | width: 8
12 | height: 16
13 |
14 | subtree:
15 | - name: PE-compute
16 | class: pe-compute
17 |
18 | local:
19 | - name: and-gate
20 | class: compute
21 | act-tag: spike-mac
22 | attributes:
23 | kernel: andgate
24 | width: 8
25 | gated: n
26 | count: 1
27 | - name: adder
28 | class: compute
29 | act-tag: spike-mac
30 | attributes:
31 | kernel: adder
32 | width: 8
33 | gated: n
34 | count: 1
35 | - name: reg-acc
36 | class: compute
37 | act-tag: spike-mac
38 | attributes:
39 | kernel: register
40 | width: 16
41 | gated: y
42 | count: 1
43 | - name: comp-lif
44 | class: compute
45 | act-tag: lif
46 | attributes:
47 | kernel: comparator
48 | width: 16
49 | gated: n
50 | count: 1
51 | - name: sub-lif
52 | class: compute
53 | act-tag: lif
54 | attributes:
55 | kernel: subtractor
56 | width: 16
57 | gated: n
58 | count: 1
59 | - name: reg-th
60 | class: compute
61 | act-tag: lif
62 | attributes:
63 | kernel: register
64 | width: 4
65 | gated: y
66 | count: 1
67 | - name: reg-membrane
68 | class: compute
69 | act-tag: lif
70 | attributes:
71 | kernel: register
72 | width: 8
73 | gated: y
74 | count: 1
75 |
76 | - name: PE-mem
77 | class: pe-mem
78 |
79 | local:
80 | - name: SPAD-spike
81 | class: memory
82 | act-tag: spad
83 | attributes:
84 | kernel: registerfiles
85 | width: 8
86 | size-bytes: 9
87 | count: 1
88 | gated: n
89 | - name: SPAD-weight
90 | class: memory
91 | act-tag: spad
92 | attributes:
93 | kernel: registerfiles
94 | width: 8
95 | size-bytes: 9
96 | count: 1
97 | gated: y
98 | - name: SPAD-psum
99 | class: memory
100 | act-tag: spad
101 | attributes:
102 | kernel: registerfiles
103 | width: 8
104 | size-bytes: 8
105 | count: 1
106 | gated: n
107 |
108 | - name: GLBs
109 | class: glbs
110 |
111 | local:
112 | - name: GLB-weight
113 | class: memory
114 | act-tag: glb-weight
115 | attributes:
116 | kernel: sram
117 | width: 32
118 | size-bytes: 147456
119 | bank: 2
120 | count: 1
121 | gated: y
122 | - name: GLB-ifmap
123 | class: memory
124 | act-tag: glb-ifmap
125 | attributes:
126 | kernel: sram
127 | width: 32
128 | size-bytes: 32768
129 | bank: 2
130 | count: 1
131 | gated: y
132 | - name: GLB-ofmap
133 | class: memory
134 | act-tag: glb-ofmap
135 | attributes:
136 | kernel: sram
137 | width: 32
138 | size-bytes: 32768
139 | bank: 2
140 | count: 1
141 | gated: y
142 |
143 | - name: DRAMs
144 | class: dram
145 |
146 | local:
147 | - name: DRAM-system
148 | class: memory
149 | act-tag: dram
150 | attributes:
151 | kernel: dram
152 | width: 128
153 | size-bytes: 536870912
154 | bank: 16
155 | count: 1
156 | gated: y
157 |
--------------------------------------------------------------------------------
/inference-energy-cal/workload.yaml:
--------------------------------------------------------------------------------
1 | General:
2 | timestep: 4
3 | sparsity: 0.85
4 |
5 | Layers:
6 | - name: Conv1
7 | attributes:
8 | IFMAP Height: 32
9 | IFMAP Width: 32
10 | Filter Height: 3
11 | Filter Width: 3
12 | Channels: 3
13 | Num Filter: 128
14 | Strides: 1
15 | - name: Conv2
16 | attributes:
17 | IFMAP Height: 32
18 | IFMAP Width: 32
19 | Filter Height: 3
20 | Filter Width: 3
21 | Channels: 128
22 | Num Filter: 128
23 | Strides: 1
24 | - name: Conv3
25 | attributes:
26 | IFMAP Height: 32
27 | IFMAP Width: 32
28 | Filter Height: 3
29 | Filter Width: 3
30 | Channels: 128
31 | Num Filter: 128
32 | Strides: 1
33 | - name: Conv4
34 | attributes:
35 | IFMAP Height: 32
36 | IFMAP Width: 32
37 | Filter Height: 3
38 | Filter Width: 3
39 | Channels: 128
40 | Num Filter: 128
41 | Strides: 1
42 | - name: Conv5
43 | attributes:
44 | IFMAP Height: 32
45 | IFMAP Width: 32
46 | Filter Height: 3
47 | Filter Width: 3
48 | Channels: 128
49 | Num Filter: 128
50 | Strides: 1
51 | - name: Conv6
52 | attributes:
53 | IFMAP Height: 32
54 | IFMAP Width: 32
55 | Filter Height: 3
56 | Filter Width: 3
57 | Channels: 128
58 | Num Filter: 128
59 | Strides: 1
60 | - name: Conv7
61 | attributes:
62 | IFMAP Height: 32
63 | IFMAP Width: 32
64 | Filter Height: 3
65 | Filter Width: 3
66 | Channels: 128
67 | Num Filter: 128
68 | Strides: 1
69 | - name: Conv8
70 | attributes:
71 | IFMAP Height: 32
72 | IFMAP Width: 32
73 | Filter Height: 3
74 | Filter Width: 3
75 | Channels: 128
76 | Num Filter: 256
77 | Strides: 2
78 | - name: Conv9
79 | attributes:
80 | IFMAP Height: 16
81 | IFMAP Width: 16
82 | Filter Height: 3
83 | Filter Width: 3
84 | Channels: 256
85 | Num Filter: 256
86 | Strides: 1
87 | - name: Conv10
88 | attributes:
89 | IFMAP Height: 16
90 | IFMAP Width: 16
91 | Filter Height: 3
92 | Filter Width: 3
93 | Channels: 256
94 | Num Filter: 256
95 | Strides: 1
96 | - name: Conv11
97 | attributes:
98 | IFMAP Height: 16
99 | IFMAP Width: 16
100 | Filter Height: 3
101 | Filter Width: 3
102 | Channels: 256
103 | Num Filter: 256
104 | Strides: 1
105 | - name: Conv12
106 | attributes:
107 | IFMAP Height: 16
108 | IFMAP Width: 16
109 | Filter Height: 3
110 | Filter Width: 3
111 | Channels: 256
112 | Num Filter: 256
113 | Strides: 1
114 | - name: Conv13
115 | attributes:
116 | IFMAP Height: 16
117 | IFMAP Width: 16
118 | Filter Height: 3
119 | Filter Width: 3
120 | Channels: 256
121 | Num Filter: 256
122 | Strides: 1
123 | - name: Conv14
124 | attributes:
125 | IFMAP Height: 16
126 | IFMAP Width: 16
127 | Filter Height: 3
128 | Filter Width: 3
129 | Channels: 256
130 | Num Filter: 512
131 | Strides: 2
132 | - name: Conv15
133 | attributes:
134 | IFMAP Height: 8
135 | IFMAP Width: 8
136 | Filter Height: 3
137 | Filter Width: 3
138 | Channels: 512
139 | Num Filter: 512
140 | Strides: 1
141 | - name: Conv16
142 | attributes:
143 | IFMAP Height: 8
144 | IFMAP Width: 8
145 | Filter Height: 3
146 | Filter Width: 3
147 | Channels: 512
148 | Num Filter: 512
149 | Strides: 1
150 | - name: Conv17
151 | attributes:
152 | IFMAP Height: 8
153 | IFMAP Width: 8
154 | Filter Height: 3
155 | Filter Width: 3
156 | Channels: 512
157 | Num Filter: 512
158 | Strides: 1
159 | - name: FC1
160 | attributes:
161 | IFMAP Height: 1
162 | IFMAP Width: 1
163 | Filter Height: 1
164 | Filter Width: 1
165 | Channels: 8192
166 | Num Filter: 256
167 | Strides: 1
--------------------------------------------------------------------------------
/inference-energy-cal/workloads/workload._bntt.yaml:
--------------------------------------------------------------------------------
1 | General:
2 | timestep: 20
3 | sparsity: 0.91
4 |
5 | Layers:
6 | - name: Conv1
7 | attributes:
8 | IFMAP Height: 32
9 | IFMAP Width: 32
10 | Filter Height: 3
11 | Filter Width: 3
12 | Channels: 3
13 | Num Filter: 64
14 | Strides: 1
15 | - name: Conv2
16 | attributes:
17 | IFMAP Height: 32
18 | IFMAP Width: 32
19 | Filter Height: 3
20 | Filter Width: 3
21 | Channels: 64
22 | Num Filter: 64
23 | Strides: 1
24 | - name: Conv3
25 | attributes:
26 | IFMAP Height: 16
27 | IFMAP Width: 16
28 | Filter Height: 3
29 | Filter Width: 3
30 | Channels: 64
31 | Num Filter: 128
32 | Strides: 1
33 | - name: Conv4
34 | attributes:
35 | IFMAP Height: 8
36 | IFMAP Width: 8
37 | Filter Height: 3
38 | Filter Width: 3
39 | Channels: 128
40 | Num Filter: 128
41 | Strides: 1
42 | - name: Conv5
43 | attributes:
44 | IFMAP Height: 4
45 | IFMAP Width: 4
46 | Filter Height: 3
47 | Filter Width: 3
48 | Channels: 128
49 | Num Filter: 256
50 | Strides: 1
51 | - name: Conv6
52 | attributes:
53 | IFMAP Height: 4
54 | IFMAP Width: 4
55 | Filter Height: 3
56 | Filter Width: 3
57 | Channels: 256
58 | Num Filter: 256
59 | Strides: 1
60 | - name: Conv7
61 | attributes:
62 | IFMAP Height: 4
63 | IFMAP Width: 4
64 | Filter Height: 3
65 | Filter Width: 3
66 | Channels: 256
67 | Num Filter: 256
68 | Strides: 1
69 | - name: FC1
70 | attributes:
71 | IFMAP Height: 1
72 | IFMAP Width: 1
73 | Filter Height: 1
74 | Filter Width: 1
75 | Channels: 1024
76 | Num Filter: 1024
77 | Strides: 1
78 | - name: FC2
79 | attributes:
80 | IFMAP Height: 1
81 | IFMAP Width: 1
82 | Filter Height: 1
83 | Filter Width: 1
84 | Channels: 1024
85 | Num Filter: 512
86 | Strides: 1
--------------------------------------------------------------------------------
/inference-energy-cal/workloads/workload_direct.yaml:
--------------------------------------------------------------------------------
1 | General:
2 | timestep: 10
3 | sparsity: 0.9
4 |
5 | Layers:
6 | - name: Conv1
7 | attributes:
8 | IFMAP Height: 32
9 | IFMAP Width: 32
10 | Filter Height: 3
11 | Filter Width: 3
12 | Channels: 3
13 | Num Filter: 128
14 | Strides: 1
15 | - name: Conv2
16 | attributes:
17 | IFMAP Height: 32
18 | IFMAP Width: 32
19 | Filter Height: 3
20 | Filter Width: 3
21 | Channels: 128
22 | Num Filter: 256
23 | Strides: 1
24 | - name: Conv3
25 | attributes:
26 | IFMAP Height: 16
27 | IFMAP Width: 16
28 | Filter Height: 3
29 | Filter Width: 3
30 | Channels: 256
31 | Num Filter: 512
32 | Strides: 1
33 | - name: Conv4
34 | attributes:
35 | IFMAP Height: 8
36 | IFMAP Width: 8
37 | Filter Height: 3
38 | Filter Width: 3
39 | Channels: 512
40 | Num Filter: 1024
41 | Strides: 1
42 | - name: Conv5
43 | attributes:
44 | IFMAP Height: 4
45 | IFMAP Width: 4
46 | Filter Height: 3
47 | Filter Width: 3
48 | Channels: 1024
49 | Num Filter: 512
50 | Strides: 1
51 | - name: FC1
52 | attributes:
53 | IFMAP Height: 1
54 | IFMAP Width: 1
55 | Filter Height: 1
56 | Filter Width: 1
57 | Channels: 8192
58 | Num Filter: 1024
59 | Strides: 1
60 | - name: FC2
61 | attributes:
62 | IFMAP Height: 1
63 | IFMAP Width: 1
64 | Filter Height: 1
65 | Filter Width: 1
66 | Channels: 1024
67 | Num Filter: 512
68 | Strides: 1
--------------------------------------------------------------------------------
/inference-energy-cal/workloads/workload_tdbn.yaml:
--------------------------------------------------------------------------------
1 | General:
2 | timestep: 4
3 | sparsity: 0.85
4 |
5 | Layers:
6 | - name: Conv1
7 | attributes:
8 | IFMAP Height: 32
9 | IFMAP Width: 32
10 | Filter Height: 3
11 | Filter Width: 3
12 | Channels: 3
13 | Num Filter: 128
14 | Strides: 1
15 | - name: Conv2
16 | attributes:
17 | IFMAP Height: 32
18 | IFMAP Width: 32
19 | Filter Height: 3
20 | Filter Width: 3
21 | Channels: 128
22 | Num Filter: 128
23 | Strides: 1
24 | - name: Conv3
25 | attributes:
26 | IFMAP Height: 32
27 | IFMAP Width: 32
28 | Filter Height: 3
29 | Filter Width: 3
30 | Channels: 128
31 | Num Filter: 128
32 | Strides: 1
33 | - name: Conv4
34 | attributes:
35 | IFMAP Height: 32
36 | IFMAP Width: 32
37 | Filter Height: 3
38 | Filter Width: 3
39 | Channels: 128
40 | Num Filter: 128
41 | Strides: 1
42 | - name: Conv5
43 | attributes:
44 | IFMAP Height: 32
45 | IFMAP Width: 32
46 | Filter Height: 3
47 | Filter Width: 3
48 | Channels: 128
49 | Num Filter: 128
50 | Strides: 1
51 | - name: Conv6
52 | attributes:
53 | IFMAP Height: 32
54 | IFMAP Width: 32
55 | Filter Height: 3
56 | Filter Width: 3
57 | Channels: 128
58 | Num Filter: 128
59 | Strides: 1
60 | - name: Conv7
61 | attributes:
62 | IFMAP Height: 32
63 | IFMAP Width: 32
64 | Filter Height: 3
65 | Filter Width: 3
66 | Channels: 128
67 | Num Filter: 128
68 | Strides: 1
69 | - name: Conv8
70 | attributes:
71 | IFMAP Height: 32
72 | IFMAP Width: 32
73 | Filter Height: 3
74 | Filter Width: 3
75 | Channels: 128
76 | Num Filter: 256
77 | Strides: 2
78 | - name: Conv9
79 | attributes:
80 | IFMAP Height: 16
81 | IFMAP Width: 16
82 | Filter Height: 3
83 | Filter Width: 3
84 | Channels: 256
85 | Num Filter: 256
86 | Strides: 1
87 | - name: Conv10
88 | attributes:
89 | IFMAP Height: 16
90 | IFMAP Width: 16
91 | Filter Height: 3
92 | Filter Width: 3
93 | Channels: 256
94 | Num Filter: 256
95 | Strides: 1
96 | - name: Conv11
97 | attributes:
98 | IFMAP Height: 16
99 | IFMAP Width: 16
100 | Filter Height: 3
101 | Filter Width: 3
102 | Channels: 256
103 | Num Filter: 256
104 | Strides: 1
105 | - name: Conv12
106 | attributes:
107 | IFMAP Height: 16
108 | IFMAP Width: 16
109 | Filter Height: 3
110 | Filter Width: 3
111 | Channels: 256
112 | Num Filter: 256
113 | Strides: 1
114 | - name: Conv13
115 | attributes:
116 | IFMAP Height: 16
117 | IFMAP Width: 16
118 | Filter Height: 3
119 | Filter Width: 3
120 | Channels: 256
121 | Num Filter: 256
122 | Strides: 1
123 | - name: Conv14
124 | attributes:
125 | IFMAP Height: 16
126 | IFMAP Width: 16
127 | Filter Height: 3
128 | Filter Width: 3
129 | Channels: 256
130 | Num Filter: 512
131 | Strides: 2
132 | - name: Conv15
133 | attributes:
134 | IFMAP Height: 8
135 | IFMAP Width: 8
136 | Filter Height: 3
137 | Filter Width: 3
138 | Channels: 512
139 | Num Filter: 512
140 | Strides: 1
141 | - name: Conv16
142 | attributes:
143 | IFMAP Height: 8
144 | IFMAP Width: 8
145 | Filter Height: 3
146 | Filter Width: 3
147 | Channels: 512
148 | Num Filter: 512
149 | Strides: 1
150 | - name: Conv17
151 | attributes:
152 | IFMAP Height: 8
153 | IFMAP Width: 8
154 | Filter Height: 3
155 | Filter Width: 3
156 | Channels: 512
157 | Num Filter: 512
158 | Strides: 1
159 | - name: FC1
160 | attributes:
161 | IFMAP Height: 1
162 | IFMAP Width: 1
163 | Filter Height: 1
164 | Filter Width: 1
165 | Channels: 8192
166 | Num Filter: 256
167 | Strides: 1
--------------------------------------------------------------------------------
/inference-energy-cal/workloads/workload_tssl.yaml:
--------------------------------------------------------------------------------
1 | General:
2 | timestep: 5
3 | sparsity: 0.9
4 |
5 | Layers:
6 | - name: Conv1
7 | attributes:
8 | IFMAP Height: 32
9 | IFMAP Width: 32
10 | Filter Height: 3
11 | Filter Width: 3
12 | Channels: 3
13 | Num Filter: 128
14 | Strides: 1
15 | - name: Conv2
16 | attributes:
17 | IFMAP Height: 32
18 | IFMAP Width: 32
19 | Filter Height: 3
20 | Filter Width: 3
21 | Channels: 128
22 | Num Filter: 256
23 | Strides: 1
24 | - name: Conv3
25 | attributes:
26 | IFMAP Height: 16
27 | IFMAP Width: 16
28 | Filter Height: 3
29 | Filter Width: 3
30 | Channels: 256
31 | Num Filter: 512
32 | Strides: 1
33 | - name: Conv4
34 | attributes:
35 | IFMAP Height: 8
36 | IFMAP Width: 8
37 | Filter Height: 3
38 | Filter Width: 3
39 | Channels: 512
40 | Num Filter: 1024
41 | Strides: 1
42 | - name: Conv5
43 | attributes:
44 | IFMAP Height: 4
45 | IFMAP Width: 4
46 | Filter Height: 3
47 | Filter Width: 3
48 | Channels: 1024
49 | Num Filter: 512
50 | Strides: 1
51 | - name: FC1
52 | attributes:
53 | IFMAP Height: 1
54 | IFMAP Width: 1
55 | Filter Height: 1
56 | Filter Width: 1
57 | Channels: 8192
58 | Num Filter: 1024
59 | Strides: 1
60 | - name: FC2
61 | attributes:
62 | IFMAP Height: 1
63 | IFMAP Width: 1
64 | Filter Height: 1
65 | Filter Width: 1
66 | Channels: 1024
67 | Num Filter: 512
68 | Strides: 1
--------------------------------------------------------------------------------
/training_energy_cal/energy_cal.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import energy_configs
3 | import mem_configs
4 | import yaml
5 | from get_workload import get_workload
6 | from get_arch_energy import get_arch_energy
7 |
8 |
9 |
def main():
    """Estimate training energy for the network in ``vgg5_cifar10.yaml``.

    Event counts from ``get_workload`` are multiplied by the matching
    per-event energies from ``get_arch_energy`` and summed.  The totals are
    printed divided by ``single_ann_mac`` so they read as multiples of that
    reference value.  All run parameters (timesteps, forward bitwidth,
    sparsities, file names) are fixed inside this function; only the
    per-component energies come from the command line.
    """
    hw_config = 'sata_config.yaml'
    network_path = 'vgg5_cifar10.yaml'
    args = energy_configs.get_args()
    mem_args = mem_configs.get_args()

    T = 4
    fwd_b = 2

    sp_s = 0.9
    sp_du = 0.7
    sp_df = 0.6

    keyword = ['lif', 'mac_fwd', 'pgu', 'mac_bwd', 'mac_wup', 'dram_fwd', 'glb_fwd', 'spad_fwd', 'dram_bwd',
               'glb_bwd', 'spad_bwd', 'dram_wup', 'glb_wup', 'spad_wup']

    workload_d = get_workload(T, fwd_b, network_path, sp_s, sp_du, sp_df)
    arch_d = get_arch_energy(args, mem_args, hw_config, fwd_b)

    total_energy = 0
    total_fwd_comp = 0
    total_fwd_mem = 0

    for k in keyword:
        # Energy of one category = number of events * energy per event.
        energy = workload_d[k] * arch_d[k]
        total_energy += energy

        if k in ('lif', 'mac_fwd'):
            # Forward-pass computation.
            total_fwd_comp += energy
        elif 'fwd' in k:
            # Remaining '*_fwd' keys are the forward-pass memory levels.
            total_fwd_mem += energy

    # Normalisation unit; the two terms equal the --mul and --acc defaults
    # declared in energy_configs.
    single_ann_mac = 0.239 + 0.0389
    print("Bitwidth for fwd: ", fwd_b)
    # The backslash is escaped explicitly: a bare "\M" is an invalid escape
    # sequence (SyntaxWarning on Python 3.12+).  The printed text is unchanged.
    print("Total Energy in (\\MAC): ", total_energy/single_ann_mac)
    print("Total Fwd Energy in (\\MAC): ", (total_fwd_comp + total_fwd_mem)/single_ann_mac)
    print("Total Fwd Comp Energy in (\\MAC): ", total_fwd_comp/single_ann_mac)
    print("Total Fwd Mem Energy in (\\MAC): ", total_fwd_mem/single_ann_mac)


if __name__ == '__main__':
    main()
60 |
--------------------------------------------------------------------------------
/training_energy_cal/energy_configs.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 |
def get_args(argv=None):
    """Parse the per-operation energy options and return them as a namespace.

    Each option holds the dynamic energy of one 16-bit primitive (see the
    individual help strings).  The parsed namespace is also printed.

    Args:
        argv: Optional list of argument strings.  ``None`` (the default)
            parses ``sys.argv[1:]``, which is what existing callers get.

    Returns:
        argparse.Namespace with attributes ``mul``, ``acc``, ``add``, ``and``,
        ``comp``, ``mux``, ``reg`` and ``sft``.  ``and`` is a Python keyword,
        so read it through ``vars(args)['and']`` or ``getattr``.
    """
    parser = argparse.ArgumentParser("SATA_Energy_Component")

    parser.add_argument('--mul', type=float, default=0.239, help='dynamic energy for 16 bits multiplier')
    parser.add_argument('--acc', type=float, default=0.0389, help='dynamic energy for 16 bits accumulator')
    parser.add_argument('--add', type=float, default=0.00967, help='dynamic energy for 16 bits adder')
    parser.add_argument('--and', type=float, default=0.000794, help='dynamic energy for 16 bits bitwise-and')
    parser.add_argument('--comp', type=float, default=0.00309, help='dynamic energy for 16 bits comparator')
    parser.add_argument('--mux', type=float, default=0.00172, help='dynamic energy for 16 bits mux with 2 inputs')
    parser.add_argument('--reg', type=float, default=0.0301, help='dynamic energy for 16 bits register')
    parser.add_argument('--sft', type=float, default=0.00605, help='dynamic energy for 16 bits shifter, 3 stage')

    # Per-tensor bitwidth options, currently disabled:
    # parser.add_argument('--kw', type=int, default=8, help='bitwidth for weight')
    # parser.add_argument('--ku', type=int, default=8, help='bitwidth for membrane potential')
    # parser.add_argument('--kdu', type=int, default=8, help='bitwidth for gradient of membrane potential')
    # parser.add_argument('--kds', type=int, default=8, help='bitwidth for gradient of spike')
    # parser.add_argument('--kdw', type=int, default=8, help='bitwidth for gradient of weight')
    # parser.add_argument('--kh', type=int, default=8, help='bitwidth for error')

    # energy_cal.main also calls mem_configs.get_args() in the same process.
    # parse_known_args() leaves options this parser does not declare alone
    # instead of exiting with "unrecognized arguments".
    # NOTE(review): assumes mem_configs parses the same sys.argv -- confirm.
    args, _unrecognized = parser.parse_known_args(argv)
    print(args)

    return args
--------------------------------------------------------------------------------
/training_energy_cal/get_arch_energy.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import energy_configs
3 | import mem_configs
4 | import yaml
5 |
6 |
7 | # def backward(energy_dic):
8 |
9 |
def get_arch_energy(args, mem_args, hw_config, fwd_b):
    """Build the per-event energy table for the hardware in *hw_config*.

    The YAML file is read as a mapping ``phase -> unit -> {primitive: count}``.
    For every recognised (phase, unit) pair the energy of one event is the
    sum over its primitives of ``primitive energy * count``, with primitive
    energies looked up by name in ``vars(args)``.  Forward-phase entries are
    additionally scaled by ``fwd_b/16``.

    Args:
        args: namespace whose attributes are primitive energies keyed by the
            primitive names used in the YAML file.
        mem_args: object providing ``dram``, ``sram`` and ``spad`` energies.
        hw_config: path of the hardware YAML file.
        fwd_b: forward-pass value used in the ``fwd_b/16`` scale factor.

    Returns:
        dict with keys ``lif``, ``mac_fwd``, ``pgu``, ``mac_bwd``, ``mac_wup``
        and ``{dram,glb,spad}_{fwd,bwd,wup}``.
    """
    with open(hw_config, 'r') as file:
        hw_spec = yaml.full_load(file)

    primitive_energy = vars(args)
    fwd_scale = fwd_b/16

    # (phase, unit) pairs of the YAML file that feed each compute entry.
    compute_slots = {
        ('fwd', 'lif'): 'lif',
        ('fwd', 'mac'): 'mac_fwd',
        ('bwd', 'pgu'): 'pgu',
        ('bwd', 'mac'): 'mac_bwd',
        ('wup', 'mac'): 'mac_wup',
    }
    arch_e_dic = {slot: 0 for slot in compute_slots.values()}

    for phase, units in hw_spec.items():
        if phase not in ('fwd', 'bwd', 'wup'):
            continue
        for unit, op_counts in units.items():
            slot = compute_slots.get((phase, unit))
            if slot is None:
                # Units without a slot are ignored.
                continue
            for op in op_counts:
                if phase == 'fwd':
                    arch_e_dic[slot] += (primitive_energy[op] * fwd_scale) * op_counts[op]
                else:
                    arch_e_dic[slot] += primitive_energy[op] * op_counts[op]

    # Memory entries: only the forward phase is scaled by fwd_b/16.
    mem_levels = (
        ('dram', mem_args.dram),
        ('glb', mem_args.sram),
        ('spad', mem_args.spad),
    )
    for phase in ('fwd', 'bwd', 'wup'):
        for level, access_energy in mem_levels:
            if phase == 'fwd':
                arch_e_dic[f'{level}_{phase}'] = access_energy * fwd_scale
            else:
                arch_e_dic[f'{level}_{phase}'] = access_energy

    return arch_e_dic
69 |
--------------------------------------------------------------------------------
/training_energy_cal/get_workload.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import energy_configs
3 | import yaml
4 |
5 |
6 |
def get_workload(T,b,network_path,sp_s,sp_du,sp_df):
    """Count compute events and memory accesses over all layers of a network.

    The YAML file at *network_path* is read as a mapping of layer entries.
    Entries whose ``type`` is ``'2dconv'`` or ``'linear'`` contribute to the
    14 counters below; entries of any other type are ignored.

    Args:
        T: number of timesteps; multiplies every per-timestep term.
        b: input-feature-map terms are multiplied by ``1/b``.
        network_path: path of the network YAML file.
        sp_s: forward and weight-update conv MACs are scaled by ``1 - sp_s``.
        sp_du: backward conv MACs are scaled by ``1 - sp_du``.
        sp_df: accepted for interface compatibility; not used.

    Returns:
        dict with keys ``lif``, ``mac_fwd``, ``pgu``, ``mac_bwd``, ``mac_wup``
        and ``{dram,glb,spad}_{fwd,bwd,wup}``.

    Raises:
        KeyError: if a layer entry lacks ``type`` or a field its type needs.
    """
    with open(network_path,'r') as file:
        documents = yaml.full_load(file)

    counter_names = (
        'lif', 'mac_fwd', 'pgu', 'mac_bwd', 'mac_wup',
        'dram_fwd', 'glb_fwd', 'spad_fwd',
        'dram_bwd', 'glb_bwd', 'spad_bwd',
        'dram_wup', 'glb_wup', 'spad_wup',
    )
    workload_dic = dict.fromkeys(counter_names, 0)
    inv_b = 1/b

    for doc in documents.values():
        layer_type = doc['type']

        if layer_type == '2dconv':
            K, C = doc['K'], doc['C']
            R_h, R_w = doc['R_h'], doc['R_w']
            E_h, E_w = doc['E_h'], doc['E_w']
            H_h, H_w = doc['H_h'], doc['H_w']

            weights = K * C * R_h * R_w
            ofmap = K * E_h * E_w
            ifmap = inv_b * C * H_h * H_w

            # Float products keep the original left-to-right factor order so
            # results do not shift by rounding.
            workload_dic['lif'] += ofmap * T
            workload_dic['mac_fwd'] += (1-sp_s) * C * R_h * R_w * K * E_h * E_w * T
            workload_dic['pgu'] += ofmap * T
            workload_dic['mac_bwd'] += (1-sp_du) * C * R_h * R_w * K * H_h * H_w * T
            workload_dic['mac_wup'] += (1-sp_s) * C * R_h * R_w * K * E_h * E_w * T

            workload_dic['dram_fwd'] += weights + (ofmap + ifmap) * T
            workload_dic['glb_fwd'] += 2 * (weights + (ofmap + ifmap) * T)
            workload_dic['spad_fwd'] += 2 * (weights + T * inv_b * C * H_h * H_w)
            workload_dic['dram_bwd'] += T * (ofmap + ifmap)
            workload_dic['glb_bwd'] += 7 * T * ofmap + (2 * T * inv_b * C * H_h * H_w + weights)
            workload_dic['spad_bwd'] += weights + T * ofmap
            workload_dic['dram_wup'] += 2 * weights
            workload_dic['glb_wup'] += 2 * (1+T) * weights + T * (ifmap + ofmap)
            workload_dic['spad_wup'] += 2 * (1+T) * weights + T * (ifmap + ofmap) + 2 * T * weights

        elif layer_type == 'linear':
            n_out, n_in = doc['out'], doc['in']
            weights = n_out * n_in

            workload_dic['lif'] += n_out * T
            # The three MAC counters are accumulated with "+=".  Plain
            # assignment here discarded the counts of every earlier layer.
            # NOTE(review): unlike the conv branch no sparsity factor is
            # applied to linear MACs -- confirm that is intended.
            workload_dic['mac_fwd'] += n_out * n_in * T
            workload_dic['pgu'] += n_out * T
            workload_dic['mac_bwd'] += n_out * n_in * T
            workload_dic['mac_wup'] += n_out * n_in * T

            workload_dic['dram_fwd'] += weights + n_out*T + n_in*T*inv_b
            workload_dic['glb_fwd'] += 2*(weights + n_out*T + n_in*T*inv_b)
            workload_dic['spad_fwd'] += 2*(weights + T * inv_b * n_in)
            workload_dic['dram_bwd'] += T*(n_out + inv_b*n_in)
            workload_dic['glb_bwd'] += 7*T*n_out + (2*T*inv_b*n_in + n_in*n_out)
            workload_dic['spad_bwd'] += weights + T*n_out
            workload_dic['dram_wup'] += 2 * weights
            workload_dic['glb_wup'] += 2 * (1+T) * weights + T * (inv_b * n_in + n_out)
            workload_dic['spad_wup'] += 2 * (1+T) * weights + T * (inv_b * n_in + n_out) + 2*T*weights

    return workload_dic
85 |
86 |
87 |
--------------------------------------------------------------------------------
/training_energy_cal/get_workload_new.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import energy_configs
3 | import yaml
4 |
5 |
class Workload_Calculator:
    """Holds run parameters and the layer count of a network description."""

    def __init__(self, mapping='TF', network_path=None, sp_dic=None, T=8):
        """Store the timestep and sparsities, then count the network's layers.

        ``sp_dic`` must provide the keys ``'du'``, ``'s'`` and ``'df'``;
        ``network_path`` must name a YAML file holding a mapping with one
        entry per layer.  ``mapping`` is accepted but not used.
        """
        # Attribute name kept as spelled in the original ("timstep").
        self.timstep = T
        self.sp_du = sp_dic['du']
        self.sp_s = sp_dic['s']
        self.sp_df = sp_dic['df']

        with open(network_path, 'r') as file:
            documents = yaml.full_load(file)

        # One top-level entry per layer.
        self.layer = sum(1 for _ in documents.items())

    def cal(self):
        """Print the number of layers; always returns None."""
        print(self.layer)
        return None
29 |
def get_workload(T, b, network_path, sp_s, sp_du, sp_df):
    """Accumulate per-category operation and access counts over a network.

    The network description at ``network_path`` is a YAML mapping whose
    values are layer dictionaries. Layers with ``type == '2dconv'`` must
    provide ``K``, ``C``, ``R_h``, ``R_w``, ``E_h``, ``E_w``, ``H_h`` and
    ``H_w``; layers with ``type == 'linear'`` must provide ``in`` and
    ``out``. Layers of any other type are ignored.

    Fixes relative to the previous revision:
      * ``T`` and ``b`` are honoured; they used to be overwritten with 8.
      * ``mac_fwd`` / ``mac_bwd`` / ``mac_wup`` are accumulated for linear
        layers; they used to be assigned, discarding every earlier layer.
      * An empty YAML file yields all-zero counters instead of raising.

    Args:
        T: Multiplier applied to every per-timestep term.
        b: Divisor applied (as ``1 / b``) to every input-activation term.
        network_path: Path of the YAML network description.
        sp_s: Sparsity applied as ``(1 - sp_s)`` to the forward and
            weight-update MAC counts of 2-D convolution layers.
        sp_du: Sparsity applied as ``(1 - sp_du)`` to the backward MAC count
            of 2-D convolution layers.
        sp_df: Accepted for interface compatibility; not used.

    Returns:
        dict: Counters keyed by ``'lif'``, ``'mac_fwd'``, ``'pgu'``,
        ``'mac_bwd'``, ``'mac_wup'`` and ``'{dram,glb,spad}_{fwd,bwd,wup}'``
        (presumably DRAM / global-buffer / scratchpad traffic -- confirm
        against the energy model that consumes this dictionary).

    Raises:
        KeyError: If a layer lacks ``'type'`` or a dimension its type needs.
        OSError: If ``network_path`` cannot be opened.
    """
    with open(network_path, 'r') as file:
        # NOTE(review): if network files can come from untrusted sources,
        # prefer yaml.safe_load here; full_load is kept to preserve behavior.
        documents = yaml.full_load(file) or {}

    # Insertion order matches the key order callers have always received.
    workload_dic = dict.fromkeys(
        (
            'lif', 'mac_fwd', 'pgu', 'mac_bwd', 'mac_wup',
            'dram_fwd', 'glb_fwd', 'spad_fwd',
            'dram_bwd', 'glb_bwd', 'spad_bwd',
            'dram_wup', 'glb_wup', 'spad_wup',
        ),
        0,
    )

    for doc in documents.values():
        layer_type = doc['type']

        if layer_type == '2dconv':
            weights = doc['K'] * doc['C'] * doc['R_h'] * doc['R_w']
            outputs = doc['K'] * doc['E_h'] * doc['E_w']
            inputs = doc['C'] * doc['H_h'] * doc['H_w']
            mac_fwd = (1 - sp_s) * weights * doc['E_h'] * doc['E_w'] * T
            mac_bwd = (1 - sp_du) * weights * doc['H_h'] * doc['H_w'] * T
            # The weight-update MAC count uses the same expression as forward.
            mac_wup = mac_fwd
        elif layer_type == 'linear':
            weights = doc['out'] * doc['in']
            outputs = doc['out']
            inputs = doc['in']
            # No sparsity factor is applied to linear layers.
            mac_fwd = mac_bwd = mac_wup = weights * T
        else:
            continue

        # Input-activation term, scaled by 1/b in every traffic formula.
        scaled_inputs = (1 / b) * inputs
        per_step_io = T * (scaled_inputs + outputs)

        workload_dic['lif'] += outputs * T
        workload_dic['pgu'] += outputs * T
        workload_dic['mac_fwd'] += mac_fwd
        workload_dic['mac_bwd'] += mac_bwd
        workload_dic['mac_wup'] += mac_wup

        workload_dic['dram_fwd'] += weights + (outputs + scaled_inputs) * T
        workload_dic['glb_fwd'] += 2 * (weights + (outputs + scaled_inputs) * T)
        workload_dic['spad_fwd'] += 2 * (weights + T * scaled_inputs)

        workload_dic['dram_bwd'] += T * (outputs + scaled_inputs)
        workload_dic['glb_bwd'] += (
            7 * T * outputs + (2 * T * scaled_inputs + weights)
        )
        workload_dic['spad_bwd'] += weights + T * outputs

        workload_dic['dram_wup'] += 2 * weights
        workload_dic['glb_wup'] += 2 * (1 + T) * weights + per_step_io
        workload_dic['spad_wup'] += (
            2 * (1 + T) * weights + per_step_io + 2 * T * weights
        )

    return workload_dic
110 |
111 |
112 |
--------------------------------------------------------------------------------