├── .DS_Store ├── LICENSE ├── README.md ├── SATA_mem ├── m_sram.cfg ├── m_sram.cfg.out ├── mem_configs.py ├── s_sram.cfg ├── s_sram.cfg.out ├── u_sram.cfg ├── u_sram.cfg.out ├── w_sram.cfg ├── w_sram.cfg.out ├── w_sram_results.txt ├── z_sram.cfg └── z_sram.cfg.out ├── cacti ├── 2DDRAM_Samsung2GbDDR2.cfg ├── 2DDRAM_micron1Gb.cfg ├── 2DDRAM_micron1Gb.cfg.out ├── 3DDRAM_Samsung3D8Gb_extened.cfg ├── README ├── TSV.cc ├── TSV.h ├── Ucache.cc ├── Ucache.h ├── arbiter.cc ├── arbiter.h ├── area.cc ├── area.h ├── bank.cc ├── bank.h ├── basic_circuit.cc ├── basic_circuit.h ├── cache.cfg ├── cacti ├── cacti.i ├── cacti.mk ├── cacti_interface.cc ├── cacti_interface.h ├── component.cc ├── component.h ├── const.h ├── contention.dat ├── crossbar.cc ├── crossbar.h ├── ddr3.cfg ├── decoder.cc ├── decoder.h ├── dram-config │ └── dram-config.cfg ├── dram.cfg ├── dram_read_energy_results.txt ├── dram_results │ └── ddr3-dram.out ├── extio.cc ├── extio.h ├── extio_technology.cc ├── extio_technology.h ├── htree2.cc ├── htree2.h ├── io.cc ├── io.h ├── lpddr.cfg ├── main.cc ├── makefile ├── mat.cc ├── mat.h ├── memcad.cc ├── memcad.h ├── memcad_parameters.cc ├── memcad_parameters.h ├── memorybus.cc ├── memorybus.h ├── nuca.cc ├── nuca.h ├── obj_dbg │ ├── TSV.o │ ├── Ucache.o │ ├── arbiter.o │ ├── area.o │ ├── bank.o │ ├── basic_circuit.o │ ├── cacti │ ├── cacti_interface.o │ ├── component.o │ ├── crossbar.o │ ├── decoder.o │ ├── extio.o │ ├── extio_technology.o │ ├── htree2.o │ ├── io.o │ ├── main.o │ ├── mat.o │ ├── memcad.o │ ├── memcad_parameters.o │ ├── memorybus.o │ ├── nuca.o │ ├── parameter.o │ ├── powergating.o │ ├── router.o │ ├── subarray.o │ ├── technology.o │ ├── uca.o │ └── wire.o ├── parameter.cc ├── parameter.h ├── powergating.cc ├── powergating.h ├── regression.test ├── router.cc ├── router.h ├── sample_config_files │ ├── ddr3_cache.cfg │ ├── diff_ddr3_cache.cfg │ ├── lpddr3_cache.cfg │ └── wideio_cache.cfg ├── sram-config │ └── sram-config.cfg ├── subarray.cc ├── subarray.h ├── tech_params │ ├── 16nm.dat │ ├── 180nm-old.dat │ ├── 180nm.dat │ ├── 22nm.dat │ ├── 32nm.dat │ ├── 45nm.dat │ ├── 65nm-old.dat │ ├── 65nm.dat │ ├── 90nm-old.dat │ └── 90nm.dat ├── technology.cc ├── uca.cc ├── uca.h ├── version_cacti.h ├── wire.cc └── wire.h ├── config ├── sata_config.yaml └── vgg5_cifar10.yaml ├── har_configs ├── dcl_har.yaml ├── dcl_shar.yaml ├── fcn_har.yaml ├── fcn_shar.yaml ├── sata_ann_watch_config.yaml └── sata_watch_config.yaml ├── inference-energy-cal ├── .DS_Store ├── __pycache__ │ └── hw_kernels.cpython-39.pyc ├── comp-utils.py ├── cycle-utils.py ├── energy-cal.py ├── hw_kernels.py ├── mem-utils.py ├── related-work-estimate.py ├── results │ ├── bntt │ │ ├── comp-stat.yaml │ │ ├── computation-energy.yaml │ │ ├── cycle-stat.yaml │ │ ├── mem-stat.yaml │ │ └── memory-energy.yaml │ ├── direct │ │ ├── comp-stat.yaml │ │ ├── computation-energy.yaml │ │ ├── cycle-stat.yaml │ │ ├── mem-stat.yaml │ │ └── memory-energy.yaml │ ├── tdbn │ │ ├── comp-stat.yaml │ │ ├── computation-energy.yaml │ │ ├── cycle-stat.yaml │ │ ├── mem-stat.yaml │ │ └── memory-energy.yaml │ └── tssl │ │ ├── comp-stat.yaml │ │ ├── computation-energy.yaml │ │ ├── cycle-stat.yaml │ │ ├── mem-stat.yaml │ │ └── memory-energy.yaml ├── run.py ├── sata-config.yaml ├── workload.yaml └── workloads │ ├── workload._bntt.yaml │ ├── workload_direct.yaml │ ├── workload_tdbn.yaml │ └── workload_tssl.yaml └── training_energy_cal ├── energy_cal.py ├── energy_configs.py ├── get_arch_energy.py ├── get_workload.py └── get_workload_new.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/.DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Ruokai Yin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | For most updated version, please go to the following link: https://github.com/RuokaiYin/SATA_Sim 2 | Normally the updates will be reflected in this repo in 1 or 2 days. 3 | 4 | # SATA_Sim 5 | 6 | ## What's New: 7 | 8 | **2023-Sep-6:** 9 | 10 | A new version of SATA_Sim that supports cycle-accurate energy simulation for SNN inference is online! A more detailed READMe file will be added soon. 11 | 12 | The new version of SATA_Sim takes into consideration both dynamic energy and leakage energy while counting all the data movement energy. 13 | 14 | The new version of SATA_Sim even lets you modify the hardware architecture if required. 15 | 16 | We use cacti-7.0 and scale-sim-v2 as the backbone to simulate the memory component and to get the cycle statics. 17 | 18 | For a quick start: 19 | 20 | 1. Clone the project, download all the dependencies, and go to the inference-energy-cal folder. 21 | 2. Modify the workload.yaml for your targeting workload, and modify the sata-config.yaml if any hardware architecture level changes are needed. 22 | 3. Simply run 'python3 run.py' and find the computation and memory energy results in the results folder. Some of the other related statistics are also provided in the folder. 23 | 4. The simulation might be running slow for large workloads. 24 | 25 | Please do leave a message if any new features are needed. Happy running simulations on SNNs! Go Spike! 26 | 27 | 28 | **2023-Mar-15:** 29 | 30 | SATA_Sim now supports the different operand sizes (weights and membrane potentials) for the forward-stage energy estimation. 31 | 32 | One useful case is to use the tool to estimate the energy cost improvement of the quantized SNN models (both weight and membrane potential quantization is supported). 33 | 34 | To check the energy cost for different operand sizes, simply change the 'fwd_b' variable in the energy_cal.py to the target operand size. Please note that we assume the weights and membrane potentials are always quantized to the same bit-width. 35 | 36 | 37 | 38 | ## Overview 39 | 40 | SATA_Sim is an energy estimation framework for Backpropagation-Through-Time (BPTT) based Spiking Neural Networks (SNNs) training with sparsity awareness. 41 | 42 | ## Prerequisite 43 | 44 | Python (Version >= 3.6) 45 | 46 | ## Citing 47 | If you find SATA_Sim is useful for your research, please use the following bibtex to cite us, 48 | 49 | ``` 50 | @article{yin2022sata, 51 | title={Sata: Sparsity-aware training accelerator for spiking neural networks}, 52 | author={Yin, Ruokai and Moitra, Abhishek and Bhattacharjee, Abhiroop and Kim, Youngeun and Panda, Priyadarshini}, 53 | journal={IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems}, 54 | year={2022}, 55 | publisher={IEEE} 56 | } 57 | ``` 58 | 59 | ## Simple Usage Example 60 |

Please first provide the shape information of the network by writing a yaml file like the vgg5_cifar10.yaml.
61 | Then please specify the architecture like sata_config.yaml. You can directly use the sata_config.yaml to use the architecture of SATA.
62 | Then please specify the dynamic energy of the computation components in energy_configs.py. You can directly use the energy_configs.py for SATA.
63 | Then please specify the dynamic energy of the computation components in energy_configs.py. You can directly use the energy_configs.py for SATA.
64 | Please also specify the dynamic energy of memory components in mem_configs.py. This information can be obtained by using CACTI.
65 | Then please specify the timesteps, all three kinds of sparsity, bitwidth of parameters other than spikes in energy_cal.py.
66 | Finally, run the energy_cal.py, and you will get the energy estimation that is normalized with the energy of a single MAC operation in ANNs.

67 | 68 | ## Contribution 69 | Active contributor: 70 | 1. [Ruokai Yin](https://ruokaiyin.github.io/) 71 | 72 | Please contact me (ruokai.yin@yale.edu) if you are interested in contributing to this project! 73 | 74 | ## TODO: 75 | 76 | A more detailed READMe file will be added for using the new version of SATA_Sim. 77 | 78 | The estimation of backward and weight update computation will be added. :white_check_mark: 79 | 80 | The estimation of memory access energy of forward, backward, and weight update stages will be added. :white_check_mark: 81 | 82 | The instructions for using the codes will be added. :white_check_mark: 83 | 84 | Supporting the configurable bitwidth for internal fwd datapaths. :white_check_mark: 85 | 86 | Supporting the configurable bitwidth for internal bwd & wup datapaths. 87 | 88 | Supporting the other dataflow mappings other than the one used in SATA. :white_check_mark: 89 | 90 | Supporting the estimation mode that considers the leak energy. :white_check_mark: 91 | -------------------------------------------------------------------------------- /SATA_mem/m_sram.cfg.out: -------------------------------------------------------------------------------- 1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %, 2 | 32, 8192, 16, 1, 8, 0.394476, 0.236779, N/A, 0.00117638, 0.00112894, 0.142009, 0.0324569, 2, 2, 8, 16, 1, 1, 60.7883, N/A, N/A, N/A, N/A, N/A, N/A, N/A, 3 | -------------------------------------------------------------------------------- /SATA_mem/mem_configs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def get_args(): 5 | 6 | parser = argparse.ArgumentParser("SATA_MEM_Energy_Component") 7 | 8 | # parser.add_argument('--ssram', type=float, default=2.750, help='dynamic energy for isram') 9 | # parser.add_argument('--wsram', type=float, default=6.529, help='dynamic energy for wsram') 10 | # parser.add_argument('--usram', type=float, default=0.239, help='dynamic energy for osram') 11 | # parser.add_argument('--dusram', type=float, default=0.239, help='dynamic energy for osram') 12 | # parser.add_argument('--zsram', type=float, default=0.239, help='dynamic energy for osram') 13 | # parser.add_argument('--dzsram', type=float, default=0.239, help='dynamic energy for osram') 14 | # parser.add_argument('--msram', type=float, default=1.176, help='dynamic energy for osram') 15 | parser.add_argument('--dram', type=float, default=55.58, help='dynamic energy for dram') 16 | parser.add_argument('--sram', type=float, default=1.95, help='dynamic energy for sram') 17 | parser.add_argument('--spad', type=float, default=0.2779, help='dynamic energy for spad') 18 | # parser.add_argument('--ispad', type=float, default=0.239, help='dynamic energy for isram') 19 | # parser.add_argument('--wspad', type=float, default=0.2152, help='dynamic energy for wsram') 20 | 21 | 22 | 23 | args = parser.parse_args() 24 | print(args) 25 | 26 | return args -------------------------------------------------------------------------------- /SATA_mem/s_sram.cfg.out: -------------------------------------------------------------------------------- 1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %, 2 | 32, 32768, 16, 1, 8, 0.563493, 0.297886, N/A, 0.00274961, 0.00250566, 0.500408, 0.122022, 2, 2, 16, 32, 1, 1, 64.6768, N/A, N/A, N/A, N/A, N/A, N/A, N/A, 3 | -------------------------------------------------------------------------------- /SATA_mem/u_sram.cfg.out: -------------------------------------------------------------------------------- 1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %, 2 | 32, 262144, 16, 1, 8, 1.18734, 0.451046, N/A, 0.00899083, 0.00788884, 3.15182, 0.871484, 2, 4, 32, 16, 4, 1, 72.4466, N/A, N/A, N/A, N/A, N/A, N/A, N/A, 3 | -------------------------------------------------------------------------------- /SATA_mem/w_sram.cfg.out: -------------------------------------------------------------------------------- 1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %, 2 | 32, 147456, 16, 1, 8, 0.935514, 0.362277, N/A, 0.0065292, 0.00589278, 1.83923, 0.523663, 2, 4, 32, 16, 4, 1, 67.8185, N/A, N/A, N/A, N/A, N/A, N/A, N/A, 3 | 32, 147456, 8, 1, 8, 0.904971, 0.50647, N/A, 0.00676559, 0.00542787, 3.06862, 0.498412, 2, 4, 64, 32, 4, 1, 71.2544, N/A, N/A, N/A, N/A, N/A, N/A, N/A, 4 | -------------------------------------------------------------------------------- /SATA_mem/w_sram_results.txt: -------------------------------------------------------------------------------- 1 | Cache size : 147456 2 | Block size : 1 3 | Associativity : 1 4 | Read only ports : 1 5 | Write only ports : 1 6 | Read write ports : 0 7 | Single ended read ports : 0 8 | Cache banks (UCA) : 16 9 | Technology : 0.032 10 | Temperature : 360 11 | Tag size : 42 12 | array type : Scratch RAM 13 | Model as memory : 0 14 | Model as 3D memory : 0 15 | Access mode : 0 16 | Data array cell type : 2 17 | Data array peripheral type : 2 18 | Tag array cell type : 2 19 | Tag array peripheral type : 2 20 | Optimization target : 2 21 | Design objective (UCA wt) : 100 20 20 10 10 22 | Design objective (UCA dev) : 10 1000 1000 1000 1000 23 | Cache model : 0 24 | Nuca bank : 0 25 | Wire inside mat : 1 26 | Wire outside mat : 1 27 | Interconnect projection : 1 28 | Wire signaling : 0 29 | Print level : 0 30 | ECC overhead : 0 31 | Page size : 8192 32 | Burst length : 8 33 | Internal prefetch width : 8 34 | Force cache config : 0 35 | Subarray Driver direction : 1 36 | iostate : WRITE 37 | dram_ecc : NO_ECC 38 | io_type : DDR3 39 | dram_dimm : UDIMM 40 | IO Area (sq.mm) = inf 41 | IO Timing Margin (ps) = -14.1667 42 | IO Votlage Margin (V) = 0.155 43 | IO Dynamic Power (mW) = 1506.36 PHY Power (mW) = 232.752 PHY Wakeup Time (us) = 27.503 44 | IO Termination and Bias Power (mW) = 2505.96 45 | 46 | ---------- CACTI (version 7.0.3DD Prerelease of Aug, 2012), Uniform Cache Access SRAM Model ---------- 47 | 48 | Cache Parameters: 49 | Total cache size (bytes): 147456 50 | Number of banks: 16 51 | Associativity: direct mapped 52 | Block size (bytes): 1 53 | Read/write Ports: 0 54 | Read ports: 1 55 | Write ports: 1 56 | Technology size (nm): 32 57 | 58 | Access time (ns): 0.935514 59 | Cycle time (ns): 0.362277 60 | Total dynamic read energy per access (nJ): 0.0065292 61 | Total dynamic write energy per access (nJ): 0.00589278 62 | Total leakage power of a bank (mW): 1.81852 63 | Total gate leakage power of a bank (mW): 0.0207095 64 | Cache height x width (mm): 0.853564 x 0.613501 65 | 66 | Best Ndwl : 2 67 | Best Ndbl : 4 68 | Best Nspd : 32 69 | Best Ndcm : 16 70 | Best Ndsam L1 : 4 71 | Best Ndsam L2 : 1 72 | 73 | Data array, H-tree wire type: Global wires with 5% delay penalty 74 | top 3 best memory configurations are: 75 | Memory cap: 80 GB num_bobs: 1 bw: 533 (MHz) cost: $731.2 energy: 32.6101 (nJ) 76 | { 77 | (0) BoB cap: 80 GB num_channels: 1 bw: 533 (MHz) cost: $731.2 energy: 32.6101 (nJ) 78 | ============== 79 | (0) cap: 80 GB bw: 533 (MHz) cost: $731.2 dpc: 3 energy: 32.6101 (nJ) DIMM: RDIMM low power: F [ 0(4GB) 0(8GB) 1(16GB) 2(32GB) 0(64GB) ] 80 | ============== 81 | 82 | } 83 | 84 | ============================================= -------------------------------------------------------------------------------- /SATA_mem/z_sram.cfg.out: -------------------------------------------------------------------------------- 1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %, 2 | 32, 65536, 16, 1, 8, 0.75728, 0.398596, N/A, 0.00393635, 0.00346167, 0.926132, 0.223867, 2, 2, 16, 32, 1, 1, 70.506, N/A, N/A, N/A, N/A, N/A, N/A, N/A, 3 | 32, 65536, 16, 1, 8, 0.75728, 0.398596, N/A, 0.00393635, 0.00346167, 0.926132, 0.223867, 2, 2, 16, 32, 1, 1, 70.506, N/A, N/A, N/A, N/A, N/A, N/A, N/A, 4 | -------------------------------------------------------------------------------- /cacti/2DDRAM_micron1Gb.cfg.out: -------------------------------------------------------------------------------- 1 | Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %, 2 | 78, 1, 8, 1, 64, 29.4658, 64.5874, N/A, 1.87983, 1.87986, 0.018124, 72.5955, 16, 16, 1, 1, 1, 1, 62.2986, N/A, N/A, N/A, N/A, N/A, N/A, N/A, 3 | -------------------------------------------------------------------------------- /cacti/README: -------------------------------------------------------------------------------- 1 | ----------------------------------------------------------- 2 | 3 | 4 | ____ __ ________ __ 5 | /\ _`\ /\ \__ __ /\_____ \ /'__`\ 6 | \ \ \/\_\ __ ___\ \ ,_\/\_\ \/___//'/'/\ \/\ \ 7 | \ \ \/_/_ /'__`\ /'___\ \ \/\/\ \ /' /' \ \ \ \ \ 8 | \ \ \L\ \/\ \L\.\_/\ \__/\ \ \_\ \ \ /' /'__ \ \ \_\ \ 9 | \ \____/\ \__/.\_\ \____\\ \__\\ \_\ /\_/ /\_\ \ \____/ 10 | \/___/ \/__/\/_/\/____/ \/__/ \/_/ \// \/_/ \/___/ 11 | 12 | 13 | A Tool to Model Caches/Memories, 3D stacking, and off-chip IO 14 | ----------------------------------------------------------- 15 | 16 | CACTI is an analytical tool that takes a set of cache/memory para- 17 | meters as input and calculates its access time, power, cycle 18 | time, and area. 19 | CACTI was originally developed by Dr. Jouppi and Dr. Wilton 20 | in 1993 and since then it has undergone six major 21 | revisions. 22 | 23 | List of features (version 1-7): 24 | =============================== 25 | The following is the list of features supported by the tool. 26 | 27 | * Power, delay, area, and cycle time model for 28 | direct mapped caches 29 | set-associative caches 30 | fully associative caches 31 | Embedded DRAM memories 32 | Commodity DRAM memories 33 | 34 | * Support for modeling multi-ported uniform cache access (UCA) 35 | and multi-banked, multi-ported non-uniform cache access (NUCA). 36 | 37 | * Leakage power calculation that also considers the operating 38 | temperature of the cache. 39 | 40 | * Router power model. 41 | 42 | * Interconnect model with different delay, power, and area 43 | properties including low-swing wire model. 44 | 45 | * An interface to perform trade-off analysis involving power, delay, 46 | area, and bandwidth. 47 | 48 | * All process specific values used by the tool are obtained 49 | from ITRS and currently, the tool supports 90nm, 65nm, 45nm, 50 | and 32nm technology nodes. 51 | 52 | * Chip IO model to calculate latency and energy for DDR bus. Users can model 53 | different loads (fan-outs) and evaluate the impact on frequency and energy. 54 | This model can be used to study LR-DIMMs, R-DIMMs, etc. 55 | 56 | Version 7.0 is derived from 6.5 and merged with CACTI 3D. 57 | It has many new additions apart from code refinements and 58 | bug fixes: new IO model, 3D memory model, and power gating models. 59 | Ref: CACTI-IO: CACTI With OFF-chip Power-Area-Timing Models 60 | MemCAD: An Interconnect Exploratory Tool for Innovative Memories Beyond DDR4 61 | CACTI-3DD: Architecture-level modeling for 3D die-stacked DRAM main memory 62 | 63 | -------------------------------------------------------------------------- 64 | Version 6.5 has a new c++ code base and includes numerous bug fixes. 65 | CACTI 5.3 and 6.0 activate an entire row of mats to read/write a single 66 | block of data. This technique improves reliability at the cost of 67 | power. CACTI 6.5 activates minimum number of mats just enough to retrieve 68 | a block to minimize power. 69 | 70 | How to use the tool? 71 | ==================== 72 | Prior versions of CACTI take input parameters such as cache 73 | size and technology node as a set of command line arguments. 74 | To avoid a long list of command line arguments, 75 | CACTI 6.5 & & let users specify their cache model in a more 76 | detailed manner by using a config file (cache.cfg). 77 | 78 | -> define the cache model using cache.cfg 79 | -> run the "cacti" binary <./cacti -infile cache.cfg> 80 | 81 | CACTI also provides a command line interface similar to earlier versions. The command line interface can be used as 82 | 83 | ./cacti cache_size line_size associativity rw_ports excl_read_ports excl_write_ports 84 | single_ended_read_ports search_ports banks tech_node output_width specific_tag tag_width 85 | access_mode cache main_mem obj_func_delay obj_func_dynamic_power obj_func_leakage_power 86 | obj_func_cycle_time obj_func_area dev_func_delay dev_func_dynamic_power dev_func_leakage_power 87 | dev_func_area dev_func_cycle_time ed_ed2_none temp wt data_arr_ram_cell_tech_flavor_in 88 | data_arr_peri_global_tech_flavor_in tag_arr_ram_cell_tech_flavor_in tag_arr_peri_global_tech_flavor_in 89 | interconnect_projection_type_in wire_inside_mat_type_in wire_outside_mat_type_in 90 | REPEATERS_IN_HTREE_SEGMENTS_in VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in 91 | BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in PAGE_SIZE_BITS_in BURST_LENGTH_in 92 | INTERNAL_PREFETCH_WIDTH_in force_wiretype wiretype force_config ndwl ndbl nspd ndcm 93 | ndsam1 ndsam2 ecc 94 | 95 | For complete documentation of the tool, please refer 96 | to the following publications and reports. 97 | 98 | CACTI-5.3 & 6 reports - Details on Meory/cache organizations and tradeoffs. 99 | 100 | Latency/Energy tradeoffs for large caches and NUCA design: 101 | "Optimizing NUCA Organizations and Wiring Alternatives for Large Caches With CACTI 6.0", that appears in MICRO 2007. 102 | 103 | Memory IO design: CACTI-IO: CACTI With OFF-chip Power-Area-Timing Models, 104 | MemCAD: An Interconnect Exploratory Tool for Innovative Memories Beyond DDR4 105 | CACTI-IO Technical Report - http://www.hpl.hp.com/techreports/2013/HPL-2013-79.pdf 106 | 107 | 3D model: 108 | CACTI-3DD: Architecture-level modeling for 3D die-stacked DRAM main memory 109 | 110 | We are still improving the tool and refining the code. If you 111 | have any comments, questions, or suggestions please write to 112 | us. 113 | 114 | Naveen Muralimanohar 115 | naveen.muralimanohar@hpe.com 116 | 117 | Ali Shafiee 118 | shafiee@cs.utah.edu 119 | 120 | Vaishnav Srinivas 121 | vaishnav.srinivas@gmail.com 122 | 123 | -------------------------------------------------------------------------------- /cacti/TSV.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | #ifndef TSV_H_ 33 | #define TSV_H_ 34 | 35 | #include "basic_circuit.h" 36 | #include "component.h" 37 | #include "parameter.h" 38 | //#include "assert.h" 39 | #include "cacti_interface.h" 40 | #include "const.h" 41 | //#include "area.h" 42 | #include 43 | #include 44 | #include 45 | 46 | 47 | class TSV : public Component 48 | { 49 | public: 50 | TSV(enum TSV_type tsv_type, 51 | /*TechnologyParameter::*/DeviceType * dt = &(g_tp.peri_global));//Should change peri_global to TSV in technology.cc 52 | //TSV():len(20),rad(2.5),pitch(50){} 53 | ~TSV(); 54 | 55 | double res;//TSV resistance 56 | double cap;//TSV capacitance 57 | double C_load_TSV;//The intrinsic load plus the load TSV is driving, needs changes? 58 | double min_area; 59 | 60 | //int num_IO;//number of I/O 61 | int num_gates; 62 | int num_gates_min;//Necessary? 63 | double w_TSV_n[MAX_NUMBER_GATES_STAGE]; 64 | double w_TSV_p[MAX_NUMBER_GATES_STAGE]; 65 | 66 | //double delay_TSV_path;//Delay of TSV path including the parasitics 67 | 68 | double is_dram;//two external arguments, defaulted to be false in constructor 69 | double is_wl_tr; 70 | 71 | void compute_buffer_stage(); 72 | void compute_area(); 73 | void compute_delay(); 74 | void print_TSV(); 75 | 76 | Area TSV_metal_area; 77 | Area Buffer_area; 78 | 79 | /*//Herigated from Component 80 | double delay; 81 | Area area; 82 | powerDef power, rt_power; 83 | double delay; 84 | double cycle_time; 85 | 86 | int logical_effort();*/ 87 | 88 | private: 89 | double min_w_pmos; 90 | /*TechnologyParameter::*/DeviceType * deviceType; 91 | unsigned int tsv_type; 92 | 93 | }; 94 | 95 | 96 | #endif /* TSV_H_ */ 97 | -------------------------------------------------------------------------------- /cacti/Ucache.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | #ifndef __UCACHE_H__ 34 | #define __UCACHE_H__ 35 | 36 | #include 37 | #include "area.h" 38 | #include "router.h" 39 | #include "nuca.h" 40 | 41 | 42 | class min_values_t 43 | { 44 | public: 45 | double min_delay; 46 | double min_dyn; 47 | double min_leakage; 48 | double min_area; 49 | double min_cyc; 50 | 51 | min_values_t() : min_delay(BIGNUM), min_dyn(BIGNUM), min_leakage(BIGNUM), min_area(BIGNUM), min_cyc(BIGNUM) { } 52 | 53 | void update_min_values(const min_values_t * val); 54 | void update_min_values(const uca_org_t & res); 55 | void update_min_values(const nuca_org_t * res); 56 | void update_min_values(const mem_array * res); 57 | }; 58 | 59 | 60 | 61 | struct solution 62 | { 63 | int tag_array_index; 64 | int data_array_index; 65 | list::iterator tag_array_iter; 66 | list::iterator data_array_iter; 67 | double access_time; 68 | double cycle_time; 69 | double area; 70 | double efficiency; 71 | powerDef total_power; 72 | }; 73 | 74 | 75 | 76 | bool calculate_time( 77 | bool is_tag, 78 | int pure_ram, 79 | bool pure_cam, 80 | double Nspd, 81 | unsigned int Ndwl, 82 | unsigned int Ndbl, 83 | unsigned int Ndcm, 84 | unsigned int Ndsam_lev_1, 85 | unsigned int Ndsam_lev_2, 86 | mem_array *ptr_array, 87 | int flag_results_populate, 88 | results_mem_array *ptr_results, 89 | uca_org_t *ptr_fin_res, 90 | Wire_type wtype, // merge from cacti-7 to cacti3d 91 | bool is_main_mem); 92 | void update(uca_org_t *fin_res); 93 | 94 | void solve(uca_org_t *fin_res); 95 | void init_tech_params(double tech, bool is_tag); 96 | 97 | 98 | struct calc_time_mt_wrapper_struct 99 | { 100 | uint32_t tid; 101 | bool is_tag; 102 | bool pure_ram; 103 | bool pure_cam; 104 | bool is_main_mem; 105 | double Nspd_min; 106 | 107 | min_values_t * data_res; 108 | min_values_t * tag_res; 109 | 110 | list data_arr; 111 | list tag_arr; 112 | }; 113 | 114 | void *calc_time_mt_wrapper(void * void_obj); 115 | 116 | void print_g_tp(); 117 | 118 | #endif 119 | -------------------------------------------------------------------------------- /cacti/arbiter.cc: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | #include "arbiter.h" 33 | 34 | Arbiter::Arbiter( 35 | double n_req, 36 | double flit_size_, 37 | double output_len, 38 | /*TechnologyParameter::*/DeviceType *dt 39 | ):R(n_req), flit_size(flit_size_), 40 | o_len (output_len), deviceType(dt) 41 | { 42 | min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; 43 | Vdd = dt->Vdd; 44 | double technology = g_ip->F_sz_um; 45 | NTn1 = 13.5*technology/2; 46 | PTn1 = 76*technology/2; 47 | NTn2 = 13.5*technology/2; 48 | PTn2 = 76*technology/2; 49 | NTi = 12.5*technology/2; 50 | PTi = 25*technology/2; 51 | NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/ 52 | PTtr = 20*technology/2; /* pmos tr. length*/ 53 | } 54 | 55 | Arbiter::~Arbiter(){} 56 | 57 | double 58 | Arbiter::arb_req() { 59 | double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) + 60 | gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) + 61 | drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def)); 62 | return temp; 63 | } 64 | 65 | double 66 | Arbiter::arb_pri() { 67 | double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance 68 | of flip-flop is ignored */ 69 | return temp; 70 | } 71 | 72 | 73 | double 74 | Arbiter::arb_grant() { 75 | double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline(); 76 | return temp; 77 | } 78 | 79 | double 80 | Arbiter::arb_int() { 81 | double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + 82 | 2*gate_C(NTn2, 0) + gate_C(PTn2, 0)); 83 | return temp; 84 | } 85 | 86 | void 87 | Arbiter::compute_power() { 88 | power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 + 89 | arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd); 90 | double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor); 91 | double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor); 92 | double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv); 93 | double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor); 94 | double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor); 95 | double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv); 96 | power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage 97 | power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd; 98 | } 99 | 100 | double //wire cap with triple spacing 101 | Arbiter::Cw3(double length) { 102 | Wire wc(g_ip->wt, length, 1, 3, 3); 103 | double temp = (wc.wire_cap(length,true)); 104 | return temp; 105 | } 106 | 107 | double 108 | Arbiter::crossbar_ctrline() { 109 | double temp = (Cw3(o_len * 1e-6 /* m */) + 110 | drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) + 111 | gate_C(NTi, 0) + gate_C(PTi, 0)); 112 | return temp; 113 | } 114 | 115 | double 116 | Arbiter::transmission_buf_ctrcap() { 117 | double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0); 118 | return temp; 119 | } 120 | 121 | 122 | void Arbiter::print_arbiter() 123 | { 124 | cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n"; 125 | cout << "Flit size : " << flit_size << " bits" << endl; 126 | cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl; 127 | cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; 128 | } 129 | 130 | 131 | -------------------------------------------------------------------------------- /cacti/arbiter.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | #ifndef __ARBITER__ 33 | #define __ARBITER__ 34 | 35 | #include 36 | #include 37 | #include "basic_circuit.h" 38 | #include "cacti_interface.h" 39 | #include "component.h" 40 | #include "parameter.h" 41 | #include "mat.h" 42 | #include "wire.h" 43 | 44 | class Arbiter : public Component 45 | { 46 | public: 47 | Arbiter( 48 | double Req, 49 | double flit_sz, 50 | double output_len, 51 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global)); 52 | ~Arbiter(); 53 | 54 | void print_arbiter(); 55 | double arb_req(); 56 | double arb_pri(); 57 | double arb_grant(); 58 | double arb_int(); 59 | void compute_power(); 60 | double Cw3(double len); 61 | double crossbar_ctrline(); 62 | double transmission_buf_ctrcap(); 63 | 64 | 65 | 66 | private: 67 | double NTn1, PTn1, NTn2, PTn2, R, PTi, NTi; 68 | double flit_size; 69 | double NTtr, PTtr; 70 | double o_len; 71 | /*TechnologyParameter::*/DeviceType *deviceType; 72 | double TriS1, TriS2; 73 | double min_w_pmos, Vdd; 74 | 75 | }; 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /cacti/area.cc: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | 34 | #include "area.h" 35 | #include "component.h" 36 | #include "decoder.h" 37 | #include "parameter.h" 38 | #include "basic_circuit.h" 39 | #include 40 | #include 41 | #include 42 | 43 | using namespace std; 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /cacti/area.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | 34 | #ifndef __AREA_H__ 35 | #define __AREA_H__ 36 | 37 | #include "cacti_interface.h" 38 | #include "basic_circuit.h" 39 | 40 | using namespace std; 41 | 42 | class Area 43 | { 44 | public: 45 | double w; 46 | double h; 47 | 48 | Area():w(0), h(0), area(0) { } 49 | double get_w() const { return w; } 50 | double get_h() const { return h; } 51 | double get_area() const 52 | { 53 | if (w == 0 && h == 0) 54 | { 55 | return area; 56 | } 57 | else 58 | { 59 | return w*h; 60 | } 61 | } 62 | void set_w(double w_) { w = w_; } 63 | void set_h(double h_) { h = h_; } 64 | void set_area(double a_) { area = a_; } 65 | 66 | private: 67 | double area; 68 | }; 69 | 70 | #endif 71 | 72 | -------------------------------------------------------------------------------- /cacti/bank.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | 34 | #ifndef __BANK_H__ 35 | #define __BANK_H__ 36 | 37 | #include "component.h" 38 | #include "decoder.h" 39 | #include "mat.h" 40 | #include "htree2.h" 41 | 42 | 43 | class Bank : public Component 44 | { 45 | public: 46 | Bank(const DynamicParameter & dyn_p); 47 | ~Bank(); 48 | double compute_delays(double inrisetime); // return outrisetime 49 | void compute_power_energy(); 50 | 51 | const DynamicParameter & dp; 52 | Mat mat; 53 | Htree2 *htree_in_add; 54 | Htree2 *htree_in_data; 55 | Htree2 *htree_out_data; 56 | Htree2 *htree_in_search; 57 | Htree2 *htree_out_search; 58 | 59 | int num_addr_b_mat; 60 | int num_mats_hor_dir; 61 | int num_mats_ver_dir; 62 | 63 | int num_addr_b_row_dec; 64 | int num_addr_b_routed_to_mat_for_act; 65 | int num_addr_b_routed_to_mat_for_rd_or_wr; 66 | 67 | double array_leakage; 68 | double wl_leakage; 69 | double cl_leakage; 70 | }; 71 | 72 | 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /cacti/cacti: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/cacti -------------------------------------------------------------------------------- /cacti/cacti.i: -------------------------------------------------------------------------------- 1 | %module cacti 2 | %{ 3 | /* Includes the header in the wrapper code */ 4 | #include "cacti_interface.h" 5 | %} 6 | 7 | /* Parse the header file to generate wrappers */ 8 | %include "cacti_interface.h" -------------------------------------------------------------------------------- /cacti/cacti.mk: -------------------------------------------------------------------------------- 1 | TARGET = cacti 2 | SHELL = /bin/sh 3 | .PHONY: all depend clean 4 | .SUFFIXES: .cc .o 5 | 6 | ifndef NTHREADS 7 | NTHREADS = 8 8 | endif 9 | 10 | 11 | LIBS = 12 | INCS = -lm 13 | 14 | ifeq ($(TAG),dbg) 15 | DBG = -Wall 16 | OPT = -ggdb -g -O0 -DNTHREADS=1 -gstabs+ 17 | else 18 | DBG = 19 | OPT = -g -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) 20 | endif 21 | 22 | #CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT) 23 | CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT) 24 | CXX = g++ -m64 25 | CC = gcc -m64 26 | 27 | SRCS = area.cc bank.cc mat.cc main.cc Ucache.cc io.cc technology.cc basic_circuit.cc parameter.cc \ 28 | decoder.cc component.cc uca.cc subarray.cc wire.cc htree2.cc extio.cc extio_technology.cc \ 29 | cacti_interface.cc router.cc nuca.cc crossbar.cc arbiter.cc powergating.cc TSV.cc memorybus.cc \ 30 | memcad.cc memcad_parameters.cc 31 | 32 | 33 | OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS)) 34 | PYTHONLIB_SRCS = $(patsubst main.cc, ,$(SRCS)) obj_$(TAG)/cacti_wrap.cc 35 | PYTHONLIB_OBJS = $(patsubst %.cc,%.o,$(PYTHONLIB_SRCS)) 36 | INCLUDES = -I /usr/include/python2.4 -I /usr/lib/python2.4/config 37 | 38 | all: obj_$(TAG)/$(TARGET) 39 | cp -f obj_$(TAG)/$(TARGET) $(TARGET) 40 | 41 | obj_$(TAG)/$(TARGET) : $(OBJS) 42 | $(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread 43 | 44 | #obj_$(TAG)/%.o : %.cc 45 | # $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $< 46 | 47 | obj_$(TAG)/%.o : %.cc 48 | $(CXX) $(CXXFLAGS) -c $< -o $@ 49 | 50 | clean: 51 | -rm -f *.o _cacti.so cacti.py $(TARGET) 52 | 53 | 54 | -------------------------------------------------------------------------------- /cacti/component.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | 34 | #ifndef __COMPONENT_H__ 35 | #define __COMPONENT_H__ 36 | 37 | #include "parameter.h" 38 | #include "area.h" 39 | 40 | using namespace std; 41 | 42 | class Crossbar; 43 | class Bank; 44 | 45 | class Component 46 | { 47 | public: 48 | Component(); 49 | ~Component(); 50 | 51 | Area area; 52 | powerDef power,rt_power; 53 | double delay; 54 | double cycle_time; 55 | 56 | double compute_gate_area( 57 | int gate_type, 58 | int num_inputs, 59 | double w_pmos, 60 | double w_nmos, 61 | double h_gate); 62 | 63 | double compute_tr_width_after_folding(double input_width, double threshold_folding_width); 64 | double height_sense_amplifier(double pitch_sense_amp); 65 | 66 | protected: 67 | int logical_effort( 68 | int num_gates_min, 69 | double g, 70 | double F, 71 | double * w_n, 72 | double * w_p, 73 | double C_load, 74 | double p_to_n_sz_ratio, 75 | bool is_dram_, 76 | bool is_wl_tr_, 77 | double max_w_nmos); 78 | 79 | private: 80 | double compute_diffusion_width(int num_stacked_in, int num_folded_tr); 81 | }; 82 | 83 | #endif 84 | 85 | -------------------------------------------------------------------------------- /cacti/contention.dat: -------------------------------------------------------------------------------- 1 | l34c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 2 | l34c64l2b: 9 11 19 29 43 62 81 102 3 | l34c64l4b: 6 8 12 17 24 29 39 47 4 | l34c64l8b: 7 8 10 14 18 22 25 30 5 | l34c64l16b: 7 7 9 12 14 17 20 24 6 | l34c64l32b: 7 7 9 12 14 17 20 24 -r 7 | l34c64l64b: 7 7 9 12 14 17 20 24 -r 8 | l34c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 9 | l34c128l2b: 4 10 19 30 44 64 82 103 10 | l34c128l4b: 3 6 11 17 24 31 38 47 11 | l34c128l8b: 3 5 9 13 17 21 25 29 12 | l34c128l16b: 4 5 7 10 13 16 19 22 13 | l34c128l32b: 4 5 7 10 13 16 19 22 -r 14 | l34c128l64b: 4 5 7 10 13 16 19 22 -r 15 | l34c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 16 | l34c256l2b: 3 10 19 30 44 63 82 103 17 | l34c256l4b: 3 6 11 17 24 31 38 47 18 | l34c256l8b: 2 5 8 12 16 20 24 29 19 | l34c256l16b: 2 4 7 9 12 15 18 21 20 | l34c256l32b: 2 4 7 9 12 15 18 21 -r 21 | l34c256l64b: 2 4 7 9 12 15 18 21 -r 22 | l38c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 23 | l38c64l2b: 57 59 77 90 137 187 219 245 24 | l38c64l4b: 35 40 48 56 43 61 80 101 25 | l38c64l8b: 18 27 41 45 52 58 58 58 -r 26 | l38c64l16b: 16 17 19 35 40 49 53 53 -r 27 | l38c64l32b: 15 15 17 19 22 25 30 30 -r 28 | l38c64l64b: 15 15 17 19 22 25 30 30 -r 29 | l38c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 30 | l38c128l2b: 38 50 78 93 139 188 220 245 31 | l38c128l4b: 29 37 46 56 43 61 81 102 32 | l38c128l8b: 16 30 39 44 50 57 57 57 -r 33 | l38c128l16b: 14 16 19 33 40 47 52 52 -r 34 | l38c128l32b: 14 15 17 20 23 27 31 31 -r 35 | l38c128l64b: 14 15 17 20 23 27 31 31 -r 36 | l38c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 37 | l38c256l2b: 35 50 78 94 139 188 220 246 38 | l38c256l4b: 28 36 45 55 55 61 81 102 39 | l38c256l8b: 17 30 38 43 50 57 57 57 -r 40 | l38c256l16b: 15 17 21 32 40 47 51 51 41 | l38c256l32b: 15 17 19 21 24 29 33 33 42 | l38c256l64b: 15 17 19 21 24 29 33 33 -r 43 | l316c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 44 | l316c64l2b: 1000 1000 1000 1000 1000 1000 1000 1000 45 | l316c64l4b: 34 35 78 126 178 220 252 274 46 | l316c64l8b: 9 11 23 43 62 87 105 130 47 | l316c64l16b: 7 9 13 23 33 45 56 67 48 | l316c64l32b: 5 6 7 10 13 19 25 30 49 | l316c64l64b: 4 5 6 8 10 14 18 21 50 | l316c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 51 | l316c128l2b: 25 131 243 1000 1000 1000 1000 1000 52 | l316c128l4b: 8 28 79 127 179 221 253 274 53 | l316c128l8b: 4 9 22 43 62 88 106 131 54 | l316c128l16b: 4 6 11 21 32 44 55 67 55 | l316c128l32b: 4 6 11 12 12 18 24 29 56 | l316c128l64b: 2 3 5 7 9 13 17 21 57 | l316c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 58 | l316c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000 59 | l316c256l4b: 5 28 80 128 180 221 253 274 60 | l316c256l8b: 3 8 22 43 63 88 107 131 61 | l316c256l16b: 2 5 11 21 32 44 55 67 62 | l316c256l32b: 2 3 5 8 12 18 24 29 63 | l316c256l64b: 2 3 4 6 9 13 17 21 64 | l24c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 65 | l24c64l2b: 10 12 24 41 60 86 105 122 66 | l24c64l4b: 5 7 13 20 29 38 47 56 67 | l24c64l8b: 5 6 9 14 18 24 29 35 68 | l24c64l16b: 4 5 7 10 12 16 19 22 69 | l24c64l32b: 5 5 6 8 10 12 14 17 70 | l24c64l64b: 5 5 6 8 10 12 14 16 71 | l24c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 72 | l24c128l2b: 1000 1000 1000 1000 1000 1000 1000 1000 73 | l24c128l4b: 3 7 13 20 29 38 47 57 74 | l24c128l8b: 3 5 9 13 18 23 29 35 75 | l24c128l16b: 3 4 6 9 12 15 19 22 76 | l24c128l32b: 3 4 5 7 9 11 14 16 77 | l24c128l64b: 1000 1000 1000 1000 1000 1000 1000 1000 78 | l24c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 79 | l24c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000 80 | l24c256l4b: 2 6 13 20 29 38 47 57 81 | l24c256l8b: 2 4 8 13 18 23 28 35 82 | l24c256l16b: 2 3 6 8 11 15 18 22 83 | l24c256l32b: 2 3 5 6 8 11 14 16 84 | l24c256l64b: 1000 1000 1000 1000 1000 1000 1000 1000 85 | l28c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 86 | l28c64l2b: 46 52 117 157 188 225 246 261 87 | l28c64l4b: 19 25 39 54 96 107 120 150 88 | l28c64l8b: 9 12 21 30 39 47 58 79 89 | l28c64l16b: 8 9 11 16 25 32 37 42 90 | l28c64l32b: 7 8 9 11 14 19 23 28 91 | l28c64l64b: 7 7 8 10 12 14 18 22 92 | l28c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 93 | l28c128l2b: 1000 1000 1000 1000 1000 1000 1000 1000 94 | l28c128l4b: 12 22 39 54 98 108 130 151 95 | l28c128l8b: 7 12 21 30 39 48 59 80 96 | l28c128l16b: 6 8 11 16 24 31 37 42 97 | l28c128l32b: 6 7 9 11 14 19 24 28 98 | l28c128l64b: 6 7 9 11 14 19 24 28 99 | l28c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 100 | l28c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000 101 | l28c256l4b: 12 22 39 54 100 108 130 152 102 | l28c256l8b: 7 12 21 30 39 48 59 81 103 | l28c256l16b: 6 8 11 16 24 31 37 42 104 | l28c256l32b: 6 7 9 11 14 19 24 28 105 | l28c256l64b: 6 7 9 11 14 19 24 28 106 | l216c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 107 | l216c64l2b: 1000 1000 1000 1000 1000 1000 1000 1000 108 | l216c64l4b: 34 35 78 126 178 220 252 274 109 | l216c64l8b: 9 11 23 43 62 87 105 130 110 | l216c64l16b: 7 9 13 23 33 45 56 67 111 | l216c64l32b: 5 6 7 10 13 19 25 30 112 | l216c64l64b: 4 5 6 8 10 14 18 21 113 | l216c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 114 | l216c128l2b: 25 131 243 1000 1000 1000 1000 1000 115 | l216c128l4b: 8 28 79 127 179 221 253 274 116 | l216c128l8b: 4 9 22 43 62 88 106 131 117 | l216c128l16b: 4 6 11 21 32 44 55 67 118 | l216c128l32b: 4 6 11 12 12 18 24 29 119 | l216c128l64b: 2 3 5 7 9 13 17 21 120 | l216c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 121 | l216c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000 122 | l216c256l4b: 5 28 80 128 180 221 253 274 123 | l216c256l8b: 3 8 22 43 63 88 107 131 124 | l216c256l16b: 2 5 11 21 32 44 55 67 125 | l216c256l32b: 2 3 5 8 12 18 24 29 126 | l216c256l64b: 2 3 4 6 9 13 17 21 127 | -------------------------------------------------------------------------------- /cacti/crossbar.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | #ifndef __CROSSBAR__ 34 | #define __CROSSBAR__ 35 | 36 | #include 37 | #include 38 | #include "basic_circuit.h" 39 | #include "cacti_interface.h" 40 | #include "component.h" 41 | #include "parameter.h" 42 | #include "mat.h" 43 | #include "wire.h" 44 | 45 | class Crossbar : public Component 46 | { 47 | public: 48 | Crossbar( 49 | double in, 50 | double out, 51 | double flit_sz, 52 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global)); 53 | ~Crossbar(); 54 | 55 | void print_crossbar(); 56 | double output_buffer(); 57 | void compute_power(); 58 | 59 | double n_inp, n_out; 60 | double flit_size; 61 | double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap; 62 | 63 | private: 64 | double CB_ADJ; 65 | /* 66 | * Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar 67 | * buffer is adjusted to get an aspect ratio of whole cross bar close to one; 68 | * when adjust the ratio, the number of wires route over the tri-state buffers does not change, 69 | * however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase 70 | * during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch 71 | * will increase. As a result, the height of the crossbar (area.h) will increase. 72 | */ 73 | 74 | /*TechnologyParameter::*/DeviceType *deviceType; 75 | double TriS1, TriS2; 76 | double min_w_pmos, Vdd; 77 | 78 | }; 79 | 80 | 81 | 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /cacti/dram.cfg: -------------------------------------------------------------------------------- 1 | //-size (bytes) 16777216 2 | //-size (bytes) 33554432 3 | -size (bytes) 134217728 4 | //-size (bytes) 67108864 5 | //-size (bytes) 1073741824 6 | 7 | -block size (bytes) 64 8 | -associativity 1 9 | -read-write port 1 10 | -exclusive read port 0 11 | -exclusive write port 0 12 | -single ended read ports 0 13 | -UCA bank count 1 14 | //-technology (u) 0.032 15 | //-technology (u) 0.045 16 | -technology (u) 0.068 17 | //-technology (u) 0.078 18 | 19 | # following three parameters are meaningful only for main memories 20 | -page size (bits) 8192 21 | -burst length 8 22 | -internal prefetch width 8 23 | 24 | # following parameter can have one of the five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) 25 | -Data array cell type - "comm-dram" 26 | 27 | # following parameter can have one of the three values -- (itrs-hp, itrs-lstp, itrs-lop) 28 | -Data array peripheral type - "itrs-hp" 29 | 30 | # following parameter can have one of the five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) 31 | -Tag array cell type - "itrs-hp" 32 | 33 | # following parameter can have one of the three values -- (itrs-hp, itrs-lstp, itrs-lop) 34 | -Tag array peripheral type - "itrs-hp" 35 | 36 | # Bus width include data bits and address bits required by the decoder 37 | //-output/input bus width 512 38 | -output/input bus width 64 39 | 40 | -operating temperature (K) 350 41 | 42 | -cache type "main memory" 43 | 44 | # to model special structure like branch target buffers, directory, etc. 45 | # change the tag size parameter 46 | # if you want cacti to calculate the tagbits, set the tag size to "default" 47 | -tag size (b) "default" 48 | //-tag size (b) 45 49 | 50 | # fast - data and tag access happen in parallel 51 | # sequential - data array is accessed after accessing the tag array 52 | # normal - data array lookup and tag access happen in parallel 53 | # final data block is broadcasted in data array h-tree 54 | # after getting the signal from the tag array 55 | //-access mode (normal, sequential, fast) - "fast" 56 | -access mode (normal, sequential, fast) - "normal" 57 | //-access mode (normal, sequential, fast) - "sequential" 58 | 59 | # DESIGN OBJECTIVE for UCA (or banks in NUCA) 60 | //-design objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:0 61 | -design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0 62 | -deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:1000000 63 | //-deviate (delay, dynamic power, leakage power, cycle time, area) 200:100000:100000:100000:20 64 | 65 | -Optimize ED or ED^2 (ED, ED^2, NONE): "NONE" 66 | 67 | -Cache model (NUCA, UCA) - "UCA" 68 | 69 | //-Wire signalling (fullswing, lowswing, default) - "default" 70 | -Wire signalling (fullswing, lowswing, default) - "Global_10" 71 | 72 | -Wire inside mat - "global" 73 | //-Wire inside mat - "semi-global" 74 | -Wire outside mat - "global" 75 | 76 | -Interconnect projection - "conservative" 77 | //-Interconnect projection - "aggressive" 78 | 79 | -Add ECC - "true" 80 | 81 | -Print level (DETAILED, CONCISE) - "DETAILED" 82 | 83 | # for debugging 84 | -Print input parameters - "true" 85 | # force CACTI to model the cache with the 86 | # following Ndbl, Ndwl, Nspd, Ndsam, 87 | # and Ndcm values 88 | //-Force cache config - "true" 89 | -Force cache config - "false" 90 | -Ndwl 1 91 | -Ndbl 1 92 | -Nspd 0 93 | -Ndcm 1 94 | -Ndsam1 0 95 | -Ndsam2 0 96 | 97 | ########### NUCA Params ############ 98 | 99 | # Objective for NUCA 100 | -NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100 101 | -NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000 102 | 103 | # Contention in network (which is a function of core count and cache level) is one of 104 | # the critical factor used for deciding the optimal bank count value 105 | # core count can be 4, 8, or 16 106 | //-Core count 4 107 | -Core count 8 108 | //-Core count 16 109 | -Cache level (L2/L3) - "L3" 110 | 111 | # In order for CACTI to find the optimal NUCA bank value the following 112 | # variable should be assigned 0. 113 | -NUCA bank count 0 114 | 115 | -------------------------------------------------------------------------------- /cacti/dram_read_energy_results.txt: -------------------------------------------------------------------------------- 1 | DRAM: 2 | - Read energy: 0.468762 nJ 3 | name: dram-config-DRAM-system.log 4 | -------------------------------------------------------------------------------- /cacti/extio.h: -------------------------------------------------------------------------------- 1 | #ifndef _extio_H_ 2 | #define _extio_H_ 3 | #include "parameter.h" 4 | #include "component.h" 5 | #include "extio_technology.h" 6 | 7 | class Extio : public Component 8 | { 9 | public: 10 | 11 | Extio(IOTechParam *); 12 | 13 | void extio_area(); 14 | void extio_eye(); 15 | void extio_power_dynamic(); 16 | void extio_power_phy(); 17 | void extio_power_term(); 18 | 19 | private: 20 | IOTechParam *io_param; 21 | 22 | double io_area; 23 | 24 | double io_power_term; 25 | double power_termination_write; 26 | double power_termination_read; 27 | double power_bias; 28 | double power_clk_bias; 29 | 30 | double phy_power; 31 | double phy_wtime; 32 | double phy_static_power; 33 | double phy_dynamic_power; 34 | 35 | double io_power_dynamic; 36 | 37 | double power_dq_write, power_dqs_write, power_ca_write, 38 | power_dq_read, power_dqs_read, power_ca_read, 39 | power_clk; 40 | 41 | double io_tmargin, io_vmargin; 42 | 43 | }; 44 | 45 | 46 | #endif // _extio_H_ 47 | -------------------------------------------------------------------------------- /cacti/htree2.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | #ifndef __HTREE2_H__ 34 | #define __HTREE2_H__ 35 | 36 | #include "basic_circuit.h" 37 | #include "component.h" 38 | #include "parameter.h" 39 | #include "assert.h" 40 | #include "subarray.h" 41 | #include "cacti_interface.h" 42 | #include "wire.h" 43 | 44 | // leakge power includes entire htree in a bank (when uca_tree == false) 45 | // leakge power includes only part to one bank when uca_tree == true 46 | 47 | class Htree2 : public Component 48 | { 49 | public: 50 | Htree2(enum Wire_type wire_model, 51 | double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl, 52 | enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false, 53 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global)); 54 | ~Htree2() {}; 55 | 56 | void in_htree(); 57 | void out_htree(); 58 | 59 | // repeaters only at h-tree nodes 60 | void limited_in_htree(); 61 | void limited_out_htree(); 62 | void input_nand(double s1, double s2, double l); 63 | void output_buffer(double s1, double s2, double l); 64 | 65 | double in_rise_time, out_rise_time; 66 | 67 | void set_in_rise_time(double rt) 68 | { 69 | in_rise_time = rt; 70 | } 71 | 72 | double max_unpipelined_link_delay; 73 | powerDef power_bit; 74 | 75 | 76 | private: 77 | double wire_bw; 78 | double init_wire_bw; // bus width at root 79 | enum Htree_type tree_type; 80 | double htree_hnodes; 81 | double htree_vnodes; 82 | double mat_width; 83 | double mat_height; 84 | int add_bits, data_in_bits,search_data_in_bits,data_out_bits, search_data_out_bits; 85 | int ndbl, ndwl; 86 | bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously 87 | bool search_tree; 88 | 89 | enum Wire_type wt; 90 | double min_w_nmos; 91 | double min_w_pmos; 92 | 93 | /*TechnologyParameter::*/DeviceType *deviceType; 94 | 95 | }; 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /cacti/io.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | #ifndef __IO_H__ 34 | #define __IO_H__ 35 | 36 | 37 | #include "const.h" 38 | #include "cacti_interface.h" 39 | 40 | 41 | void output_data_csv(const uca_org_t & fin_res, string fn="out.csv"); 42 | void output_UCA(uca_org_t * fin_res); 43 | void output_data_csv_3dd(const uca_org_t & fin_res); 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /cacti/makefile: -------------------------------------------------------------------------------- 1 | TAR = cacti 2 | 3 | .PHONY: dbg opt depend clean clean_dbg clean_opt 4 | 5 | all: dbg 6 | 7 | dbg: $(TAR).mk obj_dbg 8 | @$(MAKE) TAG=dbg -C . -f $(TAR).mk 9 | 10 | opt: $(TAR).mk obj_opt 11 | @$(MAKE) TAG=opt -C . -f $(TAR).mk 12 | 13 | obj_dbg: 14 | mkdir $@ 15 | 16 | obj_opt: 17 | mkdir $@ 18 | 19 | clean: clean_dbg clean_opt 20 | 21 | clean_dbg: obj_dbg 22 | @$(MAKE) TAG=dbg -C . -f $(TAR).mk clean 23 | rm -rf $< 24 | 25 | clean_opt: obj_opt 26 | @$(MAKE) TAG=opt -C . -f $(TAR).mk clean 27 | rm -rf $< 28 | 29 | -------------------------------------------------------------------------------- /cacti/memcad.h: -------------------------------------------------------------------------------- 1 | #ifndef __MEMCAD_H__ 2 | #define __MEMCAD_H__ 3 | 4 | #include "memcad_parameters.h" 5 | #include 6 | 7 | 8 | extern vector *memcad_all_channels; 9 | 10 | extern vector *memcad_all_bobs; 11 | 12 | extern vector *memcad_all_memories; 13 | 14 | extern vector *memcad_best_results; 15 | 16 | 17 | 18 | void find_all_channels(MemCadParameters * memcad_params); 19 | 20 | void find_all_bobs(MemCadParameters * memcad_params); 21 | 22 | bool find_all_memories(MemCadParameters * memcad_params); 23 | 24 | void clean_results(); 25 | 26 | void solve_memcad(MemCadParameters * memcad_params); 27 | 28 | #endif 29 | 30 | 31 | -------------------------------------------------------------------------------- /cacti/memcad_parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef __MEMCAD_PARAMS_H__ 2 | #define __MEMCAD_PARAMS_H__ 3 | 4 | #include 5 | #include 6 | #include "cacti_interface.h" 7 | #include "const.h" 8 | #include "parameter.h" 9 | 10 | using namespace std; 11 | 12 | ///#define INF 1000000 13 | #define EPS 0.0000001 14 | 15 | #define MAX_DIMM_PER_CHANNEL 3 16 | #define MAX_CAP_PER_DIMM 64 17 | #define MAX_RANKS_PER_DIMM 4 18 | #define MIN_BW_PER_CHANNEL 400 19 | #define MAX_DDR3_CHANNEL_BW 800 20 | #define MAX_DDR4_CHANNEL_BW 1600 21 | #define MAX_NUM_CHANNELS_PER_BOB 2 22 | #define MAX_NUM_BOBS 6 23 | #define DIMM_PER_CHANNEL 3 24 | 25 | /* 26 | enum Mem_IO_type 27 | { 28 | DDR3, 29 | DDR4, 30 | LPDDR2, 31 | WideIO, 32 | Low_Swing_Diff, 33 | Serial 34 | }; 35 | 36 | enum Mem_DIMM 37 | { 38 | UDIMM, 39 | RDIMM, 40 | LRDIMM 41 | }; 42 | */ 43 | 44 | 45 | 46 | class MemCadParameters 47 | { 48 | public: 49 | 50 | Mem_IO_type io_type; // DDR3 vs. DDR4 51 | 52 | int capacity; // in GB 53 | 54 | int num_bobs; // default=4me 55 | 56 | ///int bw_per_channel; // defaul=1600 MHz; 57 | 58 | ///bool with_bob; 59 | 60 | int num_channels_per_bob; // 1 means no bob 61 | 62 | bool capacity_wise; // true means the load on each channel is proportional to its capacity. 63 | 64 | ///int min_bandwith; 65 | 66 | MemCad_metrics first_metric; 67 | 68 | MemCad_metrics second_metric; 69 | 70 | MemCad_metrics third_metric; 71 | 72 | DIMM_Model dimm_model; 73 | 74 | bool low_power_permitted; // Not yet implemented. It determines acceptable VDDs. 75 | 76 | double load; // between 0 to 1 77 | 78 | double row_buffer_hit_rate; 79 | 80 | double rd_2_wr_ratio; 81 | 82 | bool same_bw_in_bob; // true if all the channels in the bob have the same bandwidth. 83 | 84 | 85 | bool mirror_in_bob;// true if all the channels in the bob have the same configs 86 | 87 | bool total_power; // false means just considering I/O Power 88 | 89 | bool verbose; 90 | 91 | // Functions 92 | MemCadParameters(InputParameter * g_ip); 93 | void print_inputs(); 94 | bool sanity_check(); 95 | 96 | }; 97 | 98 | 99 | ////////////////////////////////////////////////////////////////////////////////// 100 | 101 | class MemoryParameters 102 | { 103 | public: 104 | // Power Parameteres 105 | static double VDD[2][2][4]; 106 | 107 | static double IDD0[2][4]; 108 | 109 | static double IDD1[2][4]; 110 | 111 | static double IDD2P0[2][4]; 112 | 113 | static double IDD2P1[2][4]; 114 | 115 | static double IDD2N[2][4]; 116 | 117 | static double IDD3P[2][4]; 118 | 119 | static double IDD3N[2][4]; 120 | 121 | static double IDD4R[2][4]; 122 | 123 | static double IDD4W[2][4]; 124 | 125 | static double IDD5[2][4]; 126 | 127 | static double io_energy_read[2][3][3][4]; 128 | 129 | static double io_energy_write[2][3][3][4]; 130 | 131 | // Timing Parameters 132 | static double T_RAS[2]; 133 | 134 | static double T_RC[2]; 135 | 136 | static double T_RP[2]; 137 | 138 | static double T_RFC[2]; 139 | 140 | static double T_REFI[2]; 141 | 142 | // Bandwidth Parameters 143 | static int bandwidth_load[2][4]; 144 | 145 | // Cost Parameters 146 | static double cost[2][3][5]; 147 | 148 | 149 | // Functions 150 | MemoryParameters(); 151 | 152 | int bw_index(Mem_IO_type type, int bandwidth); 153 | }; 154 | 155 | /////////////////////////////////////////////////////////////////////////// 156 | 157 | int bw_index(Mem_IO_type type, int bandwidth); 158 | 159 | 160 | /////////////////////////////////////////////////////////////////////////// 161 | 162 | class channel_conf 163 | { 164 | public: 165 | MemCadParameters *memcad_params; 166 | 167 | Mem_DIMM type; 168 | int num_dimm_per_channel; 169 | int histogram_capacity[5]; // 0->4GB, 1->8GB, 2->16GB, 3->32GB, 4->64GB 170 | bool low_power; 171 | 172 | int capacity; 173 | int bandwidth; 174 | double energy_per_read; 175 | double energy_per_write; 176 | double energy_per_access; 177 | 178 | double cost; 179 | double latency; 180 | 181 | bool valid; 182 | // Functions 183 | channel_conf(MemCadParameters * memcad_params, const vector& dimm_cap, int bandwidth, Mem_DIMM type, bool low_power); 184 | 185 | void calc_power(); 186 | 187 | friend channel_conf* clone(channel_conf*); 188 | friend ostream & operator<<(ostream &os, const channel_conf& ch_cnf); 189 | 190 | }; 191 | 192 | 193 | /////////////////////////////////////////////////////////////////////////// 194 | 195 | class bob_conf 196 | { 197 | public: 198 | MemCadParameters *memcad_params; 199 | int num_channels; 200 | channel_conf *channels[MAX_NUM_CHANNELS_PER_BOB]; 201 | 202 | int capacity; 203 | int bandwidth; 204 | double energy_per_read; 205 | double energy_per_write; 206 | double energy_per_access; 207 | 208 | double cost; 209 | double latency; 210 | 211 | bool valid; 212 | 213 | bob_conf(MemCadParameters * memcad_params, vector * channels); 214 | 215 | friend bob_conf* clone(bob_conf*); 216 | friend ostream & operator <<(ostream &os, const bob_conf& bob_cnf); 217 | }; 218 | 219 | /////////////////////////////////////////////////////////////////////////// 220 | 221 | 222 | class memory_conf 223 | { 224 | public: 225 | MemCadParameters *memcad_params; 226 | int num_bobs; 227 | bob_conf* bobs[MAX_NUM_BOBS]; 228 | 229 | int capacity; 230 | int bandwidth; 231 | double energy_per_read; 232 | double energy_per_write; 233 | double energy_per_access; 234 | 235 | double cost; 236 | double latency; 237 | 238 | bool valid; 239 | 240 | memory_conf(MemCadParameters * memcad_params, vector * bobs); 241 | friend ostream & operator <<(ostream &os, const memory_conf& bob_cnf); 242 | }; 243 | 244 | 245 | 246 | 247 | 248 | 249 | #endif 250 | 251 | 252 | -------------------------------------------------------------------------------- /cacti/nuca.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | #ifndef __NUCA_H__ 34 | #define __NUCA_H__ 35 | 36 | #include "basic_circuit.h" 37 | #include "component.h" 38 | #include "parameter.h" 39 | #include "assert.h" 40 | #include "cacti_interface.h" 41 | #include "wire.h" 42 | #include "mat.h" 43 | #include "io.h" 44 | #include "router.h" 45 | #include 46 | 47 | 48 | 49 | class nuca_org_t { 50 | public: 51 | ~nuca_org_t(); 52 | // int size; 53 | /* area, power, access time, and cycle time stats */ 54 | Component nuca_pda; 55 | Component bank_pda; 56 | Component wire_pda; 57 | Wire *h_wire; 58 | Wire *v_wire; 59 | Router *router; 60 | /* for particular network configuration 61 | * calculated based on a cycle accurate 62 | * simulation Ref: CACTI 6 - Tech report 63 | */ 64 | double contention; 65 | 66 | /* grid network stats */ 67 | double avg_hops; 68 | int rows; 69 | int columns; 70 | int bank_count; 71 | }; 72 | 73 | 74 | 75 | class Nuca : public Component 76 | { 77 | public: 78 | Nuca( 79 | /*TechnologyParameter::*/DeviceType *dt); 80 | void print_router(); 81 | ~Nuca(); 82 | void sim_nuca(); 83 | void init_cont(); 84 | int calc_cycles(double lat, double oper_freq); 85 | void calculate_nuca_area (nuca_org_t *nuca); 86 | int check_nuca_org (nuca_org_t *n, min_values_t *minval); 87 | nuca_org_t * find_optimal_nuca (list *n, min_values_t *minval); 88 | void print_nuca(nuca_org_t *n); 89 | void print_cont_stats(); 90 | 91 | private: 92 | 93 | /*TechnologyParameter::*/DeviceType *deviceType; 94 | int wt_min, wt_max; 95 | Wire *wire_vertical[WIRE_TYPES], 96 | *wire_horizontal[WIRE_TYPES]; 97 | 98 | }; 99 | 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /cacti/obj_dbg/TSV.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/TSV.o -------------------------------------------------------------------------------- /cacti/obj_dbg/Ucache.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/Ucache.o -------------------------------------------------------------------------------- /cacti/obj_dbg/arbiter.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/arbiter.o -------------------------------------------------------------------------------- /cacti/obj_dbg/area.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/area.o -------------------------------------------------------------------------------- /cacti/obj_dbg/bank.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/bank.o -------------------------------------------------------------------------------- /cacti/obj_dbg/basic_circuit.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/basic_circuit.o -------------------------------------------------------------------------------- /cacti/obj_dbg/cacti: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/cacti -------------------------------------------------------------------------------- /cacti/obj_dbg/cacti_interface.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/cacti_interface.o -------------------------------------------------------------------------------- /cacti/obj_dbg/component.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/component.o -------------------------------------------------------------------------------- /cacti/obj_dbg/crossbar.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/crossbar.o -------------------------------------------------------------------------------- /cacti/obj_dbg/decoder.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/decoder.o -------------------------------------------------------------------------------- /cacti/obj_dbg/extio.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/extio.o -------------------------------------------------------------------------------- /cacti/obj_dbg/extio_technology.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/extio_technology.o -------------------------------------------------------------------------------- /cacti/obj_dbg/htree2.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/htree2.o -------------------------------------------------------------------------------- /cacti/obj_dbg/io.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/io.o -------------------------------------------------------------------------------- /cacti/obj_dbg/main.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/main.o -------------------------------------------------------------------------------- /cacti/obj_dbg/mat.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/mat.o -------------------------------------------------------------------------------- /cacti/obj_dbg/memcad.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/memcad.o -------------------------------------------------------------------------------- /cacti/obj_dbg/memcad_parameters.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/memcad_parameters.o -------------------------------------------------------------------------------- /cacti/obj_dbg/memorybus.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/memorybus.o -------------------------------------------------------------------------------- /cacti/obj_dbg/nuca.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/nuca.o -------------------------------------------------------------------------------- /cacti/obj_dbg/parameter.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/parameter.o -------------------------------------------------------------------------------- /cacti/obj_dbg/powergating.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/powergating.o -------------------------------------------------------------------------------- /cacti/obj_dbg/router.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/router.o -------------------------------------------------------------------------------- /cacti/obj_dbg/subarray.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/subarray.o -------------------------------------------------------------------------------- /cacti/obj_dbg/technology.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/technology.o -------------------------------------------------------------------------------- /cacti/obj_dbg/uca.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/uca.o -------------------------------------------------------------------------------- /cacti/obj_dbg/wire.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/cacti/obj_dbg/wire.o -------------------------------------------------------------------------------- /cacti/powergating.cc: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | #include "area.h" 33 | #include "powergating.h" 34 | #include "parameter.h" 35 | #include 36 | #include 37 | #include 38 | 39 | using namespace std; 40 | 41 | //TODO: although DTSN is used,since for memory array, the number of sleep txs 42 | //is related to the number of rows and cols. so All calculations are still base on 43 | //single sleep tx cases 44 | 45 | Sleep_tx::Sleep_tx( 46 | double _perf_with_sleep_tx, 47 | double _active_Isat,//of circuit block, not sleep tx 48 | bool _is_footer, 49 | double _c_circuit_wakeup, 50 | double _V_delta, 51 | int _num_sleep_tx, 52 | // double _vt_circuit, 53 | // double _vt_sleep_tx, 54 | // double _mobility,//of sleep tx 55 | // double _c_ox,//of sleep tx 56 | const Area & cell_) 57 | :perf_with_sleep_tx(_perf_with_sleep_tx), 58 | active_Isat(_active_Isat), 59 | is_footer(_is_footer), 60 | c_circuit_wakeup(_c_circuit_wakeup), 61 | V_delta(_V_delta), 62 | num_sleep_tx(_num_sleep_tx), 63 | // vt_circuit(_vt_circuit), 64 | // vt_sleep_tx(_vt_sleep_tx), 65 | // mobility(_mobility), 66 | // c_ox(_c_ox) 67 | cell(cell_), 68 | is_sleep_tx(true) 69 | { 70 | 71 | //a single sleep tx in a network 72 | double raw_area, raw_width, raw_hight; 73 | double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true); 74 | vdd = g_tp.peri_global.Vdd; 75 | vt_circuit = g_tp.peri_global.Vth; 76 | vt_sleep_tx = g_tp.sleep_tx.Vth; 77 | mobility = g_tp.sleep_tx.Mobility_n; 78 | c_ox = g_tp.sleep_tx.C_ox; 79 | 80 | width = active_Isat/(perf_with_sleep_tx*mobility*c_ox*(vdd-vt_circuit)*(vdd-vt_sleep_tx))*g_ip->F_sz_um;//W/L uses physical numbers 81 | width /= num_sleep_tx; 82 | 83 | raw_area = compute_gate_area(INV, 1, width, p_to_n_sz_ratio*width, cell.w*2)/2; //Only single device, assuming device is laide on the side 84 | raw_width = cell.w; 85 | raw_hight = raw_area/cell.w; 86 | area.set_h(raw_hight); 87 | area.set_w(raw_width); 88 | 89 | compute_penalty(); 90 | 91 | } 92 | 93 | double Sleep_tx::compute_penalty() 94 | { 95 | //V_delta = VDD - VCCmin nothing to do with threshold of sleep tx. Although it might be OK to use sleep tx to control the V_delta 96 | // double c_load; 97 | double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true); 98 | 99 | if (is_footer) 100 | { 101 | c_intrinsic_sleep = drain_C_(width, NCH, 1, 1, area.h, false, false, false,is_sleep_tx); 102 | // V_delta = _V_delta; 103 | wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_nmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio); 104 | wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta; 105 | //no 0.5 because the half of the energy spend in entering sleep and half of the energy will be spent in waking up. And they are pairs 106 | } 107 | else 108 | { 109 | c_intrinsic_sleep = drain_C_(width*p_to_n_sz_ratio, PCH, 1, 1, area.h, false, false, false,is_sleep_tx); 110 | // V_delta = _V_delta; 111 | wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_pmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio); 112 | wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta; 113 | } 114 | 115 | return wakeup_delay; 116 | 117 | /* 118 | The number of cycles in the wake-up latency set the constraint on the 119 | minimum number of idle clock cycles needed before a processor 120 | can enter in the corresponding sleep mode without any wakeup 121 | overhead. 122 | 123 | If the circuit is half way to sleep then waken up, it is still OK 124 | just the wakeup latency will be shorter than the wakeup time from full asleep. 125 | So, the sleep time and energy does not matter 126 | */ 127 | 128 | } 129 | 130 | -------------------------------------------------------------------------------- /cacti/powergating.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | #ifndef POWERGATING_H_ 33 | #define POWERGATING_H_ 34 | 35 | #include "component.h" 36 | 37 | class Sleep_tx : public Component 38 | { 39 | public: 40 | Sleep_tx( 41 | double _perf_with_sleep_tx, 42 | double _active_Isat,//of circuit block, not sleep tx 43 | bool _is_footer, 44 | double _c_circuit_wakeup, 45 | double _V_delta, 46 | int _num_sleep_tx, 47 | // double _vt_circuit, 48 | // double _vt_sleep_tx, 49 | // double _mobility,//of sleep tx 50 | // double _c_ox,//of sleep tx 51 | const Area & cell_); 52 | 53 | double perf_with_sleep_tx; 54 | double active_Isat; 55 | bool is_footer; 56 | 57 | double vt_circuit; 58 | double vt_sleep_tx; 59 | double vdd;// of circuit block not sleep tx 60 | double mobility;//of sleep tx 61 | double c_ox; 62 | double width; 63 | double c_circuit_wakeup; 64 | double c_intrinsic_sleep; 65 | double delay, wakeup_delay; 66 | powerDef power, wakeup_power; 67 | // double c_circuit_sleep; 68 | // double sleep_delay; 69 | // powerDef sleep_power; 70 | double V_delta; 71 | 72 | int num_sleep_tx; 73 | 74 | const Area & cell; 75 | bool is_sleep_tx; 76 | 77 | 78 | 79 | // void compute_area(); 80 | double compute_penalty(); // return outrisetime 81 | 82 | void leakage_feedback(double temperature){}; 83 | ~Sleep_tx(){}; 84 | }; 85 | 86 | #endif /* POWERGATING_H_ */ 87 | -------------------------------------------------------------------------------- /cacti/regression.test: -------------------------------------------------------------------------------- 1 | cache 4 types 2 | ./cacti -infile test_configs/cache1.cfg #L1 2-way 32K 3 | ./cacti -infile test_configs/cache2.cfg #L2 4-way 256K 4 | ./cacti -infile test_configs/cache3.cfg #L3 8-way 16M 5 | ./cacti -infile test_configs/cache4.cfg #L1 full-asso 4K with single search port 6 | RAM 4 types 7 | ./cacti -infile test_configs/ram1.cfg # 16M 8 | ./cacti -infile test_configs/ram2.cfg # itrs-hp itrs-lstp 9 | ./cacti -infile test_configs/ram3.cfg # two banks no-ecc 128M 10 | ./cacti -infile test_configs/ram4.cfg # 32K 2-way 11 | CAM 4 types 12 | ./cacti -infile test_configs/cam1.cfg # same as ram1 but ram->cam and full-asso 13 | ./cacti -infile test_configs/cam2.cfg # same as cam1 with line size = 128 14 | ./cacti -infile test_configs/cam3.cfg # cam1 for 40nm technology 15 | ./cacti -infile test_configs/cam4.cfg # ca1 with exclusive read and write port 16 | NUCA 4 types 17 | ./cacti -infile test_configs/nuca1.cfg # 18 | ./cacti -infile test_configs/nuca2.cfg 19 | ./cacti -infile test_configs/nuca3.cfg 20 | ./cacti -infile test_configs/nuca3.cfg 21 | eDRAM 4 types 22 | ./cacti -infile test_configs/edram1.cfg # 23 | ./cacti -infile test_configs/edram2.cfg 24 | ./cacti -infile test_configs/edram3.cfg 25 | ./cacti -infile test_configs/edram4.cfg 26 | DRAM 4 types 27 | ./cacti -infile test_configs/dram1.cfg # 28 | ./cacti -infile test_configs/dram2.cfg 29 | ./cacti -infile test_configs/dram3.cfg 30 | ./cacti -infile test_configs/dram4.cfg 31 | IO 4 different parameters 32 | ./cacti -infile test_configs/io1.cfg # 33 | ./cacti -infile test_configs/io2.cfg 34 | ./cacti -infile test_configs/io3.cfg 35 | ./cacti -infile test_configs/io4.cfg 36 | Power gating 4 types 37 | ./cacti -infile test_configs/power_gate1.cfg 38 | ./cacti -infile test_configs/power_gate2.cfg 39 | ./cacti -infile test_configs/power_gate3.cfg 40 | ./cacti -infile test_configs/power_gate4.cfg 41 | 3D 4 types 42 | ./cacti -infile test_configs/3D1.cfg 43 | ./cacti -infile test_configs/3D2.cfg 44 | ./cacti -infile test_configs/3D3.cfg 45 | ./cacti -infile test_configs/3D4.cfg -------------------------------------------------------------------------------- /cacti/router.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | 34 | #ifndef __ROUTER_H__ 35 | #define __ROUTER_H__ 36 | 37 | #include 38 | #include 39 | #include "basic_circuit.h" 40 | #include "cacti_interface.h" 41 | #include "component.h" 42 | #include "mat.h" 43 | #include "parameter.h" 44 | #include "wire.h" 45 | #include "crossbar.h" 46 | #include "arbiter.h" 47 | 48 | 49 | 50 | class Router : public Component 51 | { 52 | public: 53 | Router( 54 | double flit_size_, 55 | double vc_buf, /* vc size = vc_buffer_size * flit_size */ 56 | double vc_count, 57 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global), 58 | double I_ = 5, 59 | double O_ = 5, 60 | double M_ = 0.6); 61 | ~Router(); 62 | 63 | 64 | void print_router(); 65 | 66 | Component arbiter, crossbar, buffer; 67 | 68 | double cycle_time, max_cyc; 69 | double flit_size; 70 | double vc_count; 71 | double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */ 72 | 73 | private: 74 | /*TechnologyParameter::*/DeviceType *deviceType; 75 | double FREQUENCY; // move this to config file --TODO 76 | double Cw3(double len); 77 | double gate_cap(double w); 78 | double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack); 79 | enum Wire_type wtype; 80 | enum Wire_placement wire_placement; 81 | //corssbar 82 | double NTtr, PTtr, wt, ht, I, O, NTi, PTi, NTid, PTid, NTod, PTod, TriS1, TriS2; 83 | double M; //network load 84 | double transmission_buf_inpcap(); 85 | double transmission_buf_outcap(); 86 | double transmission_buf_ctrcap(); 87 | double crossbar_inpline(); 88 | double crossbar_outline(); 89 | double crossbar_ctrline(); 90 | double tr_crossbar_power(); 91 | void cb_stats (); 92 | double arb_power(); 93 | void arb_stats (); 94 | double buffer_params(); 95 | void buffer_stats(); 96 | 97 | 98 | //arbiter 99 | 100 | //buffer 101 | 102 | //router params 103 | double Vdd; 104 | 105 | void calc_router_parameters(); 106 | void get_router_area(); 107 | void get_router_power(); 108 | void get_router_delay(); 109 | 110 | double min_w_pmos; 111 | 112 | 113 | }; 114 | 115 | #endif 116 | -------------------------------------------------------------------------------- /cacti/subarray.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | 34 | #ifndef __SUBARRAY_H__ 35 | #define __SUBARRAY_H__ 36 | 37 | #include "area.h" 38 | #include "component.h" 39 | #include "parameter.h" 40 | 41 | using namespace std; 42 | 43 | 44 | class Subarray : public Component 45 | { 46 | public: 47 | Subarray(const DynamicParameter & dp, bool is_fa_); 48 | ~Subarray(); 49 | 50 | const DynamicParameter & dp; 51 | double get_total_cell_area(); 52 | unsigned int num_rows; 53 | unsigned int num_cols; 54 | int32_t num_cols_fa_cam; 55 | int32_t num_cols_fa_ram; 56 | Area cell, cam_cell; 57 | 58 | bool is_fa; 59 | double C_wl, C_wl_cam, C_wl_ram; 60 | double R_wl, R_wl_cam, R_wl_ram; 61 | double C_bl, C_bl_cam; 62 | private: 63 | 64 | void compute_C(); // compute bitline and wordline capacitance 65 | }; 66 | 67 | 68 | 69 | #endif 70 | 71 | -------------------------------------------------------------------------------- /cacti/tech_params/16nm.dat: -------------------------------------------------------------------------------- 1 | Invalid technology nodes 2 | -------------------------------------------------------------------------------- /cacti/tech_params/180nm.dat: -------------------------------------------------------------------------------- 1 | parameters (unit) hp lstp lop lp-dram comm-dram 2 | -C_g_ideal (F/um) 1.328e-15 0 0 0 0 3 | -C_fringe (F/um) 1.6e-16 0 0 0 0 4 | -C_junc (F/um^2) 2e-15 0 0 0 0 5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16 6 | -l_phy (um) 0.12 0 0 0 0 7 | -l_elec (um) 0.1 0 0 0 0 8 | -nmos_effective_resistance_multiplier (-) 1.54 0 0 0 0 9 | -Vdd (V) 1.5 0 0 0 0 10 | -Vth (V) 0.4407 0 0 0 0 11 | -Vdsat (V) 0.256 0 0 0 0 12 | -I_on_n (A/um) 0.00075 0 0 0 0 13 | -I_on_p (A/um) 0.00035 0 0 0 0 14 | 15 | 16 | 17 | parameters (unit) temp hp lstp lop lp-dram comm-dram 18 | -I_off_n (A/um) 0 7e-10 0 0 0 0 19 | -I_off_n (A/um) 10 8.26e-10 0 0 0 0 20 | -I_off_n (A/um) 20 9.74e-10 0 0 0 0 21 | -I_off_n (A/um) 30 1.15e-09 0 0 0 0 22 | -I_off_n (A/um) 40 1.35e-09 0 0 0 0 23 | -I_off_n (A/um) 50 1.6e-09 0 0 0 0 24 | -I_off_n (A/um) 60 1.88e-09 0 0 0 0 25 | -I_off_n (A/um) 70 2.29e-09 0 0 0 0 26 | -I_off_n (A/um) 80 2.7e-09 0 0 0 0 27 | -I_off_n (A/um) 90 3.19e-09 0 0 0 0 28 | -I_off_n (A/um) 100 3.76e-09 0 0 0 0 29 | -I_g_on_n (A/um) 0 1.65e-10 0 0 0 0 30 | -I_g_on_n (A/um) 10 1.65e-10 0 0 0 0 31 | -I_g_on_n (A/um) 20 1.65e-10 0 0 0 0 32 | -I_g_on_n (A/um) 30 1.65e-10 0 0 0 0 33 | -I_g_on_n (A/um) 40 1.65e-10 0 0 0 0 34 | -I_g_on_n (A/um) 50 1.65e-10 0 0 0 0 35 | -I_g_on_n (A/um) 60 1.65e-10 0 0 0 0 36 | -I_g_on_n (A/um) 70 1.65e-10 0 0 0 0 37 | -I_g_on_n (A/um) 80 1.65e-10 0 0 0 0 38 | -I_g_on_n (A/um) 90 1.65e-10 0 0 0 0 39 | -I_g_on_n (A/um) 100 1.65e-10 0 0 0 0 40 | 41 | 42 | parameters (unit) hp lstp lop lp-dram comm-dram 43 | -C_ox (F/um^2) 3.58e-14 0 0 0 0 44 | -t_ox (um) 0.0024 0 0 0 0 45 | -n2p_drv_rt (-) 2.45 0 0 0 0 46 | -lch_lk_rdc (-) 1 0 0 0 0 47 | -Mobility_n (um^2/V.sec) 3.0216e+10 0 0 0 0 48 | -gmp_to_gmn_multiplier (-) 1.22 0 0 0 0 49 | -vpp (V) 0 0 0 0 0 50 | 51 | SRAM 52 | parameters cell_type hp lstp lop lp-dram comm-dram 53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31 54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23 55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08 56 | -area_cell (um^2) 0 146 146 146 146 146 57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46 58 | 59 | CAM 60 | parameters cell_type hp lstp lop lp-dram comm-dram 61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31 62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23 63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08 64 | -area_cell (um^2) 1 292 292 292 292 292 65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92 66 | 67 | DRAM 68 | parameters cell_type hp lstp lop lp-dram comm-dram 69 | -vdd_cell (V) 2 0 0 0 0 0 70 | -Wmemcella (um) 2 0 0 0 0 0 71 | -Wmemcellpmos (um) 2 0 0 0 0 0 72 | -Wmemcellnmos (um) 2 0 0 0 0 0 73 | -area_cell (um^2) 2 0 0 0 0 0 74 | -asp_ratio_cell (-) 2 0 0 0 0 0 75 | 76 | parameters hp lstp lop lp-dram comm-dram 77 | -dram_cell_I_on (A/um) 0 0 0 0 0 78 | -dram_cell_Vdd (V) 0 0 0 0 0 79 | -dram_cell_C (F) 0 0 0 0 0 80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 0 0 81 | 82 | 83 | -logic_scaling_co_eff (-) 1.5 84 | -core_tx_density (1/um^2) 0.245 85 | -sckt_co_eff (-) 1.11 86 | -chip_layout_overhead (-) 1 87 | -macro_layout_overhead (-) 1 88 | -sense_delay (sec) 2.8e-10 89 | -sense_dy_power (J) 1.47e-14 90 | 91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2 93 | -barrier_thickness (um) 0.017 0.017 0.017 0 0.017 0.017 0.017 0 94 | -dishing_thickness (um) 0 0 0 0 0 0 0.1584 0 95 | -alpha_scatter (-) 1 1 1 0 1 1 1 0 96 | -aspect_ratio (-) 2 2.4 2.2 0 2 2 2.2 0 97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0 98 | -horiz_dielectric_constant (-) 2.709 2.709 2.709 0 3.038 3.038 3.038 0 99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0 100 | -ild_thickness (um) 0.75 0.75 1.5 0 0.75 0.75 1.98 0 101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 102 | -resistivity (u-ohm.m) 0.022 0.022 0.022 0.022 0.022 0.022 0.022 0.022 103 | 104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2 105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 66.6667 106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 6.51042e-16 107 | -tsv_pitch (um) 0 0 0 0 0 0 108 | -tsv_diameter (um) 0 0 0 0 0 0 109 | -tsv_length (um) 0 0 0 0 0 0 110 | -tsv_dielec_thickness (um) 0 0 0 0 0 0 111 | -tsv_contact_resistance (ohm) 0 0 0 0 0 0 112 | -tsv_depletion_width (um) 0 0 0 0 0 0 113 | -tsv_liner_dielectric_cons (-) 0 0 0 0 0 0 114 | -------------------------------------------------------------------------------- /cacti/tech_params/22nm.dat: -------------------------------------------------------------------------------- 1 | parameters (unit) hp lstp lop lp-dram comm-dram 2 | -C_g_ideal (F/um) 3.27e-16 3.22e-16 3.16e-16 0 1.99e-16 3 | -C_fringe (F/um) 6e-17 8e-17 8e-17 0 5.3e-17 4 | -C_junc (F/um^2) 0 0 0 0 1e-15 5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16 6 | -l_phy (um) 0.009 0.014 0.011 0 0.022 7 | -l_elec (um) 0.00468 0.008 0.00604 0 0.0181 8 | -nmos_effective_resistance_multiplier (-) 1.45 1.99 1.73 0 1.69 9 | -Vdd (V) 0.8 0.8 0.6 0 0.9 10 | -Vth (V) 0.1395 0.40126 0.2315 0 1 11 | -Vdsat (V) 0.0233 0.0664 0.0181 0 0.0972 12 | -I_on_n (A/um) 0.0026264 0.0007276 0.0009161 0 0.0009105 13 | -I_on_p (A/um) 0.0013132 0.0003638 0.00045805 0 0.00045525 14 | 15 | 16 | 17 | parameters (unit) temp hp lstp lop lp-dram comm-dram 18 | -I_off_n (A/um) 0 1.216e-07 2.43e-11 1.31e-08 0 1.1e-13 19 | -I_off_n (A/um) 10 1.24e-07 4.85e-11 2.6e-08 0 2.11e-13 20 | -I_off_n (A/um) 20 1.272e-07 9.68e-11 5.14e-08 0 3.88e-13 21 | -I_off_n (A/um) 30 1.344e-07 1.94e-10 1.02e-07 0 6.9e-13 22 | -I_off_n (A/um) 40 1.52e-07 3.87e-10 2.02e-07 0 1.19e-12 23 | -I_off_n (A/um) 50 2.152e-07 7.73e-10 3.99e-07 0 1.98e-12 24 | -I_off_n (A/um) 60 4.256e-07 3.55e-10 7.91e-07 0 3.22e-12 25 | -I_off_n (A/um) 70 8.16e-07 3.09e-09 1.09e-06 0 5.09e-12 26 | -I_off_n (A/um) 80 1.296e-06 6.19e-09 2.09e-06 0 7.85e-12 27 | -I_off_n (A/um) 90 2.184e-06 1.24e-08 4.04e-06 0 1.18e-11 28 | -I_off_n (A/um) 100 4.88e-06 2.48e-08 4.48e-06 0 1.72e-11 29 | -I_g_on_n (A/um) 0 1.81e-09 4.51e-10 2.74e-09 0 0 30 | -I_g_on_n (A/um) 10 1.81e-09 4.51e-10 2.74e-09 0 0 31 | -I_g_on_n (A/um) 20 1.81e-09 4.51e-10 2.74e-09 0 0 32 | -I_g_on_n (A/um) 30 1.81e-09 4.51e-10 2.74e-09 0 0 33 | -I_g_on_n (A/um) 40 1.81e-09 4.51e-10 2.74e-09 0 0 34 | -I_g_on_n (A/um) 50 1.81e-09 4.51e-10 2.74e-09 0 0 35 | -I_g_on_n (A/um) 60 1.81e-09 4.51e-10 2.74e-09 0 0 36 | -I_g_on_n (A/um) 70 1.81e-09 4.51e-10 2.74e-09 0 0 37 | -I_g_on_n (A/um) 80 1.81e-09 4.51e-10 2.74e-09 0 0 38 | -I_g_on_n (A/um) 90 1.81e-09 4.51e-10 2.74e-09 0 0 39 | -I_g_on_n (A/um) 100 1.81e-09 4.51e-10 2.74e-09 0 0 40 | 41 | 42 | parameters (unit) hp lstp lop lp-dram comm-dram 43 | -C_ox (F/um^2) 3.63e-14 2.3e-14 2.87e-14 0 9.06e-15 44 | -t_ox (um) 0.00055 0.0011 0.0008 0 0.0035 45 | -n2p_drv_rt (-) 2 2 2 0 1.95 46 | -lch_lk_rdc (-) 0.305437 0.529101 0.420168 0 1 47 | -Mobility_n (um^2/V.sec) 4.2607e+10 7.3809e+10 6.9837e+10 0 3.6729e+10 48 | -gmp_to_gmn_multiplier (-) 1.38 0.99 1.11 0 0.9 49 | -vpp (V) 0 0 0 0 2.3 50 | 51 | SRAM 52 | parameters cell_type hp lstp lop lp-dram comm-dram 53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31 54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23 55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08 56 | -area_cell (um^2) 0 146 146 146 146 146 57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46 58 | 59 | CAM 60 | parameters cell_type hp lstp lop lp-dram comm-dram 61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31 62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23 63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08 64 | -area_cell (um^2) 1 292 292 292 292 292 65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92 66 | 67 | DRAM 68 | parameters cell_type hp lstp lop lp-dram comm-dram 69 | -vdd_cell (V) 2 0 0 0 0 0 70 | -Wmemcella (um) 2 0 0 0 0 0.022 71 | -Wmemcellpmos (um) 2 0 0 0 0 0 72 | -Wmemcellnmos (um) 2 0 0 0 0 0 73 | -area_cell (um^2) 2 0 0 0 0 0.001936 74 | -asp_ratio_cell (-) 2 0 0 0 0 1 75 | 76 | parameters hp lstp lop lp-dram comm-dram 77 | -dram_cell_I_on (A/um) 0 0 0 0 2e-05 78 | -dram_cell_Vdd (V) 0 0 0 0 0.9 79 | -dram_cell_C (F) 0 0 0 0 3e-14 80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 0 1e-15 81 | 82 | 83 | -logic_scaling_co_eff (-) 0.2401 84 | -core_tx_density (1/um^2) 2.55102 85 | -sckt_co_eff (-) 1.1296 86 | -chip_layout_overhead (-) 1.2 87 | -macro_layout_overhead (-) 1.1 88 | -sense_delay (sec) 3e-11 89 | -sense_dy_power (J) 2.16e-15 90 | 91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2 93 | -barrier_thickness (um) 0 0 0 0 0.003 0.003 0.003 0 94 | -dishing_thickness (um) 0 0 0 0 0 0 0.01936 0 95 | -alpha_scatter (-) 1 1 1 0 1.05 1.05 1.05 0 96 | -aspect_ratio (-) 3 3 3 0 2 2 2.2 0 97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0 98 | -horiz_dielectric_constant (-) 1.414 1.414 1.414 0 2.104 2.104 2.104 0 99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0 100 | -ild_thickness (um) 0.15 0.15 0.3 0 0.15 0.15 0.275 0 101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 102 | -resistivity (u-ohm.m) 0.018 0.018 0.018 0.018 0.022 0.022 0.022 0.022 103 | 104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2 105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 545.455 106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 2.75213e-15 107 | -tsv_pitch (um) 0.8 40 0 1.5 9 0 108 | -tsv_diameter (um) 0.4 7.5 0 0.8 4.5 0 109 | -tsv_length (um) 4 50 0 10 25 0 110 | -tsv_dielec_thickness (um) 0.1 0.2 0 0.1 0.1 0 111 | -tsv_contact_resistance (ohm) 0.1 0.2 0 0.1 0.1 0 112 | -tsv_depletion_width (um) 0.6 0.6 0 0.6 0.6 0 113 | -tsv_liner_dielectric_cons (-) 1.414 1.414 0 2.104 2.104 0 114 | -------------------------------------------------------------------------------- /cacti/tech_params/32nm.dat: -------------------------------------------------------------------------------- 1 | parameters (unit) hp lstp lop lp-dram comm-dram 2 | -C_g_ideal (F/um) 5.34e-16 4.58e-16 4.54e-16 7.45e-16 2.56e-16 3 | -C_fringe (F/um) 4e-17 5.3e-17 5.7e-17 5.3e-17 5.3e-17 4 | -C_junc (F/um^2) 1e-15 1e-15 1e-15 1e-15 1e-15 5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16 6 | -l_phy (um) 0.013 0.02 0.016 0.056 0.032 7 | -l_elec (um) 0.01013 0.0173 0.01232 0.0419 0.0205 8 | -nmos_effective_resistance_multiplier (-) 1.49 1.99 1.73 1.65 1.69 9 | -Vdd (V) 0.9 1 0.6 1 1 10 | -Vth (V) 0.21835 0.513 0.24227 0.44467 1 11 | -Vdsat (V) 0.0509 0.0864 0.0464 0.174 0.129 12 | -I_on_n (A/um) 0.0022117 0.0006836 0.0008278 0.0010554 0.0010245 13 | -I_on_p (A/um) 0.00110585 0.0003418 0.0004139 0.0005277 0.00051225 14 | 15 | 16 | 17 | parameters (unit) temp hp lstp lop lp-dram comm-dram 18 | -I_off_n (A/um) 0 1.52e-07 2.06e-11 5.94e-08 3.57e-11 3.63e-14 19 | -I_off_n (A/um) 10 1.55e-07 3.3e-11 7.23e-08 5.51e-11 7.18e-14 20 | -I_off_n (A/um) 20 1.59e-07 5.15e-11 8.7e-08 8.27e-11 1.36e-13 21 | -I_off_n (A/um) 30 1.68e-07 7.83e-11 1.04e-07 1.21e-10 2.49e-13 22 | -I_off_n (A/um) 40 1.9e-07 1.16e-10 1.22e-07 1.74e-10 4.41e-13 23 | -I_off_n (A/um) 50 2.69e-07 1.69e-10 1.43e-07 2.45e-10 7.55e-13 24 | -I_off_n (A/um) 60 5.32e-07 2.4e-10 1.65e-07 3.38e-10 1.26e-12 25 | -I_off_n (A/um) 70 1.02e-06 3.34e-10 1.9e-07 4.53e-10 2.03e-12 26 | -I_off_n (A/um) 80 1.62e-06 4.54e-10 2.15e-07 5.87e-10 3.19e-12 27 | -I_off_n (A/um) 90 2.73e-06 5.96e-10 2.39e-07 7.29e-10 4.87e-12 28 | -I_off_n (A/um) 100 6.1e-06 7.44e-10 2.63e-07 8.87e-10 7.16e-12 29 | -I_g_on_n (A/um) 0 6.55e-08 3.73e-11 2.93e-09 0 0 30 | -I_g_on_n (A/um) 10 6.55e-08 3.73e-11 2.93e-09 0 0 31 | -I_g_on_n (A/um) 20 6.55e-08 3.73e-11 2.93e-09 0 0 32 | -I_g_on_n (A/um) 30 6.55e-08 3.73e-11 2.93e-09 0 0 33 | -I_g_on_n (A/um) 40 6.55e-08 3.73e-11 2.93e-09 0 0 34 | -I_g_on_n (A/um) 50 6.55e-08 3.73e-11 2.93e-09 0 0 35 | -I_g_on_n (A/um) 60 6.55e-08 3.73e-11 2.93e-09 0 0 36 | -I_g_on_n (A/um) 70 6.55e-08 3.73e-11 2.93e-09 0 0 37 | -I_g_on_n (A/um) 80 6.55e-08 3.73e-11 2.93e-09 0 0 38 | -I_g_on_n (A/um) 90 6.55e-08 3.73e-11 2.93e-09 0 0 39 | -I_g_on_n (A/um) 100 6.55e-08 3.73e-11 2.93e-09 0 0 40 | 41 | 42 | parameters (unit) hp lstp lop lp-dram comm-dram 43 | -C_ox (F/um^2) 4.11e-14 2.29e-14 2.84e-14 1.48e-14 7.99e-15 44 | -t_ox (um) 0.0005 0.0012 0.0009 0.002 0.004 45 | -n2p_drv_rt (-) 2.41 2.23 2.28 2.05 1.95 46 | -lch_lk_rdc (-) 0.269833 0.518135 0.529101 1 1 47 | -Mobility_n (um^2/V.sec) 3.6184e+10 3.4746e+10 5.1352e+10 4.0812e+10 3.8076e+10 48 | -gmp_to_gmn_multiplier (-) 1.38 0.99 1.11 0.9 0.9 49 | -vpp (V) 0 0 0 1.5 2.6 50 | 51 | SRAM 52 | parameters cell_type hp lstp lop lp-dram comm-dram 53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31 54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23 55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08 56 | -area_cell (um^2) 0 146 146 146 146 146 57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46 58 | 59 | CAM 60 | parameters cell_type hp lstp lop lp-dram comm-dram 61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31 62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23 63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08 64 | -area_cell (um^2) 1 292 292 292 292 292 65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92 66 | 67 | DRAM 68 | parameters cell_type hp lstp lop lp-dram comm-dram 69 | -vdd_cell (V) 2 0 0 0 1 1 70 | -Wmemcella (um) 2 0 0 0 0.056 0.032 71 | -Wmemcellpmos (um) 2 0 0 0 0 0 72 | -Wmemcellnmos (um) 2 0 0 0 0 0 73 | -area_cell (um^2) 2 0 0 0 0.03136 0.006144 74 | -asp_ratio_cell (-) 2 0 0 0 1.46 1.5 75 | 76 | parameters hp lstp lop lp-dram comm-dram 77 | -dram_cell_I_on (A/um) 0 0 0 3.6e-05 2e-05 78 | -dram_cell_Vdd (V) 0 0 0 1 1 79 | -dram_cell_C (F) 0 0 0 2e-14 3e-14 80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 1.89e-11 1e-15 81 | 82 | 83 | -logic_scaling_co_eff (-) 0.343 84 | -core_tx_density (1/um^2) 1.78571 85 | -sckt_co_eff (-) 1.1111 86 | -chip_layout_overhead (-) 1.2 87 | -macro_layout_overhead (-) 1.1 88 | -sense_delay (sec) 3e-11 89 | -sense_dy_power (J) 2.16e-15 90 | 91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2 93 | -barrier_thickness (um) 0 0 0 0 0.003 0.003 0.003 0 94 | -dishing_thickness (um) 0 0 0 0 0 0 0.02816 0 95 | -alpha_scatter (-) 1 1 1 0 1 1 1 0 96 | -aspect_ratio (-) 3 3 3 0 2 2 2.2 0 97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0 98 | -horiz_dielectric_constant (-) 1.664 1.664 1.664 0 2.214 2.214 2.214 0 99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0 100 | -ild_thickness (um) 0.21 0.21 0.42 0 0.21 0.21 0.385 0 101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 102 | -resistivity (u-ohm.m) 0.018 0.018 0.018 0.018 0.022 0.022 0.022 0.022 103 | 104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2 105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 375 106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 1.89209e-15 107 | -tsv_pitch (um) 1.4 15 0 4 30 0 108 | -tsv_diameter (um) 0.7 2.3 0 2 3.8 0 109 | -tsv_length (um) 5 30 0 15 37.5 0 110 | -tsv_dielec_thickness (um) 0.1 0.2 0 0.1 0.5 0 111 | -tsv_contact_resistance (ohm) 0.1 0.2 0 0.1 0.2 0 112 | -tsv_depletion_width (um) 0.6 0.6 0 0.6 0.6 0 113 | -tsv_liner_dielectric_cons (-) 1.664 1.664 0 2.214 2.214 0 114 | -------------------------------------------------------------------------------- /cacti/tech_params/45nm.dat: -------------------------------------------------------------------------------- 1 | parameters (unit) hp lstp lop lp-dram comm-dram 2 | -C_g_ideal (F/um) 6.78e-16 5.18e-16 6.2e-16 1.1e-15 3.59e-16 3 | -C_fringe (F/um) 5e-17 8e-17 7.3e-17 8e-17 8e-17 4 | -C_junc (F/um^2) 1e-15 1e-15 1e-15 1e-15 1e-15 5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16 6 | -l_phy (um) 0.018 0.028 0.022 0.078 0.045 7 | -l_elec (um) 0.01345 0.0212 0.016 0.0504 0.0298 8 | -nmos_effective_resistance_multiplier (-) 1.51 1.99 1.76 1.65 1.69 9 | -Vdd (V) 1 1.1 0.7 1.1 1.1 10 | -Vth (V) 0.18035 0.50245 0.22599 0.44559 1 11 | -Vdsat (V) 0.0938 0.0912 0.0571 0.181 0.147 12 | -I_on_n (A/um) 0.0020466 0.0006662 0.0007489 0.000456 0.0009994 13 | -I_on_p (A/um) 0.0010233 0.0003331 0.00037445 0.000228 0.0004997 14 | 15 | 16 | 17 | parameters (unit) temp hp lstp lop lp-dram comm-dram 18 | -I_off_n (A/um) 0 2.8e-07 1.01e-11 4.03e-09 2.54e-11 1.31e-14 19 | -I_off_n (A/um) 10 3.28e-07 1.65e-11 5.02e-09 3.94e-11 2.68e-14 20 | -I_off_n (A/um) 20 3.81e-07 2.62e-11 6.18e-09 5.95e-11 5.25e-14 21 | -I_off_n (A/um) 30 4.39e-07 4.06e-11 7.51e-09 8.79e-11 9.88e-14 22 | -I_off_n (A/um) 40 5.02e-07 6.12e-11 9.04e-09 1.27e-10 1.79e-13 23 | -I_off_n (A/um) 50 5.69e-07 9.02e-11 1.08e-08 1.79e-10 3.15e-13 24 | -I_off_n (A/um) 60 6.42e-07 1.3e-10 1.27e-08 2.47e-10 5.36e-13 25 | -I_off_n (A/um) 70 7.2e-07 1.83e-10 1.47e-08 3.31e-10 8.86e-13 26 | -I_off_n (A/um) 80 8.03e-07 2.51e-10 1.66e-08 4.26e-10 1.42e-12 27 | -I_off_n (A/um) 90 8.91e-07 3.29e-10 1.84e-08 5.27e-10 2.2e-12 28 | -I_off_n (A/um) 100 9.84e-07 4.1e-10 2.03e-08 6.46e-10 3.29e-12 29 | -I_g_on_n (A/um) 0 3.59e-08 9.47e-12 3.24e-08 0 0 30 | -I_g_on_n (A/um) 10 3.59e-08 9.47e-12 4.01e-08 0 0 31 | -I_g_on_n (A/um) 20 3.59e-08 9.47e-12 4.9e-08 0 0 32 | -I_g_on_n (A/um) 30 3.59e-08 9.47e-12 5.92e-08 0 0 33 | -I_g_on_n (A/um) 40 3.59e-08 9.47e-12 7.08e-08 0 0 34 | -I_g_on_n (A/um) 50 3.59e-08 9.47e-12 8.38e-08 0 0 35 | -I_g_on_n (A/um) 60 3.59e-08 9.47e-12 9.82e-08 0 0 36 | -I_g_on_n (A/um) 70 3.59e-08 9.47e-12 1.14e-07 0 0 37 | -I_g_on_n (A/um) 80 3.59e-08 9.47e-12 1.29e-07 0 0 38 | -I_g_on_n (A/um) 90 3.59e-08 9.47e-12 1.43e-07 0 0 39 | -I_g_on_n (A/um) 100 3.59e-08 9.47e-12 1.54e-07 0 0 40 | 41 | 42 | parameters (unit) hp lstp lop lp-dram comm-dram 43 | -C_ox (F/um^2) 3.77e-14 2.01e-14 2.82e-14 1.41e-14 7.98e-15 44 | -t_ox (um) 0.00065 0.0014 0.0009 0.0021 0.004 45 | -n2p_drv_rt (-) 2.41 2.23 2.28 2.05 1.95 46 | -lch_lk_rdc (-) 0.282008 0.480769 0.520833 1 1 47 | -Mobility_n (um^2/V.sec) 2.6668e+10 3.6396e+10 5.089e+10 4.263e+10 3.6858e+10 48 | -gmp_to_gmn_multiplier (-) 1.38 0.99 1.11 0.9 0.9 49 | -vpp (V) 0 0 0 1.5 2.7 50 | 51 | SRAM 52 | parameters cell_type hp lstp lop lp-dram comm-dram 53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31 54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23 55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08 56 | -area_cell (um^2) 0 146 146 146 146 146 57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46 58 | 59 | CAM 60 | parameters cell_type hp lstp lop lp-dram comm-dram 61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31 62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23 63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08 64 | -area_cell (um^2) 1 292 292 292 292 292 65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92 66 | 67 | DRAM 68 | parameters cell_type hp lstp lop lp-dram comm-dram 69 | -vdd_cell (V) 2 0 0 0 1.1 1.1 70 | -Wmemcella (um) 2 0 0 0 0.079 0.045 71 | -Wmemcellpmos (um) 2 0 0 0 0 0 72 | -Wmemcellnmos (um) 2 0 0 0 0 0 73 | -area_cell (um^2) 2 0 0 0 0.06162 0.01215 74 | -asp_ratio_cell (-) 2 0 0 0 1.46 1.5 75 | 76 | parameters hp lstp lop lp-dram comm-dram 77 | -dram_cell_I_on (A/um) 0 0 0 3.6e-05 2e-05 78 | -dram_cell_Vdd (V) 0 0 0 1.1 1.1 79 | -dram_cell_C (F) 0 0 0 2e-14 3e-14 80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 1.95e-11 1e-15 81 | 82 | 83 | -logic_scaling_co_eff (-) 0.49 84 | -core_tx_density (1/um^2) 1.25 85 | -sckt_co_eff (-) 1.1387 86 | -chip_layout_overhead (-) 1.2 87 | -macro_layout_overhead (-) 1.1 88 | -sense_delay (sec) 4e-11 89 | -sense_dy_power (J) 2.7e-15 90 | 91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2 93 | -barrier_thickness (um) 0 0 0 0 0.004 0.004 0.004 0 94 | -dishing_thickness (um) 0 0 0 0 0 0 0.0396 0 95 | -alpha_scatter (-) 1 1 1 0 1 1 1 0 96 | -aspect_ratio (-) 3 3 3 0 2 2 2.2 0 97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0 98 | -horiz_dielectric_constant (-) 1.958 1.958 1.958 0 2.46 2.46 2.46 0 99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0 100 | -ild_thickness (um) 0.315 0.315 0.63 0 0.315 0.315 0.55 0 101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 102 | -resistivity (u-ohm.m) 0.018 0.018 0.018 0.018 0.022 0.022 0.022 0.022 103 | 104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2 105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 266.667 106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 1.6276e-15 107 | -tsv_pitch (um) 2.2 20 0 3.4 40 0 108 | -tsv_diameter (um) 1.1 3.1 0 1.7 5 0 109 | -tsv_length (um) 6 40 0 20 50 0 110 | -tsv_dielec_thickness (um) 0.1 0.2 0 0.1 0.5 0 111 | -tsv_contact_resistance (ohm) 0.1 0.2 0 0.1 0.2 0 112 | -tsv_depletion_width (um) 0.6 0.6 0 0.6 0.6 0 113 | -tsv_liner_dielectric_cons (-) 1.958 1.958 0 2.46 2.46 0 114 | -------------------------------------------------------------------------------- /cacti/tech_params/65nm.dat: -------------------------------------------------------------------------------- 1 | parameters (unit) hp lstp lop lp-dram comm-dram 2 | -C_g_ideal (F/um) 4.69e-16 6.14e-16 6e-16 1.46e-15 4e-16 3 | -C_fringe (F/um) 7.7e-17 8e-17 8e-17 8e-17 8e-17 4 | -C_junc (F/um^2) 1e-15 1e-15 1e-15 1e-15 1e-15 5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16 6 | -l_phy (um) 0.025 0.045 0.032 0.12 0.065 7 | -l_elec (um) 0.019 0.0298 0.0216 0.0756 0.0426 8 | -nmos_effective_resistance_multiplier (-) 1.5 1.96 1.82 1.65 1.69 9 | -Vdd (V) 1.1 1.2 0.8 1.2 1.3 10 | -Vth (V) 0.19491 0.52354 0.28512 0.43806 1 11 | -Vdsat (V) 0.0771 0.128 0.292 0.43806 0.385 12 | -I_on_n (A/um) 0.0011972 0.0005192 0.0005731 0.0003998 0.001031 13 | -I_on_p (A/um) 0.0008708 0.000266 0.0003406 0.0002434 0.0005155 14 | 15 | 16 | 17 | parameters (unit) temp hp lstp lop lp-dram comm-dram 18 | -I_off_n (A/um) 0 1.96e-07 9.12e-12 4.9e-09 2.23e-11 1.8e-14 19 | -I_off_n (A/um) 10 2.29e-07 1.49e-11 6.49e-09 3.46e-11 3.64e-14 20 | -I_off_n (A/um) 20 2.66e-07 2.36e-11 8.45e-09 5.24e-11 7.03e-14 21 | -I_off_n (A/um) 30 3.05e-07 3.64e-11 1.08e-08 7.75e-11 1.31e-13 22 | -I_off_n (A/um) 40 3.49e-07 5.48e-11 1.37e-08 1.12e-10 2.35e-13 23 | -I_off_n (A/um) 50 3.95e-07 8.05e-11 1.71e-08 1.58e-10 4.09e-13 24 | -I_off_n (A/um) 60 4.45e-07 1.15e-10 2.09e-08 2.18e-10 6.89e-13 25 | -I_off_n (A/um) 70 4.97e-07 1.59e-10 2.48e-08 2.88e-10 1.13e-12 26 | -I_off_n (A/um) 80 5.48e-07 2.1e-10 2.84e-08 3.63e-10 1.78e-12 27 | -I_off_n (A/um) 90 5.94e-07 2.62e-10 3.13e-08 4.41e-10 2.71e-12 28 | -I_off_n (A/um) 100 6.3e-07 3.21e-10 3.42e-08 5.36e-10 3.99e-12 29 | -I_g_on_n (A/um) 0 4.09e-08 1.09e-10 9.61e-09 0 0 30 | -I_g_on_n (A/um) 10 4.09e-08 1.09e-10 9.61e-09 0 0 31 | -I_g_on_n (A/um) 20 4.09e-08 1.09e-10 9.61e-09 0 0 32 | -I_g_on_n (A/um) 30 4.09e-08 1.09e-10 9.61e-09 0 0 33 | -I_g_on_n (A/um) 40 4.09e-08 1.09e-10 9.61e-09 0 0 34 | -I_g_on_n (A/um) 50 4.09e-08 1.09e-10 9.61e-09 0 0 35 | -I_g_on_n (A/um) 60 4.09e-08 1.09e-10 9.61e-09 0 0 36 | -I_g_on_n (A/um) 70 4.09e-08 1.09e-10 9.61e-09 0 0 37 | -I_g_on_n (A/um) 80 4.09e-08 1.09e-10 9.61e-09 0 0 38 | -I_g_on_n (A/um) 90 4.09e-08 1.09e-10 9.61e-09 0 0 39 | -I_g_on_n (A/um) 100 4.09e-08 1.09e-10 9.61e-09 0 0 40 | 41 | 42 | parameters (unit) hp lstp lop lp-dram comm-dram 43 | -C_ox (F/um^2) 1.88e-14 1.36e-14 1.87e-14 1.22e-14 6.16e-15 44 | -t_ox (um) 0.0011 0.0019 0.0012 0.0022 0.005 45 | -n2p_drv_rt (-) 2.41 2.23 2.28 2.05 2.39 46 | -lch_lk_rdc (-) 0.26738 0.35461 0.487805 1 1 47 | -Mobility_n (um^2/V.sec) 4.3624e+10 3.4121e+10 4.9519e+10 3.2832e+10 3.0344e+10 48 | -gmp_to_gmn_multiplier (-) 1.38 0.99 1.11 0.9 0.9 49 | -vpp (V) 0 0 0 1.6 3.3 50 | 51 | SRAM 52 | parameters cell_type hp lstp lop lp-dram comm-dram 53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31 54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23 55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08 56 | -area_cell (um^2) 0 146 146 146 146 146 57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46 58 | 59 | CAM 60 | parameters cell_type hp lstp lop lp-dram comm-dram 61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31 62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23 63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08 64 | -area_cell (um^2) 1 292 292 292 292 292 65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92 66 | 67 | DRAM 68 | parameters cell_type hp lstp lop lp-dram comm-dram 69 | -vdd_cell (V) 2 0 0 0 0 1.2 70 | -Wmemcella (um) 2 0 0 0 0.09 0.065 71 | -Wmemcellpmos (um) 2 0 0 0 0 0 72 | -Wmemcellnmos (um) 2 0 0 0 0 0 73 | -area_cell (um^2) 2 0 0 0 0.11 0.02535 74 | -asp_ratio_cell (-) 2 0 0 0 1.46 1.5 75 | 76 | parameters hp lstp lop lp-dram comm-dram 77 | -dram_cell_I_on (A/um) 0 0 0 3.6e-05 2e-05 78 | -dram_cell_Vdd (V) 0 0 0 1.2 1.3 79 | -dram_cell_C (F) 0 0 0 2e-14 3e-14 80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 1.96e-11 1e-15 81 | 82 | 83 | -logic_scaling_co_eff (-) 0.7 84 | -core_tx_density (1/um^2) 0.875 85 | -sckt_co_eff (-) 1.1359 86 | -chip_layout_overhead (-) 1.2 87 | -macro_layout_overhead (-) 1.1 88 | -sense_delay (sec) 2e-10 89 | -sense_dy_power (J) 5.7e-15 90 | 91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2 93 | -barrier_thickness (um) 0 0 0 0 0.006 0.006 0.006 0 94 | -dishing_thickness (um) 0 0 0 0 0 0 0.0572 0 95 | -alpha_scatter (-) 1 1 1 0 1 1 1 0 96 | -aspect_ratio (-) 2.7 2.7 2.8 0 2 2 2.2 0 97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0 98 | -horiz_dielectric_constant (-) 2.303 2.303 2.303 0 2.734 2.734 2.734 0 99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0 100 | -ild_thickness (um) 0.405 0.405 0.81 0 0.405 0.405 0.77 0 101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 102 | -resistivity (u-ohm.m) 0.018 0.018 0.018 0.018 0.022 0.022 0.022 0.022 103 | 104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2 105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 184.615 106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 1.57752e-15 107 | -tsv_pitch (um) 3.2 30 0 5 60 0 108 | -tsv_diameter (um) 1.6 4.6 0 2.5 7.5 0 109 | -tsv_length (um) 7 50 0 25 62.5 0 110 | -tsv_dielec_thickness (um) 0.1 0.2 0 0.1 0.5 0 111 | -tsv_contact_resistance (ohm) 0.1 0.2 0 0.1 0.2 0 112 | -tsv_depletion_width (um) 0.6 0.6 0 0.6 0.6 0 113 | -tsv_liner_dielectric_cons (-) 2.303 2.303 0 2.734 2.734 0 114 | -------------------------------------------------------------------------------- /cacti/tech_params/90nm.dat: -------------------------------------------------------------------------------- 1 | parameters (unit) hp lstp lop lp-dram comm-dram 2 | -C_g_ideal (F/um) 6.64e-16 9.15e-16 8.45e-16 1.47e-15 5.08e-16 3 | -C_fringe (F/um) 8e-17 8e-17 8e-17 8e-17 8e-17 4 | -C_junc (F/um^2) 1e-15 1e-15 1e-15 1e-15 1e-15 5 | -C_junc_sw (F/um^2) 2.5e-16 2.5e-16 2.5e-16 2.5e-16 2.5e-16 6 | -l_phy (um) 0.037 0.075 0.053 0.12 0.09 7 | -l_elec (um) 0.0266 0.0486 0.0354 0.0756 0.0576 8 | -nmos_effective_resistance_multiplier (-) 1.54 1.92 1.77 1.65 1.62 9 | -Vdd (V) 1.2 1.3 0.9 1.2 1.6 10 | -Vth (V) 0.23707 0.48203 0.30764 0.4545 1 11 | -Vdsat (V) 0.128 0.373 0.113 0.3 0.32 12 | -I_on_n (A/um) 0.0010769 0.0005036 0.0003866 0.0003216 0.0010943 13 | -I_on_p (A/um) 0.0007126 0.0002351 0.0002097 0.0002033 0.00054715 14 | 15 | 16 | 17 | parameters (unit) temp hp lstp lop lp-dram comm-dram 18 | -I_off_n (A/um) 0 3.24e-08 2.81e-12 2.14e-09 1.42e-11 5.8e-15 19 | -I_off_n (A/um) 10 4.01e-08 4.76e-12 2.9e-09 2.25e-11 1.21e-14 20 | -I_off_n (A/um) 20 4.9e-08 7.82e-12 3.87e-09 3.46e-11 2.42e-14 21 | -I_off_n (A/um) 30 5.92e-08 1.25e-11 5.07e-09 5.18e-11 4.65e-14 22 | -I_off_n (A/um) 40 7.08e-08 1.94e-11 6.54e-09 7.58e-11 8.6e-14 23 | -I_off_n (A/um) 50 8.38e-08 2.94e-11 8.27e-08 1.08e-10 1.54e-13 24 | -I_off_n (A/um) 60 9.82e-08 4.36e-11 1.02e-07 1.51e-10 2.66e-13 25 | -I_off_n (A/um) 70 1.14e-07 6.32e-11 1.2e-07 2.02e-10 4.45e-13 26 | -I_off_n (A/um) 80 1.29e-07 8.95e-11 1.36e-08 2.57e-10 7.17e-13 27 | -I_off_n (A/um) 90 1.43e-07 1.25e-10 1.52e-08 3.14e-10 1.11e-12 28 | -I_off_n (A/um) 100 1.54e-07 1.7e-10 1.73e-08 3.85e-10 1.67e-12 29 | -I_g_on_n (A/um) 0 1.65e-08 3.87e-11 4.31e-08 0 0 30 | -I_g_on_n (A/um) 10 1.65e-08 3.87e-11 4.31e-08 0 0 31 | -I_g_on_n (A/um) 20 1.65e-08 3.87e-11 4.31e-08 0 0 32 | -I_g_on_n (A/um) 30 1.65e-08 3.87e-11 4.31e-08 0 0 33 | -I_g_on_n (A/um) 40 1.65e-08 3.87e-11 4.31e-08 0 0 34 | -I_g_on_n (A/um) 50 1.65e-08 3.87e-11 4.31e-08 0 0 35 | -I_g_on_n (A/um) 60 1.65e-08 3.87e-11 4.31e-08 0 0 36 | -I_g_on_n (A/um) 70 1.65e-08 3.87e-11 4.31e-08 0 0 37 | -I_g_on_n (A/um) 80 1.65e-08 3.87e-11 4.31e-08 0 0 38 | -I_g_on_n (A/um) 90 1.65e-08 3.87e-11 4.31e-08 0 0 39 | -I_g_on_n (A/um) 100 1.65e-08 3.87e-11 4.31e-08 0 0 40 | 41 | 42 | parameters (unit) hp lstp lop lp-dram comm-dram 43 | -C_ox (F/um^2) 1.79e-14 1.22e-14 1.59e-14 1.22e-14 5.65e-15 44 | -t_ox (um) 0.0012 0.0022 0.0015 0.0022 0.0055 45 | -n2p_drv_rt (-) 2.45 2.44 2.54 1.95 2.05 46 | -lch_lk_rdc (-) 1 1 1 1 1 47 | -Mobility_n (um^2/V.sec) 3.4216e+10 3.5676e+10 4.6039e+10 3.2395e+10 3.022e+10 48 | -gmp_to_gmn_multiplier (-) 1.22 0.88 0.98 0.9 0.9 49 | -vpp (V) 0 0 0 1.6 3.7 50 | 51 | SRAM 52 | parameters cell_type hp lstp lop lp-dram comm-dram 53 | -Wmemcella (um) 0 1.31 1.31 1.31 1.31 1.31 54 | -Wmemcellpmos (um) 0 1.23 1.23 1.23 1.23 1.23 55 | -Wmemcellnmos (um) 0 2.08 2.08 2.08 2.08 2.08 56 | -area_cell (um^2) 0 146 146 146 146 146 57 | -asp_ratio_cell (-) 0 1.46 1.46 1.46 1.46 1.46 58 | 59 | CAM 60 | parameters cell_type hp lstp lop lp-dram comm-dram 61 | -Wmemcella (um) 1 1.31 1.31 1.31 1.31 1.31 62 | -Wmemcellpmos (um) 1 1.23 1.23 1.23 1.23 1.23 63 | -Wmemcellnmos (um) 1 2.08 2.08 2.08 2.08 2.08 64 | -area_cell (um^2) 1 292 292 292 292 292 65 | -asp_ratio_cell (-) 1 2.92 2.92 2.92 2.92 2.92 66 | 67 | DRAM 68 | parameters cell_type hp lstp lop lp-dram comm-dram 69 | -vdd_cell (V) 2 0 0 0 1.2 1.6 70 | -Wmemcella (um) 2 0 0 0 0.14 0.09 71 | -Wmemcellpmos (um) 2 0 0 0 0 0 72 | -Wmemcellnmos (um) 2 0 0 0 0 0 73 | -area_cell (um^2) 2 0 0 0 0.168 0.0486 74 | -asp_ratio_cell (-) 2 0 0 0 1.46 1.5 75 | 76 | parameters hp lstp lop lp-dram comm-dram 77 | -dram_cell_I_on (A/um) 0 0 0 4.5e-05 2e-05 78 | -dram_cell_Vdd (V) 0 0 0 1.2 1.6 79 | -dram_cell_C (F) 0 0 0 2e-14 3e-14 80 | -dram_cell_I_off_worst_case_len_temp (A/um) 0 0 0 2.11e-11 1e-15 81 | 82 | 83 | -logic_scaling_co_eff (-) 1 84 | -core_tx_density (1/um^2) 0.6125 85 | -sckt_co_eff (-) 1.1539 86 | -chip_layout_overhead (-) 1.2 87 | -macro_layout_overhead (-) 1.1 88 | -sense_delay (sec) 2.8e-10 89 | -sense_dy_power (J) 1.47e-14 90 | 91 | parameters 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 92 | -wire_pitch (um) 2.5 4 8 0 2.5 4 8 2 93 | -barrier_thickness (um) 0.01 0.01 0.01 0 0.008 0.008 0.008 0 94 | -dishing_thickness (um) 0 0 0 0 0 0 0.0792 0 95 | -alpha_scatter (-) 1 1 1 0 1 1 1 0 96 | -aspect_ratio (-) 2.4 2.4 2.7 0 2 2 2.2 0 97 | -miller_value (-) 1.5 1.5 1.5 0 1.5 1.5 1.5 0 98 | -horiz_dielectric_constant (-) 2.709 2.709 2.709 0 3.038 3.038 3.038 0 99 | -vert_dielectric_constant (-) 3.9 3.9 3.9 0 3.9 3.9 3.9 0 100 | -ild_thickness (um) 0.48 0.48 0.96 0 0.48 0.48 1.1 0 101 | -fringe_cap (F/um) 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 1.15e-16 102 | -resistivity (u-ohm.m) 0.022 0.022 0.022 0.022 0.022 0.022 0.022 0.022 103 | 104 | parameters 0/0 0/1 0/2 1/0 1/1 1/2 105 | -wire_r_per_micron (ohm/um) 0 0 0 0 0 0 0 133.333 106 | -wire_c_per_micron (F/um) 0 0 0 0 0 0 0 1.30208e-15 107 | -tsv_pitch (um) 4 45 0 6.9 90 0 108 | -tsv_diameter (um) 2 6.9 0 3.5 11.3 0 109 | -tsv_length (um) 8 60 0 30 75 0 110 | -tsv_dielec_thickness (um) 0.1 0.2 0 0.1 0.5 0 111 | -tsv_contact_resistance (ohm) 0.1 0.2 0 0.1 0.2 0 112 | -tsv_depletion_width (um) 0.6 0.6 0 0.6 0.6 0 113 | -tsv_liner_dielectric_cons (-) 2.709 2.709 0 3.038 3.038 0 114 | -------------------------------------------------------------------------------- /cacti/uca.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | 34 | #ifndef __UCA_H__ 35 | #define __UCA_H__ 36 | 37 | #include "area.h" 38 | #include "bank.h" 39 | #include "component.h" 40 | #include "parameter.h" 41 | #include "htree2.h" 42 | #include "memorybus.h" 43 | #include "basic_circuit.h" 44 | #include "cacti_interface.h" 45 | 46 | 47 | 48 | class UCA : public Component 49 | { 50 | public: 51 | UCA(const DynamicParameter & dyn_p); 52 | ~UCA(); 53 | double compute_delays(double inrisetime); // returns outrisetime 54 | void compute_power_energy(); 55 | 56 | DynamicParameter dp; 57 | Bank bank; 58 | 59 | Htree2 * htree_in_add; 60 | Htree2 * htree_in_data; 61 | Htree2 * htree_out_data; 62 | Htree2 * htree_in_search; 63 | Htree2 * htree_out_search; 64 | 65 | Memorybus * membus_RAS; 66 | Memorybus * membus_CAS; 67 | Memorybus * membus_data; 68 | 69 | powerDef power_routing_to_bank; 70 | 71 | uint32_t nbanks; 72 | 73 | int num_addr_b_bank; 74 | int num_di_b_bank; 75 | int num_do_b_bank; 76 | int num_si_b_bank; 77 | int num_so_b_bank; 78 | int RWP, ERP, EWP,SCHP; 79 | double area_all_dataramcells; 80 | double total_area_per_die; 81 | 82 | double dyn_read_energy_from_closed_page; 83 | double dyn_read_energy_from_open_page; 84 | double dyn_read_energy_remaining_words_in_burst; 85 | 86 | double refresh_power; // only for DRAM 87 | double activate_energy; 88 | double read_energy; 89 | double write_energy; 90 | double precharge_energy; 91 | double leak_power_subbank_closed_page; 92 | double leak_power_subbank_open_page; 93 | double leak_power_request_and_reply_networks; 94 | 95 | double delay_array_to_sa_mux_lev_1_decoder; 96 | double delay_array_to_sa_mux_lev_2_decoder; 97 | double delay_before_subarray_output_driver; 98 | double delay_from_subarray_out_drv_to_out; 99 | double access_time; 100 | double precharge_delay; 101 | double multisubbank_interleave_cycle_time; 102 | 103 | double t_RAS, t_CAS, t_RCD, t_RC, t_RP, t_RRD; 104 | double activate_power, read_power, write_power; 105 | 106 | double delay_TSV_tot, area_TSV_tot, dyn_pow_TSV_tot, dyn_pow_TSV_per_access; 107 | unsigned int num_TSV_tot; 108 | unsigned int comm_bits, row_add_bits, col_add_bits, data_bits; 109 | double area_lwl_drv, area_row_predec_dec, area_col_predec_dec, 110 | area_subarray, area_bus, area_address_bus, area_data_bus, area_data_drv, area_IOSA, area_sense_amp, 111 | area_per_bank; 112 | 113 | }; 114 | 115 | #endif 116 | 117 | -------------------------------------------------------------------------------- /cacti/version_cacti.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * McPAT 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | #ifndef VERSION_H_ 33 | #define VERSION_H_ 34 | 35 | #define VER_MAJOR_CACTI 7 /* 3dd */ 36 | #define VER_MINOR_CACTI 0 37 | #define VER_COMMENT_CACTI "3DD Prerelease" 38 | #define VER_UPDATE_CACTI "Aug, 2012" 39 | 40 | #endif /* VERSION_H_ */ 41 | -------------------------------------------------------------------------------- /cacti/wire.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * CACTI 7.0 3 | * SOFTWARE LICENSE AGREEMENT 4 | * Copyright 2015 Hewlett-Packard Development Company, L.P. 5 | * All Rights Reserved 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer; 11 | * redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution; 14 | * neither the name of the copyright holders nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 | * 30 | ***************************************************************************/ 31 | 32 | 33 | 34 | #ifndef __WIRE_H__ 35 | #define __WIRE_H__ 36 | 37 | #include "basic_circuit.h" 38 | #include "component.h" 39 | #include "parameter.h" 40 | #include "assert.h" 41 | #include "cacti_interface.h" 42 | #include 43 | #include 44 | 45 | class Wire : public Component 46 | { 47 | public: 48 | Wire(enum Wire_type wire_model, double len /* in u*/, 49 | int nsense = 1/* no. of sense amps connected to the low-swing wire */, 50 | double width_scaling = 1, 51 | double spacing_scaling = 1, 52 | enum Wire_placement wire_placement = outside_mat, 53 | double resistivity = CU_RESISTIVITY, 54 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global)); 55 | ~Wire(); 56 | 57 | Wire( double width_scaling = 1, 58 | double spacing_scaling = 1, 59 | enum Wire_placement wire_placement = outside_mat, 60 | double resistivity = CU_RESISTIVITY, 61 | /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global) 62 | ); // should be used only once for initializing static members 63 | void init_wire(); 64 | 65 | void calculate_wire_stats(); 66 | void delay_optimal_wire(); 67 | double wire_cap(double len, bool call_from_outside=false); 68 | double wire_res(double len); 69 | void low_swing_model(); 70 | double signal_fall_time(); 71 | double signal_rise_time(); 72 | double sense_amp_input_cap(); 73 | 74 | enum Wire_type wt; 75 | double wire_spacing; 76 | double wire_width; 77 | enum Wire_placement wire_placement; 78 | double repeater_size; 79 | double repeater_spacing; 80 | double wire_length; 81 | double in_rise_time, out_rise_time; 82 | 83 | void set_in_rise_time(double rt) 84 | { 85 | in_rise_time = rt; 86 | } 87 | static Component global; 88 | static Component global_5; 89 | static Component global_10; 90 | static Component global_20; 91 | static Component global_30; 92 | static Component low_swing; 93 | static double wire_width_init; 94 | static double wire_spacing_init; 95 | void print_wire(); 96 | 97 | private: 98 | 99 | int nsense; // no. of sense amps connected to a low-swing wire if it 100 | // is broadcasting data to multiple destinations 101 | // width and spacing scaling factor can be used 102 | // to model low level wires or special 103 | // fat wires 104 | double w_scale, s_scale; 105 | double resistivity; 106 | powerDef wire_model (double space, double size, double *delay); 107 | list repeated_wire; 108 | void update_fullswing(); 109 | static int initialized; 110 | 111 | 112 | //low-swing 113 | Component transmitter; 114 | Component l_wire; 115 | Component sense_amp; 116 | 117 | double min_w_pmos; 118 | 119 | /*TechnologyParameter::*/DeviceType *deviceType; 120 | 121 | }; 122 | 123 | #endif 124 | -------------------------------------------------------------------------------- /config/sata_config.yaml: -------------------------------------------------------------------------------- 1 | # Define your yaml config for a single PE here. 2 | arch: 3 | n_pe: 128 4 | 5 | pe: 6 | mul: 0 7 | acc: 1 8 | add: 1 9 | and: 1 10 | comp: 1 11 | mux: 2 12 | reg: 1 13 | 14 | 15 | # Define the operations in forward stage 16 | fwd: 17 | 18 | # Define the number of operations in lif operation 19 | # reg: 1 for writing residual u, 1 for accumulate u with mac results, 1 for reset acc reg, total 3 20 | lif: 21 | mul: 0 22 | acc: 0 23 | add: 1 24 | and: 0 25 | comp: 1 26 | mux: 2 27 | reg: 3 28 | sft: 1 29 | 30 | # Define the number of operations in mac operation 31 | mac: 32 | mul: 0 33 | acc: 1 34 | add: 0 35 | and: 1 36 | comp: 0 37 | mux: 0 38 | reg: 0 39 | sft: 0 40 | 41 | 42 | # Define the number of operations in backward stage 43 | bwd: 44 | 45 | # Define the number of operations in pgu 46 | # 47 | pgu: 48 | mul: 1 49 | acc: 0 50 | add: 2 51 | and: 0 52 | comp: 0 53 | mux: 1 54 | reg: 0 55 | sft: 2 56 | 57 | # Define the number of operations in on mac operation 58 | mac: 59 | mul: 1 60 | acc: 1 61 | add: 0 62 | and: 0 63 | comp: 0 64 | mux: 0 65 | reg: 0 66 | sft: 0 67 | 68 | 69 | # Define the operations in weight update stage 70 | wup: 71 | 72 | # Define the number of operations in on mac operation 73 | mac: 74 | mul: 0 75 | acc: 1 76 | add: 0 77 | and: 1 78 | comp: 0 79 | mux: 0 80 | reg: 0 81 | sft: 0 -------------------------------------------------------------------------------- /config/vgg5_cifar10.yaml: -------------------------------------------------------------------------------- 1 | # Network config for 1D network fcn 2 | 3 | conv1: 4 | type: 2dconv 5 | H_h: 32 6 | H_w: 32 7 | C: 3 8 | R_h: 3 9 | R_w: 3 10 | K: 64 11 | E_h: 32 12 | E_w: 32 13 | 14 | conv2: 15 | type: 2dconv 16 | H_h: 16 17 | H_w: 16 18 | C: 64 19 | R_h: 3 20 | R_w: 3 21 | K: 128 22 | E_h: 16 23 | E_w: 16 24 | 25 | conv3: 26 | type: 2dconv 27 | H_h: 16 28 | H_w: 16 29 | C: 128 30 | R_h: 3 31 | R_w: 3 32 | K: 128 33 | E_h: 16 34 | E_w: 16 35 | 36 | lin4: 37 | type: linear 38 | in: 8192 39 | out: 1024 40 | 41 | lin5: 42 | type: out_linear 43 | in: 1024 44 | out: 10 -------------------------------------------------------------------------------- /har_configs/dcl_har.yaml: -------------------------------------------------------------------------------- 1 | # Network config for 1D network fcn 2 | 3 | 2dconv1: 4 | w_kt: 5 5 | w_ks: 1 6 | w_cin: 1 7 | w_cout: 64 8 | out_s: 9 9 | out_t: 124 10 | 11 | 2dconv2: 12 | w_kt: 5 13 | w_ks: 1 14 | w_cin: 64 15 | w_cout: 64 16 | out_s: 9 17 | out_t: 120 18 | 19 | 2dconv3: 20 | w_kt: 5 21 | w_ks: 1 22 | w_cin: 64 23 | w_cout: 64 24 | out_s: 9 25 | out_t: 116 26 | 27 | 2dconv4: 28 | w_kt: 5 29 | w_ks: 1 30 | w_cin: 64 31 | w_cout: 64 32 | out_s: 9 33 | out_t: 112 -------------------------------------------------------------------------------- /har_configs/dcl_shar.yaml: -------------------------------------------------------------------------------- 1 | # Network config for 1D network fcn 2 | 3 | 2dconv1: 4 | w_kt: 5 5 | w_ks: 1 6 | w_cin: 1 7 | w_cout: 64 8 | out_s: 3 9 | out_t: 147 10 | 11 | 2dconv2: 12 | w_kt: 5 13 | w_ks: 1 14 | w_cin: 64 15 | w_cout: 64 16 | out_s: 3 17 | out_t: 143 18 | 19 | 2dconv3: 20 | w_kt: 5 21 | w_ks: 1 22 | w_cin: 64 23 | w_cout: 64 24 | out_s: 3 25 | out_t: 139 26 | 27 | 2dconv4: 28 | w_kt: 5 29 | w_ks: 1 30 | w_cin: 64 31 | w_cout: 64 32 | out_s: 3 33 | out_t: 135 -------------------------------------------------------------------------------- /har_configs/fcn_har.yaml: -------------------------------------------------------------------------------- 1 | # Network config for 1D network fcn 2 | 3 | conv1: 4 | w_k: 8 5 | w_cin: 9 6 | w_cout: 32 7 | in: 128 8 | out: 129 9 | 10 | conv2: 11 | w_k: 8 12 | w_cin: 32 13 | w_cout: 64 14 | in: 65 15 | out: 66 16 | 17 | conv3: 18 | w_k: 8 19 | w_cin: 64 20 | w_cout: 128 21 | in: 34 22 | out: 35 23 | 24 | fc1: 25 | in: 2304 26 | out: 6 27 | -------------------------------------------------------------------------------- /har_configs/fcn_shar.yaml: -------------------------------------------------------------------------------- 1 | # Network config for 1D network fcn 2 | 3 | conv1: 4 | w_k: 8 5 | w_cin: 3 6 | w_cout: 32 7 | in: 151 8 | out: 152 9 | 10 | conv2: 11 | w_k: 8 12 | w_cin: 32 13 | w_cout: 64 14 | in: 77 15 | out: 78 16 | 17 | conv3: 18 | w_k: 8 19 | w_cin: 64 20 | w_cout: 128 21 | in: 40 22 | out: 41 23 | 24 | fc1: 25 | in: 2688 26 | out: 17 27 | -------------------------------------------------------------------------------- /har_configs/sata_ann_watch_config.yaml: -------------------------------------------------------------------------------- 1 | # Define your yaml config for a single PE here. 2 | arch: 3 | n_pe: 128 4 | 5 | pe: 6 | mul: 0 7 | acc: 1 8 | add: 1 9 | and: 1 10 | comp: 1 11 | mux: 2 12 | reg: 1 13 | 14 | 15 | # Define the number of operations in on lif operation 16 | # reg: 1 for writing residual u, 1 for accumulate u with mac results, 1 for reset acc reg, total 3 17 | lif: 18 | mul: 0 19 | acc: 0 20 | add: 0 21 | and: 0 22 | comp: 0 23 | mux: 0 24 | reg: 0 25 | 26 | # Define the number of operations in on relu operation 27 | # reg: 1 for reseting the acc 28 | relu: 29 | mul: 0 30 | acc: 0 31 | add: 0 32 | and: 0 33 | comp: 1 34 | mux: 1 35 | reg: 1 36 | 37 | 38 | # Define the number of operations in on mac operation 39 | mac: 40 | mul: 1 41 | acc: 1 42 | add: 0 43 | and: 0 44 | comp: 0 45 | mux: 0 46 | reg: 0 -------------------------------------------------------------------------------- /har_configs/sata_watch_config.yaml: -------------------------------------------------------------------------------- 1 | # Define your yaml config for a single PE here. 2 | arch: 3 | n_pe: 128 4 | 5 | pe: 6 | mul: 0 7 | acc: 1 8 | add: 1 9 | and: 1 10 | comp: 1 11 | mux: 2 12 | reg: 1 13 | 14 | # Define the number of operations in on lif operation 15 | # reg: 1 for writing residual u, 1 for accumulate u with mac results, 1 for reset acc reg, total 3 16 | lif: 17 | mul: 0 18 | acc: 0 19 | add: 1 20 | and: 0 21 | comp: 1 22 | mux: 2 23 | reg: 3 24 | 25 | # Define the number of operations in on mac operation 26 | mac: 27 | mul: 0 28 | acc: 1 29 | add: 0 30 | and: 1 31 | comp: 0 32 | mux: 0 33 | reg: 0 -------------------------------------------------------------------------------- /inference-energy-cal/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/inference-energy-cal/.DS_Store -------------------------------------------------------------------------------- /inference-energy-cal/__pycache__/hw_kernels.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Yale/SATA/7bcc80486e25af73fb76aa145130e5ca18486490/inference-energy-cal/__pycache__/hw_kernels.cpython-39.pyc -------------------------------------------------------------------------------- /inference-energy-cal/comp-utils.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import hw_kernels # Assuming hw-kernels.py is in the same directory 3 | 4 | # Function Definitions 5 | 6 | def extract_act_dict_from_yaml(filename): 7 | with open(filename, 'r') as file: 8 | data = yaml.safe_load(file) 9 | 10 | act_dict = {} 11 | 12 | def recursive_search(subtrees): 13 | for subtree in subtrees: 14 | if subtree.get('class') == 'pe-compute': 15 | locals_ = subtree.get('local', []) 16 | for local in locals_: 17 | act_tag = local.get('act-tag') 18 | attributes = local.get('attributes', {}) 19 | kernel_name = attributes.get('kernel') 20 | count = attributes.get('count') 21 | gated = attributes.get('gated') 22 | width = attributes.get('width') 23 | 24 | # Create the kernel object using the class from hw-kernels.py 25 | KernelClass = getattr(hw_kernels, kernel_name, None) 26 | if KernelClass: 27 | kernel_obj = KernelClass(width) 28 | else: 29 | print(f"Warning: No matching class found for kernel: {kernel_name}") 30 | kernel_obj = kernel_name # Use the name as a fallback 31 | 32 | if act_tag in act_dict: 33 | act_dict[act_tag].append((kernel_obj, count, gated)) 34 | else: 35 | act_dict[act_tag] = [(kernel_obj, count, gated)] 36 | elif subtree.get('class') == 'pe-mem': 37 | locals_ = subtree.get('local', []) 38 | for local in locals_: 39 | act_tag = local.get('act-tag') 40 | attributes = local.get('attributes', {}) 41 | kernel_name = attributes.get('kernel') 42 | count = attributes.get('count') 43 | gated = attributes.get('gated') 44 | width = attributes.get('width') 45 | size = attributes.get('size-bytes') 46 | 47 | # Create the kernel object using the class from hw-kernels.py 48 | KernelClass = getattr(hw_kernels, kernel_name, None) 49 | if KernelClass: 50 | kernel_obj = KernelClass(size) 51 | # print(kernel_obj.get_dpower()) 52 | else: 53 | print(f"Warning: No matching register-files found for kernel: {kernel_name}") 54 | kernel_obj = kernel_name # Use the name as a fallback 55 | if act_tag in act_dict: 56 | act_dict[act_tag].append((kernel_obj, count, gated)) 57 | else: 58 | act_dict[act_tag] = [(kernel_obj, count, gated)] 59 | # print(act_dict) 60 | else: 61 | recursive_search(subtree.get('subtree', [])) 62 | 63 | recursive_search(data.get('architecture', {}).get('subtree', [])) 64 | return act_dict 65 | 66 | def aggregate_act_data(act_dict): 67 | aggregated_data = {} 68 | 69 | for act_tag, kernel_list in act_dict.items(): 70 | area_total = 0 71 | lpower_total = 0 72 | dpower = {'n': 0, 'y': 0} 73 | 74 | for kernel_obj, count, gated in kernel_list: 75 | # print(gated) 76 | area_total += kernel_obj.get_area() * count 77 | lpower_total += kernel_obj.get_lpower() * count 78 | dpower[gated] += kernel_obj.get_dpower() * count 79 | # if 'spad' in act_tag: 80 | 81 | 82 | # Restricting to 4 decimal places 83 | area_total = round(area_total, 6) 84 | lpower_total = round(lpower_total, 6) 85 | dpower['n'] = round(dpower['n'], 6) 86 | dpower['y'] = round(dpower['y'], 6) 87 | 88 | aggregated_data[act_tag] = { 89 | 'area': area_total, 90 | 'lpower': lpower_total, 91 | 'dpower': dpower 92 | } 93 | 94 | return aggregated_data 95 | 96 | # Main Execution 97 | 98 | if __name__ == "__main__": 99 | filename = 'sata-config.yaml' 100 | act_dict = extract_act_dict_from_yaml(filename) 101 | # print(act_dict) 102 | aggregated_act_data = aggregate_act_data(act_dict) 103 | print(aggregated_act_data) 104 | 105 | output_filename = 'results/comp-stat.yaml' 106 | with open(output_filename, 'w') as outfile: 107 | yaml.dump(aggregated_act_data, outfile, default_flow_style=False) 108 | print(f"Computation components written to {output_filename}") -------------------------------------------------------------------------------- /inference-energy-cal/cycle-utils.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import os 3 | import shutil 4 | import subprocess 5 | import csv 6 | 7 | # Function Definitions 8 | 9 | def extract_scalesim_dict_from_yaml(filename): 10 | with open(filename, 'r') as file: 11 | data = yaml.safe_load(file) 12 | 13 | scalesim_dict = {} 14 | 15 | def recursive_search(subtrees): 16 | for subtree in subtrees: 17 | if subtree.get('class') == 'glbs': 18 | locals_ = subtree.get('local', []) 19 | # print(locals_) 20 | for local in locals_: 21 | act_tag = local.get('act-tag') 22 | attributes = local.get('attributes', {}) 23 | size = int(attributes.get('size-bytes')/1024) 24 | 25 | #! Adding support for multiple srams for one operand (e.g., 2 sram for ifmap) 26 | if "weight" in act_tag: 27 | scalesim_dict["FilterSramSzkB"] = size 28 | elif "ifmap" in act_tag: 29 | scalesim_dict["IfmapSramSzkB"] = size 30 | elif "ofmap" in act_tag: 31 | scalesim_dict["OfmapSramSzkB"] = size 32 | else: 33 | print(f"Warning: No matching sram class found for scalesim: {act_tag}") 34 | elif subtree.get('class') == 'pe-array': 35 | attributes = subtree.get('attributes', {}) 36 | scalesim_dict["ArrayWidth"] = attributes.get("width") 37 | scalesim_dict["ArrayHeight"] = attributes.get("height") 38 | else: 39 | recursive_search(subtree.get('subtree', [])) 40 | 41 | recursive_search(data.get('architecture', {}).get('subtree', [])) 42 | if not "OfmapSramSzkB" in scalesim_dict.keys(): 43 | scalesim_dict["OfmapSramSzkB"] = scalesim_dict["IfmapSramSzkB"] 44 | 45 | arch = data.get('architecture') 46 | dataflow = arch.get('dataflow') 47 | name = arch.get('name') 48 | scalesim_dict['run_name'] = name 49 | scalesim_dict['dataflow'] = dataflow 50 | 51 | return scalesim_dict 52 | 53 | def update_config(yaml_filename, default_config_path, output_config_path): 54 | # Load the YAML file 55 | with open(yaml_filename, 'r') as yaml_file: 56 | yaml_data = yaml.safe_load(yaml_file) 57 | 58 | # Define a mapping from the placeholders to the YAML keys 59 | mapping = { 60 | "x0": "run_name", 61 | "x1": "ArrayHeight", 62 | "x2": "ArrayWidth", 63 | "x3": "IfmapSramSzkB", 64 | "x4": "FilterSramSzkB", 65 | "x5": "OfmapSramSzkB", 66 | "x6": "dataflow" 67 | } 68 | 69 | # Copy the original default.cfg to a new file 70 | shutil.copy(default_config_path, output_config_path) 71 | 72 | # Load the copied file 73 | with open(output_config_path, 'r') as config_file: 74 | config_content = config_file.read() 75 | 76 | # Replace the placeholders with the values from the YAML file 77 | for placeholder, key in mapping.items(): 78 | value = yaml_data.get(key, placeholder) # Use the placeholder as default if key not found 79 | 80 | config_content = config_content.replace(placeholder, str(value)) 81 | 82 | # Write the updated content back to the copied file 83 | with open(output_config_path, 'w') as config_file: 84 | config_file.write(config_content) 85 | 86 | print(f"{output_config_path} updated successfully!") 87 | 88 | 89 | def generate_temp_workload(): 90 | with open('./workload.yaml', 'r') as file: 91 | data = yaml.safe_load(file) 92 | 93 | # Convert YAML data to CSV rows 94 | csv_rows = [] 95 | for layer in data.get('Layers', []): 96 | attributes = layer.get('attributes', {}) 97 | row = [ 98 | layer.get('name', 'N/A'), 99 | attributes.get('IFMAP Height', 'N/A'), 100 | attributes.get('IFMAP Width', 'N/A'), 101 | attributes.get('Filter Height', 'N/A'), 102 | attributes.get('Filter Width', 'N/A'), 103 | attributes.get('Channels', 'N/A'), 104 | attributes.get('Num Filter', 'N/A'), 105 | attributes.get('Strides', 'N/A'), 106 | '' 107 | ] 108 | csv_rows.append(row) 109 | 110 | # Write the CSV data to a file 111 | csv_file_path = '../scale-sim-v2/temp_workload.csv' 112 | with open(csv_file_path, 'w', newline='') as csv_file: 113 | csv_writer = csv.writer(csv_file) 114 | csv_writer.writerow(['Layer name', 'IFMAP Height', 'IFMAP Width', 'Filter Height', 'Filter Width', 'Channels', 'Num Filter', 'Strides', '']) 115 | csv_writer.writerows(csv_rows) 116 | 117 | print("CSV file written successfully.") 118 | 119 | # Main Execution 120 | 121 | if __name__ == "__main__": 122 | filename = 'sata-config.yaml' 123 | scalesim_dict = extract_scalesim_dict_from_yaml(filename) 124 | # print(scalesim_dict) 125 | scalesim_path = '../scale-sim-v2' 126 | 127 | output_filename = 'cycle-stat-temp.yaml' 128 | with open(output_filename, 'w') as outfile: 129 | yaml.dump(scalesim_dict, outfile, default_flow_style=False) 130 | print(f"Cycle stats written to {output_filename}") 131 | 132 | 133 | default_config_path = os.path.join('..', 'scale-sim-v2', 'configs', 'default.cfg') 134 | output_config_path = os.path.join('..', 'scale-sim-v2', 'configs', 'running.cfg') 135 | 136 | # Update the copied file with values from the YAML file 137 | update_config(output_filename, default_config_path, output_config_path) 138 | os.remove(output_filename) 139 | generate_temp_workload() 140 | os.chdir(scalesim_path) 141 | subprocess.run('python3 run.py', shell=True) 142 | -------------------------------------------------------------------------------- /inference-energy-cal/related-work-estimate.py: -------------------------------------------------------------------------------- 1 | import hw_kernels 2 | 3 | adder = hw_kernels.adder(8) 4 | mul = hw_kernels.multiplier(8) 5 | reg = hw_kernels.register(8) 6 | mac_8_bit = adder.get_dpower() + mul.get_dpower() + reg.get_dpower() 7 | acc_8_bit = adder.get_dpower() + reg.get_dpower() 8 | scale = 1/250 9 | 10 | ######################### TDBN ################################### 11 | 12 | # The work provide the estimate adds and muls numbers 13 | 14 | add_n = 1.8e9 15 | mul_n = 3.4e7 16 | 17 | # Assuming 8 bits, 300MHz 18 | adder = hw_kernels.adder(8) 19 | mul = hw_kernels.multiplier(8) 20 | 21 | tdbn_total_estimated_energy = (add_n*adder.get_dpower() + mul_n*mul.get_dpower()) * scale 22 | 23 | print('TDBN estimated energy in (/8-bit int MAC): ', round(tdbn_total_estimated_energy,2)) 24 | 25 | 26 | ######################### TSSL ################################### 27 | 28 | Conv = [128, 256,'m',512,'m',1024,'m',512] 29 | Linear = [1024,512] 30 | T = 5 31 | spa = 0.901 32 | 33 | kernel_size = 3*3 34 | input_channel = 3 35 | img_size = 32 36 | 37 | n_acc = 0 38 | for c in Conv: 39 | if type(c) is int: 40 | n_acc += input_channel*kernel_size*c*img_size*img_size 41 | input_channel = c 42 | else: 43 | img_size = img_size/2 44 | n_acc = n_acc + (512*img_size*img_size*1024) + 1024*512 45 | tssl_total_estimated_energy = (n_acc * acc_8_bit * T * (1-spa)) * scale 46 | 47 | print('TSSL estimated energy in (/8-bit int MAC): ', round(tssl_total_estimated_energy,2)) 48 | 49 | 50 | ######################### Direct ################################# 51 | 52 | Conv = [128,256,'m',512,'m',1024,'m',512] 53 | Linear = [1024,512] 54 | T = 10 55 | spa = 0.90 56 | 57 | kernel_size = 3*3 58 | input_channel = 3 59 | img_size = 32 60 | 61 | n_acc = 0 62 | for c in Conv: 63 | if type(c) is int: 64 | n_acc += input_channel*kernel_size*c*img_size*img_size 65 | input_channel = c 66 | else: 67 | img_size = img_size/2 68 | n_acc = n_acc + (512*img_size*img_size*1024) + 1024*512 69 | tssl_total_estimated_energy = (n_acc * acc_8_bit * T * (1-spa)) * scale 70 | print('Direct estimated energy in (/8-bit int MAC): ', round(tssl_total_estimated_energy,2)) 71 | 72 | 73 | 74 | ######################### BNTT ################################# 75 | 76 | Conv = [64, 64, "M", 128, 128, "M", 256, 256, 256, "M"] 77 | Linear = [1024,512] 78 | T = 20 79 | spa = 0.91 80 | 81 | kernel_size = 3*3 82 | input_channel = 3 83 | img_size = 32 84 | 85 | W_size = 0 86 | U_size = 0 87 | w_bit = 8 88 | u_bit = 32 89 | membrane_size = img_size 90 | batch = 1 91 | 92 | n_acc = 0 93 | for c in Conv: 94 | if type(c) is int: 95 | n_acc += input_channel*kernel_size*c*img_size*img_size 96 | U_size += c*(membrane_size**2)*batch 97 | W_size += input_channel*c*kernel_size 98 | input_channel = c 99 | else: 100 | img_size = img_size/2 101 | membrane_size = membrane_size/2 102 | n_acc = n_acc + (512*img_size*img_size*1024) + 1024*512 103 | # print(n_acc) 104 | tssl_total_estimated_energy = (n_acc * acc_8_bit * T * (1-spa)) * scale 105 | print('BNTT estimated energy in (/8-bit int MAC): ', round(tssl_total_estimated_energy,2)) 106 | 107 | # U_size = U_size + 1024 + 512 108 | # U_size = U_size*u_bit/8 ### bits to MB 109 | 110 | # W_size += (512*img_size*img_size*1024) + 1024*512 111 | 112 | # W_size = W_size*w_bit/8 ### bits to MB 113 | 114 | # print('U size', U_size) 115 | # print('W size', W_size) 116 | 117 | # print(acc_8_bit*1e-3*(1/300e6)) -------------------------------------------------------------------------------- /inference-energy-cal/results/bntt/comp-stat.yaml: -------------------------------------------------------------------------------- 1 | lif: 2 | area: 0.000129 3 | dpower: 4 | n: 0.05625 5 | y: 0.01886 6 | lpower: 8.4e-05 7 | spad: 8 | area: 0.000891 9 | dpower: 10 | n: 0.21233 11 | y: 0.11241 12 | lpower: 0.000523 13 | spike-mac: 14 | area: 9.7e-05 15 | dpower: 16 | n: 0.01924 17 | y: 0.02511 18 | lpower: 6.2e-05 19 | -------------------------------------------------------------------------------- /inference-energy-cal/results/bntt/computation-energy.yaml: -------------------------------------------------------------------------------- 1 | lif: 2 | energy-leakage: 503.36732544000006 3 | energy-operation: 186.84979199999998 4 | energy-ungated: 337076.334 5 | total: 337766.55111744 6 | spad: 7 | energy-leakage: 3134.0608476800003 8 | energy-operation: 53406.3540843 9 | energy-ungated: 1272380.7644128 10 | total: 1328921.1793447803 11 | spike-mac: 12 | energy-leakage: 371.53302592 13 | energy-operation: 11929.8421053 14 | energy-ungated: 115295.0873984 15 | total: 127596.46252962001 16 | total: 1794284.1929918402 17 | -------------------------------------------------------------------------------- /inference-energy-cal/results/bntt/cycle-stat.yaml: -------------------------------------------------------------------------------- 1 | Layer 0: 2 | DRAM Filter Reads: 1728.0 3 | DRAM IFMAP Reads: 3072.0 4 | DRAM OFMAP Writes: 57607.0 5 | SRAM Filter Cycles: 14085.0 6 | SRAM Filter Reads: 1728.0 7 | SRAM IFMAP Cycles: 22324.0 8 | SRAM IFMAP Reads: 194400.0 9 | SRAM OFMAP Cycles: 22317.0 10 | SRAM OFMAP Start Cycle: 26.0 11 | SRAM OFMAP Writes: 68544.0 12 | Layer 1: 13 | DRAM Filter Reads: 36864.0 14 | DRAM IFMAP Reads: 649682.0 15 | DRAM OFMAP Writes: 57607.0 16 | SRAM Filter Cycles: 269221.0 17 | SRAM Filter Reads: 36864.0 18 | SRAM IFMAP Cycles: 272668.0 19 | SRAM IFMAP Reads: 4147200.0 20 | SRAM OFMAP Cycles: 272112.0 21 | SRAM OFMAP Start Cycle: 575.0 22 | SRAM OFMAP Writes: 68544.0 23 | Layer 2: 24 | DRAM Filter Reads: 73728.0 25 | DRAM IFMAP Reads: 276832.0 26 | DRAM OFMAP Writes: 25095.0 27 | SRAM Filter Cycles: 134565.0 28 | SRAM Filter Reads: 73728.0 29 | SRAM IFMAP Cycles: 124364.0 30 | SRAM IFMAP Reads: 1806336.0 31 | SRAM OFMAP Cycles: 123808.0 32 | SRAM OFMAP Start Cycle: 575.0 33 | SRAM OFMAP Writes: 30080.0 34 | Layer 3: 35 | DRAM Filter Reads: 147456.0 36 | DRAM IFMAP Reads: 8192.0 37 | DRAM OFMAP Writes: 4608.0 38 | SRAM Filter Cycles: 85189.0 39 | SRAM Filter Reads: 147456.0 40 | SRAM IFMAP Cycles: 56332.0 41 | SRAM IFMAP Reads: 663552.0 42 | SRAM OFMAP Cycles: 55200.0 43 | SRAM OFMAP Start Cycle: 1151.0 44 | SRAM OFMAP Writes: 5760.0 45 | Layer 4: 46 | DRAM Filter Reads: 294912.0 47 | DRAM IFMAP Reads: 2048.0 48 | DRAM OFMAP Writes: 1024.0 49 | SRAM Filter Cycles: 96741.0 50 | SRAM Filter Reads: 294912.0 51 | SRAM IFMAP Cycles: 37548.0 52 | SRAM IFMAP Reads: 147456.0 53 | SRAM OFMAP Cycles: 36404.0 54 | SRAM OFMAP Start Cycle: 1163.0 55 | SRAM OFMAP Writes: 1792.0 56 | Layer 5: 57 | DRAM Filter Reads: 589824.0 58 | DRAM IFMAP Reads: 4096.0 59 | DRAM OFMAP Writes: 1024.0 60 | SRAM Filter Cycles: 193509.0 61 | SRAM Filter Reads: 589824.0 62 | SRAM IFMAP Cycles: 74412.0 63 | SRAM IFMAP Reads: 294912.0 64 | SRAM OFMAP Cycles: 72116.0 65 | SRAM OFMAP Start Cycle: 2315.0 66 | SRAM OFMAP Writes: 1792.0 67 | Layer 6: 68 | DRAM Filter Reads: 589824.0 69 | DRAM IFMAP Reads: 4096.0 70 | DRAM OFMAP Writes: 1024.0 71 | SRAM Filter Cycles: 193509.0 72 | SRAM Filter Reads: 589824.0 73 | SRAM IFMAP Cycles: 74412.0 74 | SRAM IFMAP Reads: 294912.0 75 | SRAM OFMAP Cycles: 72116.0 76 | SRAM OFMAP Start Cycle: 2315.0 77 | SRAM OFMAP Writes: 1792.0 78 | Layer 7: 79 | DRAM Filter Reads: 1048576.0 80 | DRAM IFMAP Reads: 1024.0 81 | DRAM OFMAP Writes: 1024.0 82 | SRAM Filter Cycles: 319464.0 83 | SRAM Filter Reads: 1048576.0 84 | SRAM IFMAP Cycles: 133865.0 85 | SRAM IFMAP Reads: 131072.0 86 | SRAM OFMAP Cycles: 132849.0 87 | SRAM OFMAP Start Cycle: 1038.0 88 | SRAM OFMAP Writes: 4096.0 89 | Layer 8: 90 | DRAM Filter Reads: 524288.0 91 | DRAM IFMAP Reads: 1024.0 92 | DRAM OFMAP Writes: 512.0 93 | SRAM Filter Cycles: 159720.0 94 | SRAM Filter Reads: 524288.0 95 | SRAM IFMAP Cycles: 66921.0 96 | SRAM IFMAP Reads: 65536.0 97 | SRAM OFMAP Cycles: 65905.0 98 | SRAM OFMAP Start Cycle: 1038.0 99 | SRAM OFMAP Writes: 2048.0 100 | -------------------------------------------------------------------------------- /inference-energy-cal/results/bntt/mem-stat.yaml: -------------------------------------------------------------------------------- 1 | DRAM: 2 | - name: DRAM-system 3 | read energy: 0.468762 4 | SRAM: 5 | - name: GLB-ifmap 6 | read dynamic energy: 0.0110295 7 | write dynamic energy: 0.012772 8 | leakage power: 6.25862 9 | - name: GLB-ofmap 10 | read dynamic energy: 0.0110295 11 | write dynamic energy: 0.012772 12 | leakage power: 6.25862 13 | - name: GLB-weight 14 | read dynamic energy: 0.0286315 15 | write dynamic energy: 0.0282197 16 | leakage power: 24.9086 17 | -------------------------------------------------------------------------------- /inference-energy-cal/results/bntt/memory-energy.yaml: -------------------------------------------------------------------------------- 1 | DRAM-system: 2 | ifmap: 445354.838292 3 | ofmap: 70091.63805000001 4 | total: 2065736.1627419998 5 | weight: 1550289.6864 6 | GLB-ifmap: 7 | ifmap-dynamic: 85427.624592 8 | ifmap-leakage: 293004.53965265 9 | ifmap-total: 378432.16424465 10 | GLB-ofmap: 11 | ofmap-dynamic: 2355.769856 12 | ofmap-leakage: 293004.53965265 13 | ofmap-total: 295360.30950865004 14 | GLB-weight: 15 | weight-dynamic: 94690.0968 16 | weight-leakage: 1166124.9407045 17 | weight-total: 1260815.0375045 18 | dram_total: 2065736.1627419998 19 | sram_total: 1934607.5112578 20 | -------------------------------------------------------------------------------- /inference-energy-cal/results/direct/comp-stat.yaml: -------------------------------------------------------------------------------- 1 | lif: 2 | area: 0.000129 3 | dpower: 4 | n: 0.05625 5 | y: 0.01886 6 | lpower: 8.4e-05 7 | spad: 8 | area: 0.000891 9 | dpower: 10 | n: 0.21233 11 | y: 0.11241 12 | lpower: 0.000523 13 | spike-mac: 14 | area: 9.7e-05 15 | dpower: 16 | n: 0.01924 17 | y: 0.02511 18 | lpower: 6.2e-05 19 | -------------------------------------------------------------------------------- /inference-energy-cal/results/direct/computation-energy.yaml: -------------------------------------------------------------------------------- 1 | lif: 2 | energy-leakage: 2349.9881664 3 | energy-operation: 288.48256 4 | energy-ungated: 1573652.79 5 | total: 1576291.2607264 6 | spad: 7 | energy-leakage: 14631.4739408 8 | energy-operation: 299354.052736575 9 | energy-ungated: 5940154.611568 10 | total: 6254140.138245375 11 | spike-mac: 12 | energy-leakage: 1734.5150752000002 13 | energy-operation: 66869.32002682501 14 | energy-ungated: 538259.194304 15 | total: 606863.0294060251 16 | total: 8437294.4283778 17 | -------------------------------------------------------------------------------- /inference-energy-cal/results/direct/cycle-stat.yaml: -------------------------------------------------------------------------------- 1 | Layer 0: 2 | DRAM Filter Reads: 3456.0 3 | DRAM IFMAP Reads: 3072.0 4 | DRAM OFMAP Writes: 115207.0 5 | SRAM Filter Cycles: 29093.0 6 | SRAM Filter Reads: 3456.0 7 | SRAM IFMAP Cycles: 44668.0 8 | SRAM IFMAP Reads: 388800.0 9 | SRAM OFMAP Cycles: 44661.0 10 | SRAM OFMAP Start Cycle: 26.0 11 | SRAM OFMAP Writes: 137088.0 12 | Layer 1: 13 | DRAM Filter Reads: 294912.0 14 | DRAM IFMAP Reads: 7501612.0 15 | DRAM OFMAP Writes: 230407.0 16 | SRAM Filter Cycles: 2160229.0 17 | SRAM Filter Reads: 294912.0 18 | SRAM IFMAP Cycles: 2141356.0 19 | SRAM IFMAP Reads: 33177600.0 20 | SRAM OFMAP Cycles: 2140224.0 21 | SRAM OFMAP Start Cycle: 1151.0 22 | SRAM OFMAP Writes: 274176.0 23 | Layer 2: 24 | DRAM Filter Reads: 1179648.0 25 | DRAM IFMAP Reads: 6799586.0 26 | DRAM OFMAP Writes: 100359.0 27 | SRAM Filter Cycles: 2156325.0 28 | SRAM Filter Reads: 1179648.0 29 | SRAM IFMAP Cycles: 1935212.0 30 | SRAM IFMAP Reads: 28901376.0 31 | SRAM OFMAP Cycles: 1932928.0 32 | SRAM OFMAP Start Cycle: 2303.0 33 | SRAM OFMAP Writes: 120320.0 34 | Layer 3: 35 | DRAM Filter Reads: 4718592.0 36 | DRAM IFMAP Reads: 7320866.0 37 | DRAM OFMAP Writes: 36871.0 38 | SRAM Filter Cycles: 2727877.0 39 | SRAM Filter Reads: 4718592.0 40 | SRAM IFMAP Cycles: 1777900.0 41 | SRAM IFMAP Reads: 21233664.0 42 | SRAM OFMAP Cycles: 1773312.0 43 | SRAM OFMAP Start Cycle: 4607.0 44 | SRAM OFMAP Writes: 46080.0 45 | Layer 4: 46 | DRAM Filter Reads: 4718592.0 47 | DRAM IFMAP Reads: 1058891.0 48 | DRAM OFMAP Writes: 2048.0 49 | SRAM Filter Cycles: 1548261.0 50 | SRAM Filter Reads: 4718592.0 51 | SRAM IFMAP Cycles: 591212.0 52 | SRAM IFMAP Reads: 2359296.0 53 | SRAM OFMAP Cycles: 582004.0 54 | SRAM OFMAP Start Cycle: 9227.0 55 | SRAM OFMAP Writes: 3584.0 56 | Layer 5: 57 | DRAM Filter Reads: 8388608.0 58 | DRAM IFMAP Reads: 8192.0 59 | DRAM OFMAP Writes: 1024.0 60 | SRAM Filter Cycles: 2555880.0 61 | SRAM Filter Reads: 8388608.0 62 | SRAM IFMAP Cycles: 1051369.0 63 | SRAM IFMAP Reads: 1048576.0 64 | SRAM OFMAP Cycles: 1043185.0 65 | SRAM OFMAP Start Cycle: 8206.0 66 | SRAM OFMAP Writes: 4096.0 67 | Layer 6: 68 | DRAM Filter Reads: 524288.0 69 | DRAM IFMAP Reads: 1024.0 70 | DRAM OFMAP Writes: 512.0 71 | SRAM Filter Cycles: 159720.0 72 | SRAM Filter Reads: 524288.0 73 | SRAM IFMAP Cycles: 66921.0 74 | SRAM IFMAP Reads: 65536.0 75 | SRAM OFMAP Cycles: 65905.0 76 | SRAM OFMAP Start Cycle: 1038.0 77 | SRAM OFMAP Writes: 2048.0 78 | -------------------------------------------------------------------------------- /inference-energy-cal/results/direct/mem-stat.yaml: -------------------------------------------------------------------------------- 1 | DRAM: 2 | - name: DRAM-system 3 | read energy: 0.468762 4 | SRAM: 5 | - name: GLB-ifmap 6 | read dynamic energy: 0.0110295 7 | write dynamic energy: 0.012772 8 | leakage power: 6.25862 9 | - name: GLB-ofmap 10 | read dynamic energy: 0.0110295 11 | write dynamic energy: 0.012772 12 | leakage power: 6.25862 13 | - name: GLB-weight 14 | read dynamic energy: 0.0286315 15 | write dynamic energy: 0.0282197 16 | leakage power: 24.9086 17 | -------------------------------------------------------------------------------- /inference-energy-cal/results/direct/memory-energy.yaml: -------------------------------------------------------------------------------- 1 | DRAM-system: 2 | ifmap: 10637729.975166 3 | ofmap: 228018.96213600002 4 | total: 20160406.874454 5 | weight: 9294657.937152 6 | GLB-ifmap: 7 | ifmap-dynamic: 961494.9860159999 8 | ifmap-leakage: 1367902.05896525 9 | ifmap-total: 2329397.0449812496 10 | GLB-ofmap: 11 | ofmap-dynamic: 7502.170624 12 | ofmap-leakage: 1367902.05896525 13 | ofmap-total: 1375404.22958925 14 | GLB-weight: 15 | weight-dynamic: 567708.130624 16 | weight-leakage: 5444095.5395825 17 | weight-total: 6011803.6702065 18 | dram_total: 20160406.874454 19 | sram_total: 9716604.944777 20 | -------------------------------------------------------------------------------- /inference-energy-cal/results/tdbn/comp-stat.yaml: -------------------------------------------------------------------------------- 1 | lif: 2 | area: 0.000129 3 | dpower: 4 | n: 0.05625 5 | y: 0.01886 6 | lpower: 8.4e-05 7 | spad: 8 | area: 0.000891 9 | dpower: 10 | n: 0.21233 11 | y: 0.11241 12 | lpower: 0.000523 13 | spike-mac: 14 | area: 9.7e-05 15 | dpower: 16 | n: 0.01924 17 | y: 0.02511 18 | lpower: 6.2e-05 19 | -------------------------------------------------------------------------------- /inference-energy-cal/results/tdbn/computation-energy.yaml: -------------------------------------------------------------------------------- 1 | lif: 2 | energy-leakage: 2149.7978073599998 3 | energy-operation: 272.3565056 4 | energy-ungated: 1439596.7459999998 5 | total: 1442018.90031296 6 | spad: 7 | energy-leakage: 13385.05063392 8 | energy-operation: 385679.407785075 9 | energy-ungated: 5434125.8147232 10 | total: 5833190.273142195 11 | spike-mac: 12 | energy-leakage: 1586.75552448 13 | energy-operation: 86152.56587032501 14 | energy-ungated: 492406.0692096 15 | total: 580145.390604405 16 | total: 7855354.56405956 17 | -------------------------------------------------------------------------------- /inference-energy-cal/results/tdbn/mem-stat.yaml: -------------------------------------------------------------------------------- 1 | DRAM: 2 | - name: DRAM-system 3 | read energy: 0.468762 4 | SRAM: 5 | - name: GLB-ifmap 6 | read dynamic energy: 0.0110295 7 | write dynamic energy: 0.012772 8 | leakage power: 6.25862 9 | - name: GLB-ofmap 10 | read dynamic energy: 0.0110295 11 | write dynamic energy: 0.012772 12 | leakage power: 6.25862 13 | - name: GLB-weight 14 | read dynamic energy: 0.0286315 15 | write dynamic energy: 0.0282197 16 | leakage power: 24.9086 17 | -------------------------------------------------------------------------------- /inference-energy-cal/results/tdbn/memory-energy.yaml: -------------------------------------------------------------------------------- 1 | DRAM-system: 2 | ifmap: 28812870.246846 3 | ofmap: 567790.31631 4 | total: 36171574.406100005 5 | weight: 6790913.842944 6 | GLB-ifmap: 7 | ifmap-dynamic: 2461365.8084159996 8 | ifmap-leakage: 1251373.4703403502 9 | ifmap-total: 3712739.27875635 10 | GLB-ofmap: 11 | ofmap-dynamic: 18484.864512 12 | ofmap-leakage: 1251373.4703403502 13 | ofmap-total: 1269858.3348523502 14 | GLB-weight: 15 | weight-dynamic: 414782.020928 16 | weight-leakage: 4980324.931585501 17 | weight-total: 5395106.952513501 18 | dram_total: 36171574.406100005 19 | sram_total: 10377704.5661222 20 | -------------------------------------------------------------------------------- /inference-energy-cal/results/tssl/comp-stat.yaml: -------------------------------------------------------------------------------- 1 | lif: 2 | area: 0.000129 3 | dpower: 4 | n: 0.05625 5 | y: 0.01886 6 | lpower: 8.4e-05 7 | spad: 8 | area: 0.000891 9 | dpower: 10 | n: 0.21233 11 | y: 0.11241 12 | lpower: 0.000523 13 | spike-mac: 14 | area: 9.7e-05 15 | dpower: 16 | n: 0.01924 17 | y: 0.02511 18 | lpower: 6.2e-05 19 | -------------------------------------------------------------------------------- /inference-energy-cal/results/tssl/computation-energy.yaml: -------------------------------------------------------------------------------- 1 | lif: 2 | energy-leakage: 1327.3685376 3 | energy-operation: 144.24128 4 | energy-ungated: 888862.86 5 | total: 890334.4698176 6 | spad: 7 | energy-leakage: 8264.4493472 8 | energy-operation: 149677.02622777497 9 | energy-ungated: 3355240.018912 10 | total: 3513181.494486975 11 | spike-mac: 12 | energy-leakage: 979.7243968000001 13 | energy-operation: 33434.659982025005 14 | energy-ungated: 304030.603136 15 | total: 338444.98751482496 16 | total: 4741960.951819399 17 | -------------------------------------------------------------------------------- /inference-energy-cal/results/tssl/cycle-stat.yaml: -------------------------------------------------------------------------------- 1 | Layer 0: 2 | DRAM Filter Reads: 3456.0 3 | DRAM IFMAP Reads: 3072.0 4 | DRAM OFMAP Writes: 115207.0 5 | SRAM Filter Cycles: 29093.0 6 | SRAM Filter Reads: 3456.0 7 | SRAM IFMAP Cycles: 44668.0 8 | SRAM IFMAP Reads: 388800.0 9 | SRAM OFMAP Cycles: 44661.0 10 | SRAM OFMAP Start Cycle: 26.0 11 | SRAM OFMAP Writes: 137088.0 12 | Layer 1: 13 | DRAM Filter Reads: 294912.0 14 | DRAM IFMAP Reads: 7501612.0 15 | DRAM OFMAP Writes: 230407.0 16 | SRAM Filter Cycles: 2160229.0 17 | SRAM Filter Reads: 294912.0 18 | SRAM IFMAP Cycles: 2141356.0 19 | SRAM IFMAP Reads: 33177600.0 20 | SRAM OFMAP Cycles: 2140224.0 21 | SRAM OFMAP Start Cycle: 1151.0 22 | SRAM OFMAP Writes: 274176.0 23 | Layer 2: 24 | DRAM Filter Reads: 1179648.0 25 | DRAM IFMAP Reads: 6799586.0 26 | DRAM OFMAP Writes: 100359.0 27 | SRAM Filter Cycles: 2156325.0 28 | SRAM Filter Reads: 1179648.0 29 | SRAM IFMAP Cycles: 1935212.0 30 | SRAM IFMAP Reads: 28901376.0 31 | SRAM OFMAP Cycles: 1932928.0 32 | SRAM OFMAP Start Cycle: 2303.0 33 | SRAM OFMAP Writes: 120320.0 34 | Layer 3: 35 | DRAM Filter Reads: 4718592.0 36 | DRAM IFMAP Reads: 7320866.0 37 | DRAM OFMAP Writes: 36871.0 38 | SRAM Filter Cycles: 2727877.0 39 | SRAM Filter Reads: 4718592.0 40 | SRAM IFMAP Cycles: 1777900.0 41 | SRAM IFMAP Reads: 21233664.0 42 | SRAM OFMAP Cycles: 1773312.0 43 | SRAM OFMAP Start Cycle: 4607.0 44 | SRAM OFMAP Writes: 46080.0 45 | Layer 4: 46 | DRAM Filter Reads: 4718592.0 47 | DRAM IFMAP Reads: 1058891.0 48 | DRAM OFMAP Writes: 2048.0 49 | SRAM Filter Cycles: 1548261.0 50 | SRAM Filter Reads: 4718592.0 51 | SRAM IFMAP Cycles: 591212.0 52 | SRAM IFMAP Reads: 2359296.0 53 | SRAM OFMAP Cycles: 582004.0 54 | SRAM OFMAP Start Cycle: 9227.0 55 | SRAM OFMAP Writes: 3584.0 56 | Layer 5: 57 | DRAM Filter Reads: 8388608.0 58 | DRAM IFMAP Reads: 8192.0 59 | DRAM OFMAP Writes: 1024.0 60 | SRAM Filter Cycles: 2555880.0 61 | SRAM Filter Reads: 8388608.0 62 | SRAM IFMAP Cycles: 1051369.0 63 | SRAM IFMAP Reads: 1048576.0 64 | SRAM OFMAP Cycles: 1043185.0 65 | SRAM OFMAP Start Cycle: 8206.0 66 | SRAM OFMAP Writes: 4096.0 67 | Layer 6: 68 | DRAM Filter Reads: 524288.0 69 | DRAM IFMAP Reads: 1024.0 70 | DRAM OFMAP Writes: 512.0 71 | SRAM Filter Cycles: 159720.0 72 | SRAM Filter Reads: 524288.0 73 | SRAM IFMAP Cycles: 66921.0 74 | SRAM IFMAP Reads: 65536.0 75 | SRAM OFMAP Cycles: 65905.0 76 | SRAM OFMAP Start Cycle: 1038.0 77 | SRAM OFMAP Writes: 2048.0 78 | -------------------------------------------------------------------------------- /inference-energy-cal/results/tssl/mem-stat.yaml: -------------------------------------------------------------------------------- 1 | DRAM: 2 | - name: DRAM-system 3 | read energy: 0.468762 4 | SRAM: 5 | - name: GLB-ifmap 6 | read dynamic energy: 0.0110295 7 | write dynamic energy: 0.012772 8 | leakage power: 6.25862 9 | - name: GLB-ofmap 10 | read dynamic energy: 0.0110295 11 | write dynamic energy: 0.012772 12 | leakage power: 6.25862 13 | - name: GLB-weight 14 | read dynamic energy: 0.0286315 15 | write dynamic energy: 0.0282197 16 | leakage power: 24.9086 17 | -------------------------------------------------------------------------------- /inference-energy-cal/results/tssl/memory-energy.yaml: -------------------------------------------------------------------------------- 1 | DRAM-system: 2 | ifmap: 10637729.975166 3 | ofmap: 228018.96213600002 4 | total: 20160406.874454 5 | weight: 9294657.937152 6 | GLB-ifmap: 7 | ifmap-dynamic: 961494.9860159999 8 | ifmap-leakage: 772646.5101185001 9 | ifmap-total: 1734141.4961345 10 | GLB-ofmap: 11 | ofmap-dynamic: 7502.170624 12 | ofmap-leakage: 772646.5101185001 13 | ofmap-total: 780148.6807425001 14 | GLB-weight: 15 | weight-dynamic: 567708.130624 16 | weight-leakage: 3075045.754805 17 | weight-total: 3642753.885429 18 | dram_total: 20160406.874454 19 | sram_total: 6157044.062306 20 | -------------------------------------------------------------------------------- /inference-energy-cal/run.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | 4 | def print_sata_sim_banner(): 5 | banner = r""" 6 | _____ _______ _____ _____ __ __ 7 | / ____| /\|__ __|/\ / ____|_ _| \/ | 8 | | (___ / \ | | / \ ______ | (___ | | | \ / | 9 | \___ \ / /\ \ | | / /\ \ |______| \___ \ | | | |\/| | 10 | ____) / ____ \| |/ ____ \ ____) |_| |_| | | | 11 | |_____/_/ \_\_/_/ \_\ |_____/|_____|_| |_| 12 | 13 | """ 14 | print(banner) 15 | 16 | if __name__ == "__main__": 17 | 18 | 19 | print_sata_sim_banner() 20 | subprocess.run('python3 comp-utils.py', shell=True) 21 | subprocess.run('python3 mem-utils.py', shell=True) 22 | subprocess.run('python3 cycle-utils.py', shell=True) 23 | subprocess.run('python3 energy-cal.py', shell=True) 24 | -------------------------------------------------------------------------------- /inference-energy-cal/sata-config.yaml: -------------------------------------------------------------------------------- 1 | architecture: 2 | name: SATA-inference 3 | dataflow: sata 4 | # timestep: 8 5 | clock-frequency: 400 6 | 7 | subtree: 8 | - name: PE-array 9 | class: pe-array 10 | attributes: 11 | width: 8 12 | height: 16 13 | 14 | subtree: 15 | - name: PE-compute 16 | class: pe-compute 17 | 18 | local: 19 | - name: and-gate 20 | class: compute 21 | act-tag: spike-mac 22 | attributes: 23 | kernel: andgate 24 | width: 8 25 | gated: n 26 | count: 1 27 | - name: adder 28 | class: compute 29 | act-tag: spike-mac 30 | attributes: 31 | kernel: adder 32 | width: 8 33 | gated: n 34 | count: 1 35 | - name: reg-acc 36 | class: compute 37 | act-tag: spike-mac 38 | attributes: 39 | kernel: register 40 | width: 16 41 | gated: y 42 | count: 1 43 | - name: comp-lif 44 | class: compute 45 | act-tag: lif 46 | attributes: 47 | kernel: comparator 48 | width: 16 49 | gated: n 50 | count: 1 51 | - name: sub-lif 52 | class: compute 53 | act-tag: lif 54 | attributes: 55 | kernel: subtractor 56 | width: 16 57 | gated: n 58 | count: 1 59 | - name: reg-th 60 | class: compute 61 | act-tag: lif 62 | attributes: 63 | kernel: register 64 | width: 4 65 | gated: y 66 | count: 1 67 | - name: reg-membrane 68 | class: compute 69 | act-tag: lif 70 | attributes: 71 | kernel: register 72 | width: 8 73 | gated: y 74 | count: 1 75 | 76 | - name: PE-mem 77 | class: pe-mem 78 | 79 | local: 80 | - name: SPAD-spike 81 | class: memory 82 | act-tag: spad 83 | attributes: 84 | kernel: registerfiles 85 | width: 8 86 | size-bytes: 9 87 | count: 1 88 | gated: n 89 | - name: SPAD-weight 90 | class: memory 91 | act-tag: spad 92 | attributes: 93 | kernel: registerfiles 94 | width: 8 95 | size-bytes: 9 96 | count: 1 97 | gated: y 98 | - name: SPAD-psum 99 | class: memory 100 | act-tag: spad 101 | attributes: 102 | kernel: registerfiles 103 | width: 8 104 | size-bytes: 8 105 | count: 1 106 | gated: n 107 | 108 | - name: GLBs 109 | class: glbs 110 | 111 | local: 112 | - name: GLB-weight 113 | class: memory 114 | act-tag: glb-weight 115 | attributes: 116 | kernel: sram 117 | width: 32 118 | size-bytes: 147456 119 | bank: 2 120 | count: 1 121 | gated: y 122 | - name: GLB-ifmap 123 | class: memory 124 | act-tag: glb-ifmap 125 | attributes: 126 | kernel: sram 127 | width: 32 128 | size-bytes: 32768 129 | bank: 2 130 | count: 1 131 | gated: y 132 | - name: GLB-ofmap 133 | class: memory 134 | act-tag: glb-ofmap 135 | attributes: 136 | kernel: sram 137 | width: 32 138 | size-bytes: 32768 139 | bank: 2 140 | count: 1 141 | gated: y 142 | 143 | - name: DRAMs 144 | class: dram 145 | 146 | local: 147 | - name: DRAM-system 148 | class: memory 149 | act-tag: dram 150 | attributes: 151 | kernel: dram 152 | width: 128 153 | size-bytes: 536870912 154 | bank: 16 155 | count: 1 156 | gated: y 157 | -------------------------------------------------------------------------------- /inference-energy-cal/workload.yaml: -------------------------------------------------------------------------------- 1 | General: 2 | timestep: 4 3 | sparsity: 0.85 4 | 5 | Layers: 6 | - name: Conv1 7 | attributes: 8 | IFMAP Height: 32 9 | IFMAP Width: 32 10 | Filter Height: 3 11 | Filter Width: 3 12 | Channels: 3 13 | Num Filter: 128 14 | Strides: 1 15 | - name: Conv2 16 | attributes: 17 | IFMAP Height: 32 18 | IFMAP Width: 32 19 | Filter Height: 3 20 | Filter Width: 3 21 | Channels: 128 22 | Num Filter: 128 23 | Strides: 1 24 | - name: Conv3 25 | attributes: 26 | IFMAP Height: 32 27 | IFMAP Width: 32 28 | Filter Height: 3 29 | Filter Width: 3 30 | Channels: 128 31 | Num Filter: 128 32 | Strides: 1 33 | - name: Conv4 34 | attributes: 35 | IFMAP Height: 32 36 | IFMAP Width: 32 37 | Filter Height: 3 38 | Filter Width: 3 39 | Channels: 128 40 | Num Filter: 128 41 | Strides: 1 42 | - name: Conv5 43 | attributes: 44 | IFMAP Height: 32 45 | IFMAP Width: 32 46 | Filter Height: 3 47 | Filter Width: 3 48 | Channels: 128 49 | Num Filter: 128 50 | Strides: 1 51 | - name: Conv6 52 | attributes: 53 | IFMAP Height: 32 54 | IFMAP Width: 32 55 | Filter Height: 3 56 | Filter Width: 3 57 | Channels: 128 58 | Num Filter: 128 59 | Strides: 1 60 | - name: Conv7 61 | attributes: 62 | IFMAP Height: 32 63 | IFMAP Width: 32 64 | Filter Height: 3 65 | Filter Width: 3 66 | Channels: 128 67 | Num Filter: 128 68 | Strides: 1 69 | - name: Conv8 70 | attributes: 71 | IFMAP Height: 32 72 | IFMAP Width: 32 73 | Filter Height: 3 74 | Filter Width: 3 75 | Channels: 128 76 | Num Filter: 256 77 | Strides: 2 78 | - name: Conv9 79 | attributes: 80 | IFMAP Height: 16 81 | IFMAP Width: 16 82 | Filter Height: 3 83 | Filter Width: 3 84 | Channels: 256 85 | Num Filter: 256 86 | Strides: 1 87 | - name: Conv10 88 | attributes: 89 | IFMAP Height: 16 90 | IFMAP Width: 16 91 | Filter Height: 3 92 | Filter Width: 3 93 | Channels: 256 94 | Num Filter: 256 95 | Strides: 1 96 | - name: Conv11 97 | attributes: 98 | IFMAP Height: 16 99 | IFMAP Width: 16 100 | Filter Height: 3 101 | Filter Width: 3 102 | Channels: 256 103 | Num Filter: 256 104 | Strides: 1 105 | - name: Conv12 106 | attributes: 107 | IFMAP Height: 16 108 | IFMAP Width: 16 109 | Filter Height: 3 110 | Filter Width: 3 111 | Channels: 256 112 | Num Filter: 256 113 | Strides: 1 114 | - name: Conv13 115 | attributes: 116 | IFMAP Height: 16 117 | IFMAP Width: 16 118 | Filter Height: 3 119 | Filter Width: 3 120 | Channels: 256 121 | Num Filter: 256 122 | Strides: 1 123 | - name: Conv14 124 | attributes: 125 | IFMAP Height: 16 126 | IFMAP Width: 16 127 | Filter Height: 3 128 | Filter Width: 3 129 | Channels: 256 130 | Num Filter: 512 131 | Strides: 2 132 | - name: Conv15 133 | attributes: 134 | IFMAP Height: 8 135 | IFMAP Width: 8 136 | Filter Height: 3 137 | Filter Width: 3 138 | Channels: 512 139 | Num Filter: 512 140 | Strides: 1 141 | - name: Conv16 142 | attributes: 143 | IFMAP Height: 8 144 | IFMAP Width: 8 145 | Filter Height: 3 146 | Filter Width: 3 147 | Channels: 512 148 | Num Filter: 512 149 | Strides: 1 150 | - name: Conv17 151 | attributes: 152 | IFMAP Height: 8 153 | IFMAP Width: 8 154 | Filter Height: 3 155 | Filter Width: 3 156 | Channels: 512 157 | Num Filter: 512 158 | Strides: 1 159 | - name: FC1 160 | attributes: 161 | IFMAP Height: 1 162 | IFMAP Width: 1 163 | Filter Height: 1 164 | Filter Width: 1 165 | Channels: 8192 166 | Num Filter: 256 167 | Strides: 1 -------------------------------------------------------------------------------- /inference-energy-cal/workloads/workload._bntt.yaml: -------------------------------------------------------------------------------- 1 | General: 2 | timestep: 20 3 | sparsity: 0.91 4 | 5 | Layers: 6 | - name: Conv1 7 | attributes: 8 | IFMAP Height: 32 9 | IFMAP Width: 32 10 | Filter Height: 3 11 | Filter Width: 3 12 | Channels: 3 13 | Num Filter: 64 14 | Strides: 1 15 | - name: Conv2 16 | attributes: 17 | IFMAP Height: 32 18 | IFMAP Width: 32 19 | Filter Height: 3 20 | Filter Width: 3 21 | Channels: 64 22 | Num Filter: 64 23 | Strides: 1 24 | - name: Conv3 25 | attributes: 26 | IFMAP Height: 16 27 | IFMAP Width: 16 28 | Filter Height: 3 29 | Filter Width: 3 30 | Channels: 64 31 | Num Filter: 128 32 | Strides: 1 33 | - name: Conv4 34 | attributes: 35 | IFMAP Height: 8 36 | IFMAP Width: 8 37 | Filter Height: 3 38 | Filter Width: 3 39 | Channels: 128 40 | Num Filter: 128 41 | Strides: 1 42 | - name: Conv5 43 | attributes: 44 | IFMAP Height: 4 45 | IFMAP Width: 4 46 | Filter Height: 3 47 | Filter Width: 3 48 | Channels: 128 49 | Num Filter: 256 50 | Strides: 1 51 | - name: Conv6 52 | attributes: 53 | IFMAP Height: 4 54 | IFMAP Width: 4 55 | Filter Height: 3 56 | Filter Width: 3 57 | Channels: 256 58 | Num Filter: 256 59 | Strides: 1 60 | - name: Conv7 61 | attributes: 62 | IFMAP Height: 4 63 | IFMAP Width: 4 64 | Filter Height: 3 65 | Filter Width: 3 66 | Channels: 256 67 | Num Filter: 256 68 | Strides: 1 69 | - name: FC1 70 | attributes: 71 | IFMAP Height: 1 72 | IFMAP Width: 1 73 | Filter Height: 1 74 | Filter Width: 1 75 | Channels: 1024 76 | Num Filter: 1024 77 | Strides: 1 78 | - name: FC2 79 | attributes: 80 | IFMAP Height: 1 81 | IFMAP Width: 1 82 | Filter Height: 1 83 | Filter Width: 1 84 | Channels: 1024 85 | Num Filter: 512 86 | Strides: 1 -------------------------------------------------------------------------------- /inference-energy-cal/workloads/workload_direct.yaml: -------------------------------------------------------------------------------- 1 | General: 2 | timestep: 10 3 | sparsity: 0.9 4 | 5 | Layers: 6 | - name: Conv1 7 | attributes: 8 | IFMAP Height: 32 9 | IFMAP Width: 32 10 | Filter Height: 3 11 | Filter Width: 3 12 | Channels: 3 13 | Num Filter: 128 14 | Strides: 1 15 | - name: Conv2 16 | attributes: 17 | IFMAP Height: 32 18 | IFMAP Width: 32 19 | Filter Height: 3 20 | Filter Width: 3 21 | Channels: 128 22 | Num Filter: 256 23 | Strides: 1 24 | - name: Conv3 25 | attributes: 26 | IFMAP Height: 16 27 | IFMAP Width: 16 28 | Filter Height: 3 29 | Filter Width: 3 30 | Channels: 256 31 | Num Filter: 512 32 | Strides: 1 33 | - name: Conv4 34 | attributes: 35 | IFMAP Height: 8 36 | IFMAP Width: 8 37 | Filter Height: 3 38 | Filter Width: 3 39 | Channels: 512 40 | Num Filter: 1024 41 | Strides: 1 42 | - name: Conv5 43 | attributes: 44 | IFMAP Height: 4 45 | IFMAP Width: 4 46 | Filter Height: 3 47 | Filter Width: 3 48 | Channels: 1024 49 | Num Filter: 512 50 | Strides: 1 51 | - name: FC1 52 | attributes: 53 | IFMAP Height: 1 54 | IFMAP Width: 1 55 | Filter Height: 1 56 | Filter Width: 1 57 | Channels: 8192 58 | Num Filter: 1024 59 | Strides: 1 60 | - name: FC2 61 | attributes: 62 | IFMAP Height: 1 63 | IFMAP Width: 1 64 | Filter Height: 1 65 | Filter Width: 1 66 | Channels: 1024 67 | Num Filter: 512 68 | Strides: 1 -------------------------------------------------------------------------------- /inference-energy-cal/workloads/workload_tdbn.yaml: -------------------------------------------------------------------------------- 1 | General: 2 | timestep: 4 3 | sparsity: 0.85 4 | 5 | Layers: 6 | - name: Conv1 7 | attributes: 8 | IFMAP Height: 32 9 | IFMAP Width: 32 10 | Filter Height: 3 11 | Filter Width: 3 12 | Channels: 3 13 | Num Filter: 128 14 | Strides: 1 15 | - name: Conv2 16 | attributes: 17 | IFMAP Height: 32 18 | IFMAP Width: 32 19 | Filter Height: 3 20 | Filter Width: 3 21 | Channels: 128 22 | Num Filter: 128 23 | Strides: 1 24 | - name: Conv3 25 | attributes: 26 | IFMAP Height: 32 27 | IFMAP Width: 32 28 | Filter Height: 3 29 | Filter Width: 3 30 | Channels: 128 31 | Num Filter: 128 32 | Strides: 1 33 | - name: Conv4 34 | attributes: 35 | IFMAP Height: 32 36 | IFMAP Width: 32 37 | Filter Height: 3 38 | Filter Width: 3 39 | Channels: 128 40 | Num Filter: 128 41 | Strides: 1 42 | - name: Conv5 43 | attributes: 44 | IFMAP Height: 32 45 | IFMAP Width: 32 46 | Filter Height: 3 47 | Filter Width: 3 48 | Channels: 128 49 | Num Filter: 128 50 | Strides: 1 51 | - name: Conv6 52 | attributes: 53 | IFMAP Height: 32 54 | IFMAP Width: 32 55 | Filter Height: 3 56 | Filter Width: 3 57 | Channels: 128 58 | Num Filter: 128 59 | Strides: 1 60 | - name: Conv7 61 | attributes: 62 | IFMAP Height: 32 63 | IFMAP Width: 32 64 | Filter Height: 3 65 | Filter Width: 3 66 | Channels: 128 67 | Num Filter: 128 68 | Strides: 1 69 | - name: Conv8 70 | attributes: 71 | IFMAP Height: 32 72 | IFMAP Width: 32 73 | Filter Height: 3 74 | Filter Width: 3 75 | Channels: 128 76 | Num Filter: 256 77 | Strides: 2 78 | - name: Conv9 79 | attributes: 80 | IFMAP Height: 16 81 | IFMAP Width: 16 82 | Filter Height: 3 83 | Filter Width: 3 84 | Channels: 256 85 | Num Filter: 256 86 | Strides: 1 87 | - name: Conv10 88 | attributes: 89 | IFMAP Height: 16 90 | IFMAP Width: 16 91 | Filter Height: 3 92 | Filter Width: 3 93 | Channels: 256 94 | Num Filter: 256 95 | Strides: 1 96 | - name: Conv11 97 | attributes: 98 | IFMAP Height: 16 99 | IFMAP Width: 16 100 | Filter Height: 3 101 | Filter Width: 3 102 | Channels: 256 103 | Num Filter: 256 104 | Strides: 1 105 | - name: Conv12 106 | attributes: 107 | IFMAP Height: 16 108 | IFMAP Width: 16 109 | Filter Height: 3 110 | Filter Width: 3 111 | Channels: 256 112 | Num Filter: 256 113 | Strides: 1 114 | - name: Conv13 115 | attributes: 116 | IFMAP Height: 16 117 | IFMAP Width: 16 118 | Filter Height: 3 119 | Filter Width: 3 120 | Channels: 256 121 | Num Filter: 256 122 | Strides: 1 123 | - name: Conv14 124 | attributes: 125 | IFMAP Height: 16 126 | IFMAP Width: 16 127 | Filter Height: 3 128 | Filter Width: 3 129 | Channels: 256 130 | Num Filter: 512 131 | Strides: 2 132 | - name: Conv15 133 | attributes: 134 | IFMAP Height: 8 135 | IFMAP Width: 8 136 | Filter Height: 3 137 | Filter Width: 3 138 | Channels: 512 139 | Num Filter: 512 140 | Strides: 1 141 | - name: Conv16 142 | attributes: 143 | IFMAP Height: 8 144 | IFMAP Width: 8 145 | Filter Height: 3 146 | Filter Width: 3 147 | Channels: 512 148 | Num Filter: 512 149 | Strides: 1 150 | - name: Conv17 151 | attributes: 152 | IFMAP Height: 8 153 | IFMAP Width: 8 154 | Filter Height: 3 155 | Filter Width: 3 156 | Channels: 512 157 | Num Filter: 512 158 | Strides: 1 159 | - name: FC1 160 | attributes: 161 | IFMAP Height: 1 162 | IFMAP Width: 1 163 | Filter Height: 1 164 | Filter Width: 1 165 | Channels: 8192 166 | Num Filter: 256 167 | Strides: 1 -------------------------------------------------------------------------------- /inference-energy-cal/workloads/workload_tssl.yaml: -------------------------------------------------------------------------------- 1 | General: 2 | timestep: 5 3 | sparsity: 0.9 4 | 5 | Layers: 6 | - name: Conv1 7 | attributes: 8 | IFMAP Height: 32 9 | IFMAP Width: 32 10 | Filter Height: 3 11 | Filter Width: 3 12 | Channels: 3 13 | Num Filter: 128 14 | Strides: 1 15 | - name: Conv2 16 | attributes: 17 | IFMAP Height: 32 18 | IFMAP Width: 32 19 | Filter Height: 3 20 | Filter Width: 3 21 | Channels: 128 22 | Num Filter: 256 23 | Strides: 1 24 | - name: Conv3 25 | attributes: 26 | IFMAP Height: 16 27 | IFMAP Width: 16 28 | Filter Height: 3 29 | Filter Width: 3 30 | Channels: 256 31 | Num Filter: 512 32 | Strides: 1 33 | - name: Conv4 34 | attributes: 35 | IFMAP Height: 8 36 | IFMAP Width: 8 37 | Filter Height: 3 38 | Filter Width: 3 39 | Channels: 512 40 | Num Filter: 1024 41 | Strides: 1 42 | - name: Conv5 43 | attributes: 44 | IFMAP Height: 4 45 | IFMAP Width: 4 46 | Filter Height: 3 47 | Filter Width: 3 48 | Channels: 1024 49 | Num Filter: 512 50 | Strides: 1 51 | - name: FC1 52 | attributes: 53 | IFMAP Height: 1 54 | IFMAP Width: 1 55 | Filter Height: 1 56 | Filter Width: 1 57 | Channels: 8192 58 | Num Filter: 1024 59 | Strides: 1 60 | - name: FC2 61 | attributes: 62 | IFMAP Height: 1 63 | IFMAP Width: 1 64 | Filter Height: 1 65 | Filter Width: 1 66 | Channels: 1024 67 | Num Filter: 512 68 | Strides: 1 -------------------------------------------------------------------------------- /training_energy_cal/energy_cal.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import energy_configs 3 | import mem_configs 4 | import yaml 5 | from get_workload import get_workload 6 | from get_arch_energy import get_arch_energy 7 | 8 | 9 | 10 | def main(): 11 | 12 | hw_config = 'sata_config.yaml' 13 | network_path = 'vgg5_cifar10.yaml' 14 | args = energy_configs.get_args() 15 | mem_args = mem_configs.get_args() 16 | 17 | T = 4 18 | fwd_b = 2 19 | 20 | sp_s = 0.9 21 | sp_du = 0.7 22 | sp_df = 0.6 23 | 24 | 25 | keyword = ['lif','mac_fwd','pgu','mac_bwd','mac_wup','dram_fwd', 'glb_fwd','spad_fwd','dram_bwd', 26 | 'glb_bwd', 'spad_bwd', 'dram_wup', 'glb_wup', 'spad_wup'] 27 | 28 | 29 | workload_d = get_workload(T,fwd_b,network_path,sp_s,sp_du,sp_df) 30 | arch_d = get_arch_energy(args, mem_args, hw_config, fwd_b) 31 | 32 | 33 | total_energy = 0 34 | total_fwd_comp = 0 35 | total_fwd_mem = 0 36 | 37 | for k in keyword: 38 | total_energy += workload_d[k]*arch_d[k] 39 | 40 | if 'lif' in k: 41 | total_fwd_comp += workload_d[k]*arch_d[k] 42 | if 'mac_fwd' in k: 43 | total_fwd_comp += workload_d[k]*arch_d[k] 44 | elif 'fwd' in k: 45 | total_fwd_mem += workload_d[k]*arch_d[k] 46 | 47 | 48 | single_ann_mac = 0.239 + 0.0389 49 | print("Bitwidth for fwd: ", fwd_b) 50 | print("Total Energy in (\MAC): ", total_energy/single_ann_mac) 51 | print("Total Fwd Energy in (\MAC): ", (total_fwd_comp + total_fwd_mem)/single_ann_mac) 52 | print("Total Fwd Comp Energy in (\MAC): ", total_fwd_comp/single_ann_mac) 53 | print("Total Fwd Mem Energy in (\MAC): ", total_fwd_mem/single_ann_mac) 54 | 55 | 56 | 57 | 58 | if __name__ == '__main__': 59 | main() 60 | -------------------------------------------------------------------------------- /training_energy_cal/energy_configs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def get_args(): 5 | 6 | parser = argparse.ArgumentParser("SATA_Energy_Component") 7 | 8 | parser.add_argument('--mul', type=float, default=0.239, help='dynamic energy for 16 bits multiplier') 9 | parser.add_argument('--acc', type=float, default=0.0389, help='dynamic energy for 16 bits accumulator') 10 | parser.add_argument('--add', type=float, default=0.00967, help='dynamic energy for 16 bits adder') 11 | parser.add_argument('--and', type=float, default=0.000794, help='dynamic energy for 16 bits bitwsie-and') 12 | parser.add_argument('--comp', type=float, default=0.00309, help='dynamic energy for 16 bits comparator') 13 | parser.add_argument('--mux', type=float, default=0.00172, help='dynamic energy for 16 bits mux with 2 inputs') 14 | parser.add_argument('--reg', type=float, default=0.0301, help='dynamic energy for 16 bits register') 15 | parser.add_argument('--sft', type=float, default=0.00605, help='dynamic energy for 16 bits sfter, 3 stage') 16 | 17 | # parser.add_argument('--kw', type=int, default=8, help='bitwidth for weight') 18 | # parser.add_argument('--ku', type=int, default=8, help='bitwidth for membrane potential') 19 | # parser.add_argument('--kdu', type=int, default=8, help='bitwidth for gradient of membrane potential') 20 | # parser.add_argument('--kds', type=int, default=8, help='bitwidth for gradient of spike') 21 | # parser.add_argument('--kdw', type=int, default=8, help='bitwidth for gradient of weight') 22 | # parser.add_argument('--kh', type=int, default=8, help='bitwidth for error') 23 | 24 | 25 | args = parser.parse_args() 26 | print(args) 27 | 28 | return args -------------------------------------------------------------------------------- /training_energy_cal/get_arch_energy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import energy_configs 3 | import mem_configs 4 | import yaml 5 | 6 | 7 | # def backward(energy_dic): 8 | 9 | 10 | def get_arch_energy(args, mem_args, hw_config, fwd_b): 11 | 12 | arch_e_dic = {} 13 | 14 | with open(hw_config, 'r') as file: 15 | 16 | documents = yaml.full_load(file) 17 | energy_dic = (vars(args)) 18 | 19 | single_lif = 0 20 | single_mac_fwd = 0 21 | single_pgu = 0 22 | single_mac_bwd = 0 23 | single_mac_wup = 0 24 | 25 | for item, doc in documents.items(): 26 | if item == "fwd": 27 | for item2, doc2 in doc.items(): 28 | if item2 == "lif": 29 | for k in doc2: 30 | single_lif += (energy_dic[k] * (fwd_b/16)) * doc2[k] 31 | elif item2 == "mac": 32 | for k in doc2: 33 | single_mac_fwd += (energy_dic[k] * (fwd_b/16)) * doc2[k] 34 | 35 | elif item == "bwd": 36 | for item2, doc2 in doc.items(): 37 | if item2 == "pgu": 38 | for k in doc2: 39 | single_pgu += energy_dic[k] * doc2[k] 40 | elif item2 == "mac": 41 | for k in doc2: 42 | single_mac_bwd += energy_dic[k] * doc2[k] 43 | 44 | elif item == "wup": 45 | for item2, doc2 in doc.items(): 46 | if item2 == "mac": 47 | for k in doc2: 48 | single_mac_wup += energy_dic[k] * doc2[k] 49 | 50 | 51 | arch_e_dic['lif'] = single_lif 52 | arch_e_dic['mac_fwd'] = single_mac_fwd 53 | arch_e_dic['pgu'] = single_pgu 54 | arch_e_dic['mac_bwd'] = single_mac_bwd 55 | arch_e_dic['mac_wup'] = single_mac_wup 56 | 57 | arch_e_dic['dram_fwd']=mem_args.dram * (fwd_b/16) 58 | arch_e_dic['glb_fwd']=mem_args.sram * (fwd_b/16) 59 | arch_e_dic['spad_fwd']=mem_args.spad * (fwd_b/16) 60 | arch_e_dic['dram_bwd']=mem_args.dram 61 | arch_e_dic['glb_bwd']= mem_args.sram 62 | arch_e_dic['spad_bwd']=mem_args.spad 63 | arch_e_dic['dram_wup']=mem_args.dram 64 | arch_e_dic['glb_wup']=mem_args.sram 65 | arch_e_dic['spad_wup']=mem_args.spad 66 | 67 | 68 | return arch_e_dic 69 | -------------------------------------------------------------------------------- /training_energy_cal/get_workload.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import energy_configs 3 | import yaml 4 | 5 | 6 | 7 | def get_workload(T,b,network_path,sp_s,sp_du,sp_df): 8 | 9 | # network_path = 'vgg5_cifar10.yaml' 10 | workload_dic = {} 11 | 12 | with open(network_path,'r') as file: 13 | documents = yaml.full_load(file) 14 | 15 | lif_n = 0 16 | mac_fwd_n = 0 17 | pgu_n = 0 18 | mac_bwd_n = 0 19 | mac_wup_n = 0 20 | dram_fwd_n =0 21 | glb_fwd_n =0 22 | spad_fwd_n =0 23 | dram_bwd_n =0 24 | glb_bwd_n =0 25 | spad_bwd_n =0 26 | dram_wup_n =0 27 | glb_wup_n =0 28 | spad_wup_n =0 29 | 30 | 31 | 32 | for item, doc in documents.items(): 33 | if doc['type'] == '2dconv': 34 | lif_n += doc['K'] * doc['E_h'] * doc['E_w'] * T 35 | mac_fwd_n += (1-sp_s) * doc['C']* doc['R_h'] * doc['R_w'] * doc['K'] * doc['E_h'] * doc['E_w'] * T 36 | pgu_n += doc['K'] * doc['E_h'] * doc['E_w'] * T 37 | mac_bwd_n += (1-sp_du) * doc['C']* doc['R_h'] * doc['R_w'] * doc['K'] * doc['H_h'] * doc['H_w'] * T 38 | mac_wup_n += (1-sp_s) * doc['C']* doc['R_h'] * doc['R_w'] * doc['K'] * doc['E_h'] * doc['E_w'] * T 39 | 40 | dram_fwd_n += doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + (doc['K'] * doc['E_h'] * doc['E_w'] + (1/b) * doc['C'] * doc['H_h'] * doc['H_w']) * T 41 | glb_fwd_n += 2 * (doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + (doc['K'] * doc['E_h'] * doc['E_w'] + (1/b) * doc['C'] * doc['H_h'] * doc['H_w']) * T) 42 | spad_fwd_n += 2 * (doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + T * (1/b) * doc['C'] * doc['H_h'] * doc['H_w']) 43 | dram_bwd_n += T * (doc['K'] * doc['E_h'] * doc['E_w'] + (1/b) * doc['C'] * doc['H_h'] * doc['H_w']) 44 | glb_bwd_n += 7 * T * (doc['K'] * doc['E_h'] * doc['E_w']) + (2*T*(1/b)*doc['C'] * doc['H_h'] * doc['H_w'] + doc['K'] * doc['C']* doc['R_h'] * doc['R_w']) 45 | spad_bwd_n += doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + T * doc['K'] * doc['E_h'] * doc['E_w'] 46 | dram_wup_n += 2 * (doc['K'] * doc['C']* doc['R_h'] * doc['R_w']) 47 | glb_wup_n += 2* (1+T) * doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + T * ((1/b) * doc['C'] * doc['H_h'] * doc['H_w']+doc['K'] * doc['E_h'] * doc['E_w']) 48 | spad_wup_n += 2* (1+T) * doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + T * ((1/b) * doc['C'] * doc['H_h'] * doc['H_w']+doc['K'] * doc['E_h'] * doc['E_w']) + 2*T*doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] 49 | 50 | elif doc['type'] == 'linear': 51 | lif_n += doc['out'] * T 52 | mac_fwd_n = doc['out'] * doc['in'] * T 53 | pgu_n += doc['out'] * T 54 | mac_bwd_n = doc['out'] * doc['in'] * T 55 | mac_wup_n = doc['out'] * doc['in'] * T 56 | 57 | dram_fwd_n += doc['out'] * doc['in'] + doc['out']*T + doc['in']*T*(1/b) 58 | glb_fwd_n += 2*(doc['out'] * doc['in'] + doc['out']*T + doc['in']*T*(1/b)) 59 | spad_fwd_n += 2*(doc['out'] * doc['in'] + T * (1/b) * doc['in']) 60 | dram_bwd_n += T*(doc['out'] + (1/b)*doc['in']) 61 | glb_bwd_n += 7*T*(doc['out']) + (2*T*(1/b)*doc['in'] + doc['in']*doc['out']) 62 | spad_bwd_n += (doc['out'] * doc['in'] + T*doc['out']) 63 | dram_wup_n += 2 * doc['out'] * doc['in'] 64 | glb_wup_n += 2 * (1+T) * doc['out'] * doc['in'] + T * ((1/b) * doc['in'] + doc['out']) 65 | spad_wup_n += 2 * (1+T) * doc['out'] * doc['in'] + T * ((1/b) * doc['in'] + doc['out']) + 2*T*doc['out'] * doc['in'] 66 | 67 | 68 | workload_dic['lif'] = lif_n 69 | workload_dic['mac_fwd']=mac_fwd_n 70 | workload_dic['pgu']=pgu_n 71 | workload_dic['mac_bwd']=mac_bwd_n 72 | workload_dic['mac_wup']=mac_wup_n 73 | workload_dic['dram_fwd']=dram_fwd_n 74 | workload_dic['glb_fwd']=glb_fwd_n 75 | workload_dic['spad_fwd']=spad_fwd_n 76 | workload_dic['dram_bwd']=dram_bwd_n 77 | workload_dic['glb_bwd']= glb_bwd_n 78 | workload_dic['spad_bwd']=spad_bwd_n 79 | workload_dic['dram_wup']=dram_wup_n 80 | workload_dic['glb_wup']=glb_wup_n 81 | workload_dic['spad_wup']=spad_wup_n 82 | 83 | 84 | return workload_dic 85 | 86 | 87 | -------------------------------------------------------------------------------- /training_energy_cal/get_workload_new.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import energy_configs 3 | import yaml 4 | 5 | 6 | class Workload_Calculator: 7 | """The Calculator to Get Total Workload""" 8 | 9 | def __init__(self, mapping='TF', network_path=None, sp_dic=None, T=8): 10 | 11 | 12 | self.timstep = T 13 | self.sp_du = sp_dic['du'] 14 | self.sp_s = sp_dic['s'] 15 | self.sp_df = sp_dic['df'] 16 | 17 | L = 0 18 | with open(network_path,'r') as file: 19 | documents = yaml.full_load(file) 20 | for item, doc in documents.items(): 21 | L += 1 22 | self.layer = L 23 | 24 | def cal(self): 25 | 26 | print(self.layer) 27 | 28 | return None 29 | 30 | def get_workload(T,b,network_path,sp_s,sp_du,sp_df): 31 | 32 | # network_path = 'vgg5_cifar10.yaml' 33 | workload_dic = {} 34 | 35 | with open(network_path,'r') as file: 36 | documents = yaml.full_load(file) 37 | 38 | T = 8 39 | b = 8 40 | lif_n = 0 41 | mac_fwd_n = 0 42 | pgu_n = 0 43 | mac_bwd_n = 0 44 | mac_wup_n = 0 45 | dram_fwd_n =0 46 | glb_fwd_n =0 47 | spad_fwd_n =0 48 | dram_bwd_n =0 49 | glb_bwd_n =0 50 | spad_bwd_n =0 51 | dram_wup_n =0 52 | glb_wup_n =0 53 | spad_wup_n =0 54 | 55 | 56 | 57 | for item, doc in documents.items(): 58 | if doc['type'] == '2dconv': 59 | lif_n += doc['K'] * doc['E_h'] * doc['E_w'] * T 60 | mac_fwd_n += (1-sp_s) * doc['C']* doc['R_h'] * doc['R_w'] * doc['K'] * doc['E_h'] * doc['E_w'] * T 61 | pgu_n += doc['K'] * doc['E_h'] * doc['E_w'] * T 62 | mac_bwd_n += (1-sp_du) * doc['C']* doc['R_h'] * doc['R_w'] * doc['K'] * doc['H_h'] * doc['H_w'] * T 63 | mac_wup_n += (1-sp_s) * doc['C']* doc['R_h'] * doc['R_w'] * doc['K'] * doc['E_h'] * doc['E_w'] * T 64 | 65 | dram_fwd_n += doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + (doc['K'] * doc['E_h'] * doc['E_w'] + (1/b) * doc['C'] * doc['H_h'] * doc['H_w']) * T 66 | glb_fwd_n += 2 * (doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + (doc['K'] * doc['E_h'] * doc['E_w'] + (1/b) * doc['C'] * doc['H_h'] * doc['H_w']) * T) 67 | spad_fwd_n += 2 * (doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + T * (1/b) * doc['C'] * doc['H_h'] * doc['H_w']) 68 | dram_bwd_n += T * (doc['K'] * doc['E_h'] * doc['E_w'] + (1/b) * doc['C'] * doc['H_h'] * doc['H_w']) 69 | glb_bwd_n += 7 * T * (doc['K'] * doc['E_h'] * doc['E_w']) + (2*T*(1/b)*doc['C'] * doc['H_h'] * doc['H_w'] + doc['K'] * doc['C']* doc['R_h'] * doc['R_w']) 70 | spad_bwd_n += doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + T * doc['K'] * doc['E_h'] * doc['E_w'] 71 | dram_wup_n += 2 * (doc['K'] * doc['C']* doc['R_h'] * doc['R_w']) 72 | glb_wup_n += 2* (1+T) * doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + T * ((1/b) * doc['C'] * doc['H_h'] * doc['H_w']+doc['K'] * doc['E_h'] * doc['E_w']) 73 | spad_wup_n += 2* (1+T) * doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] + T * ((1/b) * doc['C'] * doc['H_h'] * doc['H_w']+doc['K'] * doc['E_h'] * doc['E_w']) + 2*T*doc['K'] * doc['C']* doc['R_h'] * doc['R_w'] 74 | 75 | elif doc['type'] == 'linear': 76 | lif_n += doc['out'] * T 77 | mac_fwd_n = doc['out'] * doc['in'] * T 78 | pgu_n += doc['out'] * T 79 | mac_bwd_n = doc['out'] * doc['in'] * T 80 | mac_wup_n = doc['out'] * doc['in'] * T 81 | 82 | dram_fwd_n += doc['out'] * doc['in'] + doc['out']*T + doc['in']*T*(1/b) 83 | glb_fwd_n += 2*(doc['out'] * doc['in'] + doc['out']*T + doc['in']*T*(1/b)) 84 | spad_fwd_n += 2*(doc['out'] * doc['in'] + T * (1/b) * doc['in']) 85 | dram_bwd_n += T*(doc['out'] + (1/b)*doc['in']) 86 | glb_bwd_n += 7*T*(doc['out']) + (2*T*(1/b)*doc['in'] + doc['in']*doc['out']) 87 | spad_bwd_n += (doc['out'] * doc['in'] + T*doc['out']) 88 | dram_wup_n += 2 * doc['out'] * doc['in'] 89 | glb_wup_n += 2 * (1+T) * doc['out'] * doc['in'] + T * ((1/b) * doc['in'] + doc['out']) 90 | spad_wup_n += 2 * (1+T) * doc['out'] * doc['in'] + T * ((1/b) * doc['in'] + doc['out']) + 2*T*doc['out'] * doc['in'] 91 | 92 | 93 | workload_dic['lif'] = lif_n 94 | workload_dic['mac_fwd']=mac_fwd_n 95 | workload_dic['pgu']=pgu_n 96 | workload_dic['mac_bwd']=mac_bwd_n 97 | workload_dic['mac_wup']=mac_wup_n 98 | workload_dic['dram_fwd']=dram_fwd_n 99 | workload_dic['glb_fwd']=glb_fwd_n 100 | workload_dic['spad_fwd']=spad_fwd_n 101 | workload_dic['dram_bwd']=dram_bwd_n 102 | workload_dic['glb_bwd']= glb_bwd_n 103 | workload_dic['spad_bwd']=spad_bwd_n 104 | workload_dic['dram_wup']=dram_wup_n 105 | workload_dic['glb_wup']=glb_wup_n 106 | workload_dic['spad_wup']=spad_wup_n 107 | 108 | 109 | return workload_dic 110 | 111 | 112 | --------------------------------------------------------------------------------