├── .gitignore ├── LICENSE ├── README.md ├── build_pytimeloop.py ├── gamma.jpg ├── patches └── update_interface.patch ├── run_gamma_timeloop.sh ├── run_gamma_timeloop_multiObjective.sh └── src ├── README.md ├── gamma_timeloop_env.py ├── in_config ├── arch.yaml ├── problem.yaml └── sparse.yaml ├── main.py ├── parse_timeloop_output.py ├── report ├── Gamma-Timeloop.csv ├── Gamma-Timeloop.txt ├── arch.yaml ├── map.yaml ├── problem.yaml └── sparse.yaml ├── timeloop_env.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # others 132 | *.plt 133 | .idea/ 134 | .idea 135 | .DS_Store 136 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 MAESTRO Project 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GAMMA-TimeLoop # 2 | This is the implementation of the mapper [GAMMA](https://dl.acm.org/doi/10.1145/3400302.3415639) using Timeloop as cost model. 3 | 4 | GAMMA-Timeloop searches through the design space modeled by Timeloop and proposes an optimized mapping. It was introduced in this IISWC 2022 [paper](https://arxiv.org/pdf/2210.03731.pdf) and [talk](https://synergy.ece.gatech.edu/files/2022/11/IISWC2022Demysitifying_MSE_talk-final.pdf). 5 | 6 | ![GAMMA Framework](./gamma.jpg) 7 | 8 | ### Sister Repo: Gamma-Maestro ### 9 | We also have GAMMA supporting MAESTRO as cost model. The sister repo can be found here [Gamma-Maestro](https://github.com/maestro-project/gamma). It searches through the design space of MAESTRO and proposes an optimized mapping. 10 | 11 | ---- 12 | ## Install Dependency ### 13 | ### Install Timeloop ### 14 | Timeloop documentation is hosted at [https://timeloop.csail.mit.edu/timeloop](https://timeloop.csail.mit.edu/timeloop). The guides there cover detailed installation steps. 15 | ### Install Timeloop-python ### 16 | 17 | ``` 18 | python build_pytimeloop.py 19 | ``` 20 | For more installation detail, please visit [https://timeloop.csail.mit.edu/timeloop](https://timeloop.csail.mit.edu/timeloop). 21 | 22 | ------------------ 23 | ## Take a Trial Run ## 24 | Run GAMMA-Timeloop 25 | ``` 26 | ./run_gamma_timeloop.sh 27 | ``` 28 | 29 | Run GAMMA-Timeloop with multi-objective 30 | ``` 31 | ./run_gamma_timeloop_multiObjective.sh 32 | ``` 33 | 34 | For more detail, please look at [``./src``](./src) 35 | 36 | -------------- 37 | 38 | ## Citation ## 39 | ``` 40 | @inproceedings{gamma_timeloop, 41 | author = {Kao, Sheng-Chun and Parashar, Angshuman and Tsai Po-An and Krishna, Tushar}, 42 | title = {Demystifying Map Space Exploration for NPUs}, 43 | booktitle = {IISWC}, 44 | year = {2022} 45 | } 46 | 47 | ``` 48 | 49 | 50 | ``` 51 | @inproceedings{gamma, 52 | author = {Kao, Sheng-Chun and Krishna, Tushar}, 53 | title = {GAMMA: Automating the HW Mapping of DNN Models on Accelerators via Genetic Algorithm}, 54 | booktitle = {ICCAD}, 55 | year = {2020} 56 | } 57 | 58 | ``` 59 | -------------------------------------------------------------------------------- /build_pytimeloop.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | commit_id = 'b5885615eeddfc249758d003a99c6854884a94b9' 3 | pytimeloop_dir = "../pytimeloop" 4 | working_path = os.getcwd() 5 | dst_path = os.path.join(working_path, pytimeloop_dir) 6 | try: 7 | os.system("git clone https://github.com/Accelergy-Project/timeloop-python.git {}".format(dst_path)) 8 | os.chdir(f'{dst_path}') 9 | os.system(f"git checkout {commit_id}") 10 | os.system(f'git am {working_path}/patches/update_interface.patch') 11 | os.system('git submodule update --init') 12 | os.system('rm -rf build') 13 | os.system('pip install -e .') 14 | except: 15 | "Something wring when installing pytimeloop, please check pytimeloop repository for detailed installation step" 16 | 17 | 18 | -------------------------------------------------------------------------------- /gamma.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maestro-project/gamma-timeloop/48a2cf905c04fa2a1b59328fc981977faabd53e5/gamma.jpg -------------------------------------------------------------------------------- /patches/update_interface.patch: -------------------------------------------------------------------------------- 1 | From b5eb9c6d9817f5dd69bca9f981ffd061e44b0e33 Mon Sep 17 00:00:00 2001 2 | From: felix 3 | Date: Tue, 7 Jun 2022 15:37:40 -0400 4 | Subject: [PATCH] Interface update on Model.py 5 | 6 | --- 7 | pytimeloop/app/model.py | 59 ++++++++++++++++++++++--------------- 8 | pytimeloop/model.py | 65 ++++++++++++++++++++++++++++++++++++++--- 9 | 2 files changed, 96 insertions(+), 28 deletions(-) 10 | 11 | diff --git a/pytimeloop/app/model.py b/pytimeloop/app/model.py 12 | index 01b5b11..e580023 100644 13 | --- a/pytimeloop/app/model.py 14 | +++ b/pytimeloop/app/model.py 15 | @@ -4,49 +4,52 @@ from pytimeloop.engine import Accelerator 16 | from pytimeloop.model import ArchSpecs, SparseOptimizationInfo 17 | from pytimeloop.mapping import ArchConstraints, Mapping 18 | from pytimeloop.problem import Workload 19 | - 20 | +import os 21 | import logging 22 | 23 | 24 | + 25 | + 26 | + 27 | + 28 | class Model: 29 | - def __init__(self, cfg: Config, out_dir: str, auto_bypass_on_failure=False, 30 | - out_prefix='', log_level=logging.INFO): 31 | + def __init__(self, cfg: Config, out_dir: str='.', auto_bypass_on_failure=False, 32 | + out_prefix='', log_level=logging.WARNING, dump_file=True ): 33 | # Setup logger 34 | self.log_level = log_level 35 | self.model_logger = logging.getLogger('pytimeloop.app.Model') 36 | self.model_logger.setLevel(log_level) 37 | 38 | - # timeloop-model configurations 39 | - self.auto_bypass_on_failure = auto_bypass_on_failure 40 | - self.out_prefix = out_prefix 41 | semi_qualified_prefix = 'timeloop-model' 42 | - self.out_prefix = out_dir + '/' + semi_qualified_prefix 43 | - 44 | + semi_qualified_prefix = semi_qualified_prefix + out_prefix 45 | + out_prefix = os.path.join(out_dir, semi_qualified_prefix) 46 | # Architecture configuration 47 | self.arch_specs = ArchSpecs(cfg['architecture']) 48 | - self.arch_specs.generate_tables( 49 | - cfg, semi_qualified_prefix, out_dir, self.out_prefix, log_level) 50 | + if dump_file: 51 | + self.arch_specs.generate_tables( 52 | + cfg, semi_qualified_prefix, out_dir, out_prefix, self.log_level) 53 | + 54 | 55 | # Problem configuration 56 | self.workload = Workload(cfg['problem']) 57 | self.model_logger.info('Problem configuration complete.') 58 | 59 | - self.arch_props = ArchProperties(self.arch_specs) 60 | + # self.arch_props = ArchProperties(self.arch_specs) 61 | 62 | # Architecture constraints 63 | - self.constraints = ArchConstraints( 64 | - self.arch_props, self.workload, cfg['architecture_constraints']) 65 | - self.model_logger.info('Architecture configuration complete.') 66 | + # self.constraints = ArchConstraints( 67 | + # self.arch_props, self.workload, cfg['architecture_constraints']) 68 | + # self.model_logger.info('Architecture configuration complete.') 69 | 70 | # Mapping configuration 71 | self.mapping = Mapping(cfg['mapping'], self.arch_specs, self.workload) 72 | self.model_logger.info('Mapping construction complete.') 73 | 74 | # Validate mapping against architecture constraints 75 | - if not self.constraints.satisfied_by(self.mapping): 76 | - self.model_logger.error( 77 | - 'Mapping violates architecture constraints.') 78 | - raise ValueError('Mapping violates architecture constraints.') 79 | + # if not self.constraints.satisfied_by(self.mapping): 80 | + # self.model_logger.error( 81 | + # 'Mapping violates architecture constraints.') 82 | + # raise ValueError('Mapping violates architecture constraints.') 83 | 84 | # Sparse optimizations 85 | if 'sparse_optimizations' in cfg: 86 | @@ -56,11 +59,19 @@ class Model: 87 | self.sparse_optimizations = SparseOptimizationInfo( 88 | sparse_opt_cfg, self.arch_specs) 89 | 90 | + 91 | + 92 | def run(self): 93 | - engine = Accelerator(self.arch_specs) 94 | + try: 95 | + engine = Accelerator(self.arch_specs) 96 | + 97 | + eval_stat = engine.evaluate(self.mapping, 98 | + self.workload, 99 | + self.sparse_optimizations, 100 | + log_level=self.log_level) 101 | + return eval_stat 102 | + except: 103 | + return None 104 | + 105 | + 106 | 107 | - eval_stat = engine.evaluate(self.mapping, 108 | - self.workload, 109 | - self.sparse_optimizations, 110 | - log_level=self.log_level) 111 | - return eval_stat 112 | diff --git a/pytimeloop/model.py b/pytimeloop/model.py 113 | index f4cfa6a..f8e759b 100644 114 | --- a/pytimeloop/model.py 115 | +++ b/pytimeloop/model.py 116 | @@ -21,17 +21,17 @@ class ArchSpecs(NativeArchSpecs): 117 | root_node = native_root_cfg.get_root() 118 | if 'ERT' in root_node: 119 | logger.info('Found Accelergy ERT, replacing internal energy model') 120 | - self.parse_accelergy_ert(root_node['ert']) 121 | + self.parse_accelergy_ert(root_node['ERT']) 122 | if 'ART' in root_node: 123 | logger.info( 124 | 'Found Accelergy ART, replacing internal area model') 125 | - self.parse_accelergy_art(root_node['art']) 126 | + self.parse_accelergy_art(root_node['ART']) 127 | else: 128 | _, native_arch_cfg = config['architecture'].get_native() 129 | if 'subtree' in native_arch_cfg or 'local' in native_arch_cfg: 130 | - with open('tmp-accelergy.yaml', 'w+') as f: 131 | + with open(f'{semi_qualified_prefix}-tmp-accelergy.yaml', 'w+') as f: 132 | f.write(config.dump_yaml()) 133 | - invoke_accelergy(['tmp-accelergy.yaml'], 134 | + invoke_accelergy([f'{semi_qualified_prefix}-tmp-accelergy.yaml'], 135 | semi_qualified_prefix, out_dir) 136 | ert_path = out_prefix + '.ERT.yaml' 137 | # Have to store config in a variable, so it doesn't get 138 | @@ -54,3 +54,60 @@ class SparseOptimizationInfo(NativeSparseOptimizationInfo): 139 | def __init__(self, sparse_config: Config, arch_specs: ArchSpecs): 140 | _, native_sparse_config_node = sparse_config.get_native() 141 | super().__init__(native_sparse_config_node, arch_specs) 142 | +from bindings import (NativeArchSpecs, NativeConfig, 143 | + NativeSparseOptimizationInfo) 144 | +from .accelergy_interface import invoke_accelergy 145 | +from .config import Config 146 | + 147 | +import logging 148 | + 149 | + 150 | +class ArchSpecs(NativeArchSpecs): 151 | + def __init__(self, config: Config): 152 | + _, native_arch_node = config.get_native() 153 | + super().__init__(native_arch_node) 154 | + 155 | + def generate_tables(self, config: Config, semi_qualified_prefix, out_dir, 156 | + out_prefix, log_level=logging.INFO): 157 | + # Setup logger 158 | + logger = logging.getLogger(__name__ + '.' + __class__.__name__) 159 | + logger.setLevel(log_level) 160 | + 161 | + native_root_cfg, native_cfg = config.get_native() 162 | + root_node = native_root_cfg.get_root() 163 | + if 'ERT' in root_node: 164 | + logger.info('Found Accelergy ERT, replacing internal energy model') 165 | + self.parse_accelergy_ert(root_node['ERT']) 166 | + if 'ART' in root_node: 167 | + logger.info( 168 | + 'Found Accelergy ART, replacing internal area model') 169 | + self.parse_accelergy_art(root_node['ART']) 170 | + else: 171 | + _, native_arch_cfg = config['architecture'].get_native() 172 | + if 'subtree' in native_arch_cfg or 'local' in native_arch_cfg: 173 | + with open(f'{semi_qualified_prefix}-tmp-accelergy.yaml', 'w+') as f: 174 | + f.write(config.dump_yaml()) 175 | + invoke_accelergy([f'{semi_qualified_prefix}-tmp-accelergy.yaml'], 176 | + semi_qualified_prefix, out_dir) 177 | + ert_path = out_prefix + '.ERT.yaml' 178 | + # Have to store config in a variable, so it doesn't get 179 | + # garbage collected. CompoundConfigNode referes to it. 180 | + ert_cfg = NativeConfig(ert_path) 181 | + ert = ert_cfg.get_root().lookup('ERT') 182 | + logger.info('Generated Accelergy ERT to replace internal ' 183 | + 'energy model') 184 | + self.parse_accelergy_ert(ert) 185 | + 186 | + art_path = out_prefix + '.ART.yaml' 187 | + art_cfg = NativeConfig(art_path) 188 | + art = art_cfg.get_root()['ART'] 189 | + logger.info('Generated Accelergy ART to replace internal ' 190 | + 'energy model') 191 | + self.parse_accelergy_art(art) 192 | + 193 | + 194 | +class SparseOptimizationInfo(NativeSparseOptimizationInfo): 195 | + def __init__(self, sparse_config: Config, arch_specs: ArchSpecs): 196 | + _, native_sparse_config_node = sparse_config.get_native() 197 | + super().__init__(native_sparse_config_node, arch_specs) 198 | + 199 | -- 200 | 2.17.1 201 | 202 | -------------------------------------------------------------------------------- /run_gamma_timeloop.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | python main.py --fitness1 energy 3 | cd ../ -------------------------------------------------------------------------------- /run_gamma_timeloop_multiObjective.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | python main.py --fitness1 edp --fitness2 latency --fitness3 energy 3 | cd ../ -------------------------------------------------------------------------------- /src/README.md: -------------------------------------------------------------------------------- 1 | # GAMMA-TimeLoop # 2 | [GAMMA: Automating the HW Mapping of DNN Models on 3 | Accelerators via Genetic Algorithm](https://dl.acm.org/doi/10.1145/3400302.3415639) 4 | 5 | ### Parameter 6 | We support naive multi-objective optimization, where the user can specify up to three different objectives. If the user want single-objective optimization, simply don't specify fitness2 and fitness3. 7 | * fitness1: The fitness objective 8 | * fitness2: (Optional) The second objective 9 | * fitness3: (Optional) The third objective 10 | * config_path: Configuration path, should include arch.yaml, problem.yaml, (and sparse.yaml if sparsity is considered) 11 | * use_sparse: Enable it to explore sparse accelerator space, otherwise explore dense accelerator space 12 | * explore_bypass: Enable it to explore bypass buffer option 13 | * epochs: Number of generations 14 | * num_pops: Number of populations 15 | * save_chkpt: To save the trace of improvement over epoch or not. Specify if the user want to save the trace. 16 | * report_dir: The report directory for the generated map.yaml and the trace-file 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /src/gamma_timeloop_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import yaml 3 | import os, sys 4 | import copy 5 | from functools import reduce 6 | import random 7 | from timeloop_env import TimeloopEnv 8 | from multiprocessing.pool import Pool 9 | from concurrent.futures import ProcessPoolExecutor 10 | from multiprocessing import cpu_count 11 | import shutil 12 | from functools import cmp_to_key, partial 13 | from collections import defaultdict, OrderedDict 14 | from utils import timing, is_pareto 15 | import math 16 | import re 17 | import glob 18 | import pickle 19 | from datetime import datetime 20 | import pandas as pd 21 | 22 | class GammaTimeloopEnv(object): 23 | def __init__(self, in_config_dir='./in_config', fitness_obj=['latency'], report_dir='./report', 24 | use_pool=True, use_IO=True, log_level=0, debug=False, init_random_tile=False, to_par_RS=False, 25 | save_chkpt=False, use_sparse=True, density=None, explore_bypass=False, emulate_random=False): 26 | self.debug = bool(debug) 27 | self.fitness_obj = fitness_obj 28 | self.dim_note = ['N', 'K', 'C', 'Y', 'X', 'R', 'S'] 29 | self.parallizable_dim_note = ['N', 'K', 'C', 'Y', 'X'] if bool(to_par_RS ) is False else self.dim_note 30 | self.parallizable_dim_note_set = set(self.parallizable_dim_note) 31 | self.len_dimension = len(self.dim_note) 32 | self.timeloop_configfile_path = f'/tmp/out_config_{datetime.now().strftime("%H:%M:%S")}' 33 | # self.timeloop_configfile_path = f'./out_config' 34 | self.report_dir = report_dir 35 | self.use_sparse = use_sparse 36 | self.explore_bypass = explore_bypass 37 | self.density = self.get_default_density() if density is None else density 38 | self.timeloop_env = TimeloopEnv(config_path=self.timeloop_configfile_path, in_config_dir=in_config_dir, debug=self.debug, 39 | use_sparse=self.use_sparse, density=self.density) 40 | self.num_buf_levels = self.timeloop_env.get_num_buffer_levels() 41 | print(f'Number of buffer levels: {self.num_buf_levels}') 42 | self.buf_spmap_cstr = self.timeloop_env.get_buffer_spmap_cstr() 43 | self.buffers_with_spmap = list(self.timeloop_env.get_buffers_with_spmap()) 44 | self.use_pool = bool(use_pool) 45 | self.use_IO = bool(use_IO) 46 | self.log_level = log_level 47 | self.init_random_tile = bool(init_random_tile) 48 | self.idealperf = {} 49 | self.save_chkpt = save_chkpt 50 | self.fitness_record = [] 51 | self.all_fitness_record = [] 52 | self.sol_record = [] 53 | self.all_sol_record = [] 54 | self.emulate_random = emulate_random 55 | 56 | 57 | def get_default_density(self): 58 | density = {'Weights': 1, 59 | 'Inputs': 1, 60 | 'Outputs': 1} 61 | return density 62 | 63 | def set_dimension(self, dimension=None): 64 | if dimension is None: 65 | self.dimension, self.dimension_dict = self.timeloop_env.get_problem_info() 66 | else: 67 | self.dimension = dimension 68 | self.dimension_dict = self.get_dimension_dict(dimension) 69 | self.dimension_factor = self.get_dimension_factors(self.dimension_dict) 70 | self.dimension_prime = {key: self.get_prime_factors(self.dimension_dict[key]) for key in self.dim_note} 71 | self.idealperf['edp'], self.idealperf['latency'], self.idealperf['energy'] = self.timeloop_env.get_ideal_perf(self.dimension) 72 | self.idealperf['utilization'] = 1 73 | self.fitness_record = [] 74 | self.all_fitness_record = [] 75 | self.sol_record = [] 76 | self.all_sol_record = [] 77 | 78 | def get_dimension_dict(self, dim_value): 79 | return {note: value for note, value in zip(self.dim_note, dim_value)} 80 | 81 | def get_prime_factors(self, n): 82 | primes = defaultdict(int) 83 | while n % 2 == 0: 84 | primes['2'] += 1 85 | n = n // 2 86 | for i in range(3,int(math.sqrt(n))+1,2): 87 | while n % i== 0: 88 | primes[f'{i}'] += 1 89 | n = n // i 90 | if n > 2: 91 | primes[f'{n}'] += 1 92 | return primes 93 | 94 | def get_factors(self, n): 95 | return list(reduce(list.__add__, 96 | ([i, n // i] for i in range(1, int(n ** 0.5) + 1) if n % i == 0))) 97 | 98 | def get_dimension_factors(self, dimension_dict): 99 | dimension_factors = dict() 100 | for key, value in dimension_dict.items(): 101 | factors = self.get_factors(value) 102 | dimension_factors[key] = factors 103 | return dimension_factors 104 | 105 | def update_tile_for_buf_cstr(self, indv, max_trial=None): 106 | is_valid_tile = self.timeloop_env.check_tile_fit_buffer(indv) 107 | trial_id = 0 108 | if max_trial is None: 109 | max_trial = float('Inf') 110 | while(is_valid_tile is False and trial_id < max_trial): 111 | indv = self.mutate_tiles(indv, alpha=1, beta=0) 112 | is_valid_tile = self.timeloop_env.check_tile_fit_buffer(indv) 113 | trial_id += 1 114 | return indv 115 | 116 | def mutate_tiles(self, indv, alpha=1.0, beta=0.0, num_mu_loc=2, gen=1, max_trial=100): 117 | if random.random() < alpha: 118 | pick_dim = np.random.choice(['N','K', 'C', 'Y', 'X', 'R', 'S'], p = [0.1, 0.2, 0.2, 0.2, 0.2, 0.05, 0.05]) 119 | if random.random()< beta: 120 | pick_level1, pick_level2 = np.random.choice(np.arange(1, self.num_buf_levels+1), 2, replace=False) 121 | for _ in range(num_mu_loc): 122 | from_genome = indv[f'l{pick_level1}']['tile_size'] 123 | to_genome = indv[f'l{pick_level2}']['tile_size'] 124 | if len(from_genome[pick_dim]) == 0 and len(to_genome[pick_dim])==0: 125 | break 126 | elif len(from_genome[pick_dim]) == 0: 127 | from_genome, to_genome = to_genome, from_genome 128 | values = list(from_genome[pick_dim].keys()) 129 | pick_prime = np.random.choice(values) 130 | 131 | from_genome[pick_dim][pick_prime] -= 1 132 | if from_genome[pick_dim][pick_prime] == 0: 133 | del from_genome[pick_dim][pick_prime] 134 | to_genome[pick_dim][pick_prime] += 1 135 | pick_dim = np.random.choice(['K', 'C', 'Y', 'X', 'R', 'S'], p = [0.2, 0.2, 0.2, 0.2, 0.1, 0.1]) 136 | else: 137 | # random mutation 138 | random_tiles = self.init_random_tile_size(dims=pick_dim) 139 | for level in indv.keys(): 140 | indv[level]['tile_size'][pick_dim] = random_tiles[level][pick_dim] 141 | return indv 142 | 143 | def mutate_bypass(self, indv, alpha=0.5, beta=1.0): 144 | if random.random() < alpha: 145 | pick_level = np.random.choice(np.arange(1, self.num_buf_levels)) 146 | if random.random() < beta: 147 | pick_tensor = np.random.choice(['Weights', 'Inputs', 'Outputs']) 148 | indv[f'l{pick_level}']['bypass'][pick_tensor] = not indv[f'l{pick_level}']['bypass'][pick_tensor] 149 | else: 150 | indv[f'l{pick_level}']['bypass'] = {'Inputs':random.choice([True, False]), 'Weights': random.choice([True, False]), 'Outputs': random.choice([True, False])} 151 | return indv 152 | 153 | def mutate_par(self, indv, alpha=0.1, beta=1.0): 154 | if random.random() < alpha: 155 | pick_level = np.random.choice(self.buffers_with_spmap) 156 | if random.random() < beta: 157 | par_dims = indv[pick_level]['par_dims'] 158 | if len(par_dims)>0 and random.random() < 0.5: 159 | del_dim = np.random.choice(list(par_dims)) 160 | indv[pick_level]['par_dims'].remove(del_dim) 161 | else: 162 | if len(par_dims) == len(self.parallizable_dim_note_set): 163 | return indv 164 | new_dim = np.random.choice(list(self.parallizable_dim_note_set - par_dims)) 165 | indv[pick_level]['par_dims'].add(new_dim) 166 | else: 167 | indv[pick_level]['par_dims'] = set(np.random.choice(self.parallizable_dim_note, random.randint(1, len(self.parallizable_dim_note)), replace=False)) 168 | return indv 169 | 170 | def mutate_thread(self, indv, alpha=0.5, beta=0, gen=1): 171 | indv = self.mutate_order(indv) 172 | indv = self.mutate_par(indv) 173 | if self.explore_bypass: 174 | indv = self.mutate_bypass(indv) 175 | indv = self.mutate_level(indv) 176 | indv = self.mutate_tiles(indv) 177 | 178 | indv = self.update_tile_for_buf_cstr(indv, max_trial=None) 179 | indv = self.update_for_spmap_cstr(indv) 180 | return indv 181 | 182 | def mutate(self, pops, alpha, gen): 183 | for i in range(len(pops)): 184 | indv = pops[i] 185 | indv = self.mutate_order(indv, alpha=0.1) 186 | indv = self.mutate_par(indv, alpha=0.1) 187 | if self.explore_bypass: 188 | indv = self.mutate_bypass(indv, alpha=alpha) 189 | indv = self.mutate_level(indv, alpha=alpha) 190 | indv = self.mutate_tiles(indv, alpha=1, gen=gen) 191 | indv = self.update_tile_for_buf_cstr(indv) 192 | indv = self.update_for_spmap_cstr(indv) 193 | pops[i] = indv 194 | return pops 195 | 196 | def mutate_order(self, indv, alpha=0.1, beta=1.0): 197 | if random.random() < alpha: 198 | pick_level = np.random.choice(np.arange(1, self.num_buf_levels+1)) 199 | if random.random() < beta: 200 | loop_order = indv[f'l{pick_level}']['loop_order'] 201 | loop_order = list(loop_order) 202 | idxs = random.sample(set(np.arange(0, self.len_dimension)), 2) 203 | loop_order[idxs[0]], loop_order[idxs[1]] = loop_order[idxs[1]], loop_order[idxs[0]] 204 | indv[f'l{pick_level}']['loop_order'] = ''.join(loop_order) 205 | else: 206 | indv[f'l{pick_level}']['loop_order'] = "".join(np.random.permutation(['N', 'K', 'C', 'Y', 'X', 'R', 'S'])) 207 | return indv 208 | 209 | def mutate_level(self, indv, alpha=0.5): 210 | if random.random() < alpha: 211 | pick_level1, pick_level2 = np.random.choice(np.arange(1, self.num_buf_levels+1), 2, replace=False) 212 | indv[f'l{pick_level1}']['tile_size'], indv[f'l{pick_level2}']['tile_size'] = indv[f'l{pick_level2}']['tile_size'], indv[f'l{pick_level1}']['tile_size'] 213 | return indv 214 | 215 | def crossover(self, pops, parents, num_injects=0, alpha=0.5): 216 | if len(parents) ==1: 217 | for idx in range(len(pops)): 218 | pops[idx] = copy.deepcopy(parents[0]) 219 | else: 220 | for idx in range(0,len(pops)-num_injects,2): 221 | dad, mom = parents[random.randint(0, len(parents)-1)], parents[random.randint(0, len(parents)-1)] 222 | dad = copy.deepcopy(dad) 223 | mom = copy.deepcopy(mom) 224 | if random.random() < alpha: 225 | length = min(len(dad), len(mom)) 226 | change_item = np.random.choice(['tile_size', 'loop_order', 'par_dims', 'bypass']) 227 | # change_item = np.random.choice(['tile_size', 'loop_order', 'bypass']) 228 | # change_item = 'tile_size' 229 | pick_dim = np.random.choice(['K', 'C', 'Y', 'X']) 230 | for l in range(1, length+1): 231 | level = f'l{l}' 232 | if change_item == 'tile_size': 233 | dad[level][change_item][pick_dim], mom[level][change_item][pick_dim] = mom[level][change_item][pick_dim], dad[level][change_item][pick_dim] 234 | else: 235 | dad[level][change_item], mom[level][change_item] = mom[level][change_item], dad[level][change_item] 236 | pops[idx] = dad 237 | if idx + 1 < len(pops): 238 | pops[idx+1] = mom 239 | for idx in range(len(pops)-num_injects,len(pops)): 240 | pops[idx] = self.init_random_indv() 241 | return pops 242 | 243 | def get_prod(self, dicts): 244 | ret_value = 1 245 | for k, v in dicts.items(): 246 | ret_value *= ((int(k))**v) 247 | return ret_value 248 | 249 | def update_for_spmap_cstr(self, indv): 250 | to_append_to_toplevel = {k:defaultdict(int) for k in 'NKCYXRS'} 251 | for level in range(1, self.num_buf_levels+1): 252 | par_dims = indv[f'l{level}']['par_dims'] 253 | tile_sizes = OrderedDict({key: indv[f'l{level}']['tile_size'][key] for key in par_dims}) 254 | num_pars = [self.get_prod(v) for v in tile_sizes.values()] 255 | idx = 0 256 | for key, value in tile_sizes.items(): 257 | if np.prod(num_pars) > self.buf_spmap_cstr[f'l{level}']: 258 | for k, v in value.items(): 259 | to_append_to_toplevel[key][k] += v 260 | indv[f'l{level}']['tile_size'][key] = defaultdict(int) 261 | num_pars[idx] = 1 262 | idx += 1 263 | else: 264 | break 265 | for key, value in to_append_to_toplevel.items(): 266 | if len(value)>0: 267 | cur_value = indv[f'l{self.num_buf_levels}']['tile_size'][key] 268 | for k, v in value.items(): 269 | cur_value[k]+= v 270 | indv[f'l{self.num_buf_levels}']['tile_size'][key] = cur_value 271 | indv[f'l{self.num_buf_levels}']['par_dims'].remove(key) if key in indv[f'l{self.num_buf_levels}']['par_dims'] else None 272 | return indv 273 | 274 | def init_random_tile_size(self, dims='NKCYXRS'): 275 | tile_hierachy = {f'l{level}': {k:defaultdict(int) for k in dims} for level in range(1, 1+self.num_buf_levels)} 276 | for key in dims: 277 | tile_budget = self.dimension_prime[key] 278 | for k, v in tile_budget.items(): 279 | for _ in range(v): 280 | level = random.randint(1, self.num_buf_levels) 281 | tile_hierachy[f'l{level}'][key][k] +=1 282 | return tile_hierachy 283 | 284 | def init_random_single_level(self,buffer_level=1): 285 | genome = { 'tile_size': {key: defaultdict(int) for key in 'NKCYXRS'}, 286 | 'loop_order': "".join(np.random.permutation(['N', 'K', 'C', 'Y', 'X', 'R', 'S'])), 287 | 'par_dims': set(np.random.choice(self.parallizable_dim_note, random.randint(1, len(self.parallizable_dim_note)), replace=False)) if f'l{buffer_level}' in self.buffers_with_spmap else set(), 288 | 'bypass':{'Inputs':random.choice([True, False]), 'Weights': random.choice([True, False]), 'Outputs': random.choice([True, False])} if self.explore_bypass else {'Inputs':False, 'Weights': False, 'Outputs': False}, 289 | } 290 | if buffer_level==self.num_buf_levels: 291 | genome = { 'tile_size':{key: defaultdict(int) for key in 'NKCYXRS'}, 292 | 'loop_order': "".join(np.random.permutation(['N', 'K', 'C', 'Y', 'X', 'R', 'S'])), 293 | 'par_dims': set(), 294 | 'bypass':{'Inputs':False, 'Weights': False, 'Outputs': False} 295 | } 296 | return genome 297 | 298 | def init_random_indv(self): 299 | indv = {f'l{i}': self.init_random_single_level(buffer_level=i) for i in range(1, 1+self.num_buf_levels)} 300 | tile_hierachy = self.init_random_tile_size() 301 | for i in range(1, 1+self.num_buf_levels): 302 | indv[f'l{i}']['tile_size'] = tile_hierachy[f'l{i}'] 303 | return indv 304 | 305 | def init_single_level(self, buffer_level=1): 306 | genome = { 'tile_size': {key: defaultdict(int) for key in 'NKCYXRS'}, 307 | 'loop_order': 'NKCYXRS', 308 | # 'par_dims': {np.random.choice(self.parallizable_dim_note)} if f'l{buffer_level}' in self.buffers_with_spmap else set(), 309 | 'par_dims': {'K', 'X','Y'} if f'l{buffer_level}' in self.buffers_with_spmap else set(), 310 | 'bypass':{'Inputs':False, 'Weights': False, 'Outputs': False} 311 | } 312 | if buffer_level==self.num_buf_levels: 313 | genome = { 'tile_size': {key: self.get_prime_factors(self.dimension_dict[key]) for key in 'NKCYXRS'}, 314 | 'loop_order': 'NKCYXRS', 315 | 'par_dims': set(), 316 | 'bypass':{'Inputs':False, 'Weights': False, 'Outputs': False} 317 | } 318 | return genome 319 | 320 | def init_indv(self,): 321 | indv = {f'l{i}': self.init_single_level(buffer_level=i) for i in range(1, 1+self.num_buf_levels)} 322 | if self.init_random_tile: 323 | tile_hierachy = self.init_random_tile_size() 324 | for i in range(1, 1+self.num_buf_levels): 325 | indv[f'l{i}']['tile_size'] = tile_hierachy[f'l{i}'] 326 | return indv 327 | 328 | def init_pops(self, num_pops, random=False): 329 | if random: 330 | pops = [self.init_random_indv() for _ in range(num_pops//2)] + [self.init_random_indv() for _ in range(num_pops - num_pops//2)] 331 | else: 332 | pops = [self.init_indv() for _ in range(num_pops//2)] + [self.init_random_indv() for _ in range(num_pops - num_pops//2)] 333 | return pops, np.ones((num_pops, len(self.fitness_obj))) * np.NINF 334 | 335 | def get_random_indv(self, num_indvs=2): 336 | new_pops = [] 337 | for i in range(num_indvs): 338 | indv = self.init_random_indv() 339 | new_pops.append(indv) 340 | return new_pops 341 | 342 | def sort_rank_func(self, cand1, cand2, delta=0.1): 343 | def helper(item1, item2, is_last=False): 344 | margin = abs(((item1+item2) /2) * delta) if not is_last else 0 345 | if margin == float('Inf'): 346 | margin = 0 347 | if item1 > item2 + margin: 348 | return 1 349 | elif item1 +margin < item2: 350 | return -1 351 | else: 352 | return 0 353 | fitness_len = len(cand1) - 1 354 | for i in range(fitness_len): 355 | ret = helper(cand1[i], cand2[i], is_last=(i==0 or i==fitness_len-1)) 356 | if ret != 0: 357 | return ret 358 | return ret 359 | 360 | def select_parents(self, pops, fitness, num_parents, num_elites, num_pops, use_soft_margin=False, use_pareto=True): 361 | if use_pareto: 362 | parereto_masks, num_paretros = is_pareto(fitness, return_mask=True) 363 | fitness_list = [tuple([m]+list(ar)+[-i]) for i, (m, ar) in enumerate(zip(parereto_masks, fitness))] 364 | else: 365 | num_paretros = 1 366 | fitness_list = [tuple(list(ar)+[-i]) for i, ar in enumerate(fitness)] 367 | if not use_soft_margin: 368 | sort_rank_func = partial(self.sort_rank_func, delta=0) 369 | else: 370 | sort_rank_func = self.sort_rank_func 371 | fitness_list = sorted(fitness_list, key=cmp_to_key(sort_rank_func), reverse=True) 372 | idx = [int(-ar[-1]) for ar in fitness_list] 373 | new_pop = [pops[i] for i in idx][:num_pops] 374 | new_fitness = fitness[idx][:num_pops] 375 | num_parents = min(num_pops, max(num_paretros, num_parents)) 376 | num_elites = min(num_pops, max(num_paretros, num_elites)) 377 | parents = copy.deepcopy(new_pop[:num_parents]) 378 | elites = copy.deepcopy(new_pop[:num_elites]) 379 | elites_fitness = copy.deepcopy(new_fitness[:num_elites]) 380 | return new_pop, new_fitness, parents, elites, elites_fitness, num_parents, num_elites 381 | 382 | def thread_fun(self, args, do_mutate=True, fitness_obj=None): 383 | indv, pool_idx = args 384 | if do_mutate: 385 | indv = self.mutate_thread(indv, alpha=self.alpha, beta=self.beta, gen=self.gen) 386 | fit = self.timeloop_env.run_timeloop( self.dimension, indv, pool_idx=pool_idx, use_IO=self.use_IO, 387 | fitness_obj=fitness_obj if fitness_obj is not None else self.fitness_obj) 388 | if do_mutate: 389 | return indv, fit 390 | else: 391 | return fit 392 | 393 | def evaluate(self, pops, fitness, pool, num_pops=10): 394 | if not pool: 395 | for i, indv in enumerate(pops): 396 | ret = self.thread_fun((indv, 0)) 397 | indv, fit = ret 398 | pops[i] = indv 399 | fitness[i] = fit 400 | else: 401 | while(1): 402 | try: 403 | rets = list(pool.map(self.thread_fun, zip(pops, np.arange(len(pops))))) 404 | for i, ret in enumerate(rets): 405 | indv, fit = ret 406 | pops[i] = indv 407 | fitness[i] = fit 408 | break 409 | except Exception as e: 410 | if self.log_level>2: 411 | print(type(e).__name__, e) 412 | pool.shutdown(wait=False) 413 | pool = ProcessPoolExecutor(num_pops) 414 | return pool, fitness, pops 415 | 416 | def create_timeloop_report(self, indv, dir_path='./report'): 417 | fitness = self.thread_fun((indv, 0), do_mutate=False) 418 | stats = self.thread_fun((indv, 0), do_mutate=False, fitness_obj='all') 419 | os.makedirs(dir_path, exist_ok=True) 420 | columns = ['EDP (uJ cycles)', 'Cycles', 'Energy (uJ)', 'Utilization', 'pJ/Algorithm-Compute', 'pJ/Actual-Compute', 'Area (mm2)'][:len(stats)] 421 | if self.use_IO is False: 422 | self.timeloop_env.dump_timeloop_config_files(self.dimension, indv, dir_path) 423 | else: 424 | os.system(f'cp -d -r {os.path.join(self.timeloop_configfile_path, "pool-0")}/* {dir_path}') 425 | with open(os.path.join(dir_path,'Gamma-Timeloop.txt'), 'w') as fd: 426 | value = [f'{v:.5e}' for v in fitness] 427 | fd.write(f'Achieved Fitness: {value}\n') 428 | fd.write(f'Statistics\n') 429 | fd.write(f'{columns}\n') 430 | fd.write(f'{stats}') 431 | stats = np.array(stats).reshape(1, -1) 432 | df = pd.DataFrame(stats, columns=columns) 433 | df.to_csv(os.path.join(dir_path,'Gamma-Timeloop.csv')) 434 | 435 | def run(self, dimension=None, num_pops=100, num_gens=100, elite_ratio=0.05, parents_ratio=0.5, inject_ratio=0.1): 436 | self.set_dimension(dimension) 437 | num_injects = max(1, int(num_pops*inject_ratio)) 438 | num_parents = int(num_pops*parents_ratio) 439 | num_elites = max(1, int(num_pops*elite_ratio)) 440 | pops, fitness = self.init_pops(num_pops) 441 | if self.use_pool: 442 | pool = ProcessPoolExecutor(num_pops) 443 | self.timeloop_env.create_pool_env(num_pools=num_pops, dimension=self.dimension, indv=pops[0], use_IO=self.use_IO) 444 | else: 445 | pool = None 446 | self.timeloop_env.create_pool_env(num_pools=1, dimension=self.dimension, indv=pops[0], use_IO=self.use_IO) 447 | for g in range(num_gens): 448 | if self.emulate_random: 449 | pops, fitness = self.init_pops(num_pops, random=True) 450 | if g == 0: 451 | pops, fitness, parents, elites, elites_fitness, num_parents, num_elites = self.select_parents(pops, fitness, num_parents, num_elites, num_pops) 452 | if g == 0: 453 | alpha = 1 454 | else: 455 | alpha = 0.5 456 | self.alpha = alpha 457 | self.beta = 0.5 458 | self.gen = g 459 | pops = self.crossover(pops, parents=parents, num_injects=num_injects, alpha=alpha) 460 | pool, fitness, pops = self.evaluate(pops, fitness, pool, num_pops) 461 | pops = elites + pops 462 | fitness = np.concatenate((elites_fitness, fitness), axis=0) 463 | pops, fitness, parents, elites, elites_fitness, num_parents, num_elites = self.select_parents(pops, fitness, num_parents, num_elites, num_pops) 464 | best_idx = 0 465 | best_sol = pops[best_idx] 466 | print(f'[Gen{g}] fitness: {fitness[best_idx]}') 467 | self.record_chkpt(pops, fitness, best_idx, g, num_gens, num_pops) 468 | # print(f'[Gen{g}] fitness: {fitness[best_idx]} Sol: {self.get_genome(best_sol)}') 469 | print(f'Achieved Fitness: {fitness[best_idx]}') 470 | self.create_timeloop_report(best_sol, dir_path=self.report_dir) 471 | self.clean_timeloop_output_files() 472 | 473 | def record_chkpt(self, pops, fitness, best_idx, gen, num_gens, num_pops): 474 | if self.save_chkpt: 475 | self.all_fitness_record.append(copy.deepcopy(fitness)) 476 | self.all_sol_record.append(copy.deepcopy(pops)) 477 | self.fitness_record.append(copy.deepcopy(fitness[best_idx])) 478 | self.sol_record.append(copy.deepcopy(pops[best_idx])) 479 | cur_gen = gen+1 480 | if cur_gen == num_gens or cur_gen%50==0: 481 | with open(os.path.join(self.report_dir, 'gamma_chkpt.plt'), 'wb') as fd: 482 | chkpt = { 483 | 'fitness_record': self.fitness_record, 484 | 'all_fitness_record':self.all_fitness_record, 485 | 'all_sol_record':self.all_sol_record, 486 | 'sol_record':self.sol_record, 487 | 'best_fitness': self.fitness_record[-1], 488 | 'num_gens': num_gens, 489 | 'num_pops': num_pops, 490 | 'sampled_points': num_gens * num_pops} 491 | pickle.dump(chkpt, fd) 492 | 493 | def get_genome(self, indv): 494 | l2_tile_size, l1_tile_size = indv['l2_tile_size'], indv['l1_tile_size'] 495 | l2_loop_order, l1_loop_order = indv['l2_loop_order'],indv['l1_loop_order'] 496 | l2_par, l1_par = indv['par_dims'] 497 | l2_tile_dict = self.get_dimension_dict(l2_tile_size) 498 | l1_tile_dict = self.get_dimension_dict(l1_tile_size) 499 | genome_l2 = [[l2_par, self.num_pes]] + [[d, l2_tile_dict[d]] for d in l2_loop_order] 500 | genome_l1 = [[l1_par, 1]] + [[d, l1_tile_dict[d]] for d in l1_loop_order] 501 | genome = genome_l2 + genome_l1 502 | return genome 503 | 504 | def clean_timeloop_output_files(self): 505 | shutil.rmtree(self.timeloop_configfile_path) 506 | out_prefix = "./timeloop-model." 507 | output_file_names = [] 508 | output_file_names.append( "tmp-accelergy.yaml") 509 | output_file_names.append(out_prefix + "accelergy.log") 510 | output_file_names.extend(glob.glob("*accelergy.log")) 511 | output_file_names.extend(glob.glob("*tmp-accelergy.yaml")) 512 | output_file_names.append(out_prefix + ".log") 513 | output_file_names.append(out_prefix + "ART.yaml") 514 | output_file_names.append(out_prefix + "ART_summary.yaml") 515 | output_file_names.append(out_prefix + "ERT.yaml") 516 | output_file_names.append(out_prefix + "ERT_summary.yaml") 517 | output_file_names.append(out_prefix + "flattened_architecture.yaml") 518 | output_file_names.append(out_prefix + "map+stats.xml") 519 | output_file_names.append(out_prefix + "map.txt") 520 | output_file_names.append(out_prefix + "stats.txt") 521 | for f in output_file_names: 522 | if os.path.exists(f): 523 | os.remove(f) 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | -------------------------------------------------------------------------------- /src/in_config/arch.yaml: -------------------------------------------------------------------------------- 1 | architecture: 2 | subtree: 3 | - local: 4 | - attributes: 5 | block-size: 4 6 | width: 64 7 | word-bits: 16 8 | class: DRAM 9 | name: MainMemory 10 | name: System 11 | subtree: 12 | - attributes: 13 | technology: 40nm 14 | local: 15 | - attributes: 16 | block-size: 4 17 | depth: 16384 18 | width: 64 19 | word-bits: 16 20 | metadata_storage_depth: 655360 21 | metadata_storage_width: 4 22 | metadata_datawidth: 4 23 | class: SRAM 24 | name: GlobalBuffer 25 | name: Chip 26 | subtree: 27 | - local: 28 | - attributes: 29 | block-size: 1 30 | depth: 256 31 | width: 16 32 | word-bits: 16 33 | metadata_storage_depth: 327680 34 | metadata_storage_width: 4 35 | metadata_datawidth: 4 36 | class: SRAM 37 | name: RegisterFile[0..255] 38 | - attributes: 39 | datawidth: 16 40 | class: intmac 41 | name: MACC[0..1023] 42 | name: PEarrray 43 | version: 0.2 44 | -------------------------------------------------------------------------------- /src/in_config/problem.yaml: -------------------------------------------------------------------------------- 1 | problem: 2 | shape: 3 | name: Conv2D 4 | dimensions: [N, C, P, Q, R, S, M] 5 | data-spaces: 6 | - name: Weights 7 | projection: 8 | - [ [C] ] 9 | - [ [R] ] 10 | - [ [S] ] 11 | - [ [M] ] 12 | - name: Inputs 13 | projection: 14 | - [ [C] ] 15 | - [ [N] ] 16 | - [ [S], [Q] ] 17 | - [ [R], [P] ] 18 | - name: Outputs 19 | projection: 20 | - [ [N] ] 21 | - [ [P] ] 22 | - [ [Q] ] 23 | - [ [M] ] 24 | read-write: True 25 | 26 | instance: 27 | N: 16 28 | C: 144 29 | M: 1 30 | N: 16 31 | R: 3 32 | S: 3 33 | P: 56 34 | Q: 56 -------------------------------------------------------------------------------- /src/in_config/sparse.yaml: -------------------------------------------------------------------------------- 1 | sparse_optimizations: 2 | targets: 3 | - name: MainMemory 4 | representation-format: 5 | data-spaces: 6 | - name: Weights 7 | rank-application-order: inner-to-outer 8 | ranks: 9 | - format: UOP 10 | - format: UOP 11 | - format: UOP 12 | - format: UOP 13 | - format: UOP 14 | - format: UOP 15 | - format: UOP 16 | - format: UOP 17 | - format: UOP 18 | - format: UOP 19 | - format: UOP 20 | - format: UOP 21 | - format: UOP 22 | - format: UOP 23 | - format: UOP 24 | - format: UOP 25 | - format: UOP 26 | - format: UOP 27 | - format: UOP 28 | - format: UOP 29 | - format: UOP 30 | - format: UOP 31 | - format: UOP 32 | - format: UOP 33 | - format: UOP 34 | - format: UOP 35 | - format: UOP 36 | - format: UOP 37 | - format: UOP 38 | - format: UOP 39 | - format: UOP 40 | - format: UOP 41 | - format: UOP 42 | - format: UOP 43 | - format: UOP 44 | - format: UOP 45 | - format: UOP 46 | - format: UOP 47 | - format: UOP 48 | - format: UOP 49 | - format: UOP 50 | - format: UOP 51 | - format: UOP 52 | - format: UOP 53 | - format: UOP 54 | - format: CP 55 | - name: Inputs 56 | rank-application-order: inner-to-outer 57 | ranks: 58 | - format: UOP 59 | - format: UOP 60 | - format: UOP 61 | - format: UOP 62 | - format: UOP 63 | - format: UOP 64 | - format: UOP 65 | - format: UOP 66 | - format: UOP 67 | - format: UOP 68 | - format: UOP 69 | - format: UOP 70 | - format: UOP 71 | - format: UOP 72 | - format: UOP 73 | - format: UOP 74 | - format: UOP 75 | - format: UOP 76 | - format: UOP 77 | - format: UOP 78 | - format: UOP 79 | - format: UOP 80 | - format: UOP 81 | - format: UOP 82 | - format: UOP 83 | - format: UOP 84 | - format: UOP 85 | - format: UOP 86 | - format: UOP 87 | - format: UOP 88 | - format: UOP 89 | - format: UOP 90 | - format: UOP 91 | - format: UOP 92 | - format: UOP 93 | - format: UOP 94 | - format: UOP 95 | - format: UOP 96 | - format: UOP 97 | - format: UOP 98 | - format: UOP 99 | - format: UOP 100 | - format: UOP 101 | - format: UOP 102 | - format: UOP 103 | - format: UOP 104 | - format: UOP 105 | - format: UOP 106 | - format: UOP 107 | - format: CP 108 | - name: Outputs 109 | rank-application-order: inner-to-outer 110 | ranks: 111 | - format: UOP 112 | - format: UOP 113 | - format: UOP 114 | - format: UOP 115 | - format: UOP 116 | - format: UOP 117 | - format: UOP 118 | - format: UOP 119 | - format: UOP 120 | - format: UOP 121 | - format: UOP 122 | - format: UOP 123 | - format: UOP 124 | - format: UOP 125 | - format: UOP 126 | - format: UOP 127 | - format: UOP 128 | - format: UOP 129 | - format: UOP 130 | - format: UOP 131 | - format: UOP 132 | - format: UOP 133 | - format: UOP 134 | - format: UOP 135 | - format: UOP 136 | - format: UOP 137 | - format: UOP 138 | - format: UOP 139 | - format: UOP 140 | - format: UOP 141 | - format: UOP 142 | - format: UOP 143 | - format: UOP 144 | - format: UOP 145 | - format: UOP 146 | - format: UOP 147 | - format: UOP 148 | - format: UOP 149 | - format: UOP 150 | - format: UOP 151 | - format: CP 152 | action-optimization: 153 | - type: skipping 154 | options: 155 | - target: Inputs 156 | condition-on: [ Weights, Outputs ] 157 | - type: skipping 158 | options: 159 | - target: Weights 160 | condition-on: [ Inputs, Outputs ] 161 | - type: skipping 162 | options: 163 | - target: Outputs 164 | condition-on: [ Weights, Inputs ] 165 | - name: GlobalBuffer 166 | representation-format: 167 | data-spaces: 168 | - name: Weights 169 | rank-application-order: inner-to-outer 170 | ranks: 171 | - format: UOP 172 | - format: UOP 173 | - format: UOP 174 | - format: UOP 175 | - format: UOP 176 | - format: UOP 177 | - format: UOP 178 | - format: UOP 179 | - format: UOP 180 | - format: UOP 181 | - format: UOP 182 | - format: UOP 183 | - format: UOP 184 | - format: UOP 185 | - format: UOP 186 | - format: UOP 187 | - format: UOP 188 | - format: UOP 189 | - format: UOP 190 | - format: UOP 191 | - format: UOP 192 | - format: UOP 193 | - format: UOP 194 | - format: UOP 195 | - format: UOP 196 | - format: UOP 197 | - format: UOP 198 | - format: UOP 199 | - format: UOP 200 | - format: UOP 201 | - format: UOP 202 | - format: UOP 203 | - format: UOP 204 | - format: UOP 205 | - format: UOP 206 | - format: UOP 207 | - format: UOP 208 | - format: UOP 209 | - format: UOP 210 | - format: CP 211 | - name: Inputs 212 | rank-application-order: inner-to-outer 213 | ranks: 214 | - format: UOP 215 | - format: UOP 216 | - format: UOP 217 | - format: UOP 218 | - format: UOP 219 | - format: UOP 220 | - format: UOP 221 | - format: UOP 222 | - format: UOP 223 | - format: UOP 224 | - format: UOP 225 | - format: UOP 226 | - format: UOP 227 | - format: UOP 228 | - format: UOP 229 | - format: UOP 230 | - format: UOP 231 | - format: UOP 232 | - format: UOP 233 | - format: UOP 234 | - format: UOP 235 | - format: UOP 236 | - format: UOP 237 | - format: UOP 238 | - format: UOP 239 | - format: UOP 240 | - format: UOP 241 | - format: UOP 242 | - format: UOP 243 | - format: UOP 244 | - format: UOP 245 | - format: UOP 246 | - format: UOP 247 | - format: UOP 248 | - format: UOP 249 | - format: UOP 250 | - format: UOP 251 | - format: UOP 252 | - format: UOP 253 | - format: UOP 254 | - format: UOP 255 | - format: CP 256 | - name: Outputs 257 | rank-application-order: inner-to-outer 258 | ranks: 259 | - format: UOP 260 | - format: UOP 261 | - format: UOP 262 | - format: UOP 263 | - format: UOP 264 | - format: UOP 265 | - format: UOP 266 | - format: UOP 267 | - format: UOP 268 | - format: UOP 269 | - format: UOP 270 | - format: UOP 271 | - format: UOP 272 | - format: UOP 273 | - format: UOP 274 | - format: UOP 275 | - format: UOP 276 | - format: UOP 277 | - format: UOP 278 | - format: UOP 279 | - format: UOP 280 | - format: UOP 281 | - format: UOP 282 | - format: UOP 283 | - format: UOP 284 | - format: UOP 285 | - format: UOP 286 | - format: UOP 287 | - format: UOP 288 | - format: UOP 289 | - format: UOP 290 | - format: UOP 291 | - format: UOP 292 | - format: UOP 293 | - format: UOP 294 | - format: UOP 295 | - format: UOP 296 | - format: CP 297 | action-optimization: 298 | - type: skipping 299 | options: 300 | - target: Inputs 301 | condition-on: [ Weights, Outputs ] 302 | - type: skipping 303 | options: 304 | - target: Weights 305 | condition-on: [ Inputs, Outputs ] 306 | - type: skipping 307 | options: 308 | - target: Outputs 309 | condition-on: [ Weights, Inputs ] 310 | - name: RegisterFile 311 | representation-format: 312 | data-spaces: 313 | - name: Weights 314 | rank-application-order: inner-to-outer 315 | ranks: 316 | - format: UOP 317 | - format: UOP 318 | - format: UOP 319 | - format: UOP 320 | - format: UOP 321 | - format: UOP 322 | - format: UOP 323 | - format: UOP 324 | - format: UOP 325 | - format: UOP 326 | - format: UOP 327 | - format: UOP 328 | - format: UOP 329 | - format: UOP 330 | - format: UOP 331 | - format: UOP 332 | - format: UOP 333 | - format: UOP 334 | - format: UOP 335 | - format: UOP 336 | - format: UOP 337 | - format: UOP 338 | - format: UOP 339 | - format: UOP 340 | - format: UOP 341 | - format: UOP 342 | - format: UOP 343 | - format: UOP 344 | - format: UOP 345 | - format: UOP 346 | - format: UOP 347 | - format: UOP 348 | - format: UOP 349 | - format: UOP 350 | - format: CP 351 | - name: Inputs 352 | rank-application-order: inner-to-outer 353 | ranks: 354 | - format: UOP 355 | - format: UOP 356 | - format: UOP 357 | - format: UOP 358 | - format: UOP 359 | - format: UOP 360 | - format: UOP 361 | - format: UOP 362 | - format: UOP 363 | - format: UOP 364 | - format: UOP 365 | - format: UOP 366 | - format: UOP 367 | - format: UOP 368 | - format: UOP 369 | - format: UOP 370 | - format: UOP 371 | - format: UOP 372 | - format: UOP 373 | - format: UOP 374 | - format: UOP 375 | - format: UOP 376 | - format: UOP 377 | - format: UOP 378 | - format: UOP 379 | - format: UOP 380 | - format: UOP 381 | - format: UOP 382 | - format: UOP 383 | - format: UOP 384 | - format: UOP 385 | - format: UOP 386 | - format: UOP 387 | - format: UOP 388 | - format: CP 389 | - name: Outputs 390 | rank-application-order: inner-to-outer 391 | ranks: 392 | - format: UOP 393 | - format: UOP 394 | - format: UOP 395 | - format: UOP 396 | - format: UOP 397 | - format: UOP 398 | - format: UOP 399 | - format: UOP 400 | - format: UOP 401 | - format: UOP 402 | - format: UOP 403 | - format: UOP 404 | - format: UOP 405 | - format: UOP 406 | - format: UOP 407 | - format: UOP 408 | - format: UOP 409 | - format: UOP 410 | - format: UOP 411 | - format: UOP 412 | - format: UOP 413 | - format: UOP 414 | - format: UOP 415 | - format: UOP 416 | - format: UOP 417 | - format: UOP 418 | - format: UOP 419 | - format: UOP 420 | - format: UOP 421 | - format: UOP 422 | - format: UOP 423 | - format: UOP 424 | - format: UOP 425 | - format: UOP 426 | - format: CP 427 | action-optimization: 428 | - type: skipping 429 | options: 430 | - target: Inputs 431 | condition-on: [ Weights, Outputs] 432 | - type: skipping 433 | options: 434 | - target: Weights 435 | condition-on: [ Inputs, Outputs ] 436 | - type: skipping 437 | options: 438 | - target: Outputs 439 | condition-on: [ Weights, Inputs ] 440 | - name: MACC 441 | compute-optimization: 442 | - type: skipping -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | from gamma_timeloop_env import GammaTimeloopEnv 2 | import argparse 3 | 4 | if __name__ == '__main__': 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('--fitness1', type=str, default="energy", help='1st order fitness objective') 7 | parser.add_argument('--fitness2', type=str, default=None, help='2nd order fitness objective') 8 | parser.add_argument('--fitness3', type=str, default=None, help='3rd order fitness objective') 9 | parser.add_argument('--num_pops', type=int, default=5,help='number of populations') 10 | parser.add_argument('--epochs', type=int, default=5, help='number of generations/epochs') 11 | parser.add_argument('--config_path', type=str, default='./in_config', 12 | help='Configuration path, should include arch.yaml, problem.yaml, (and sparse.yaml if sparsity is considered)') 13 | parser.add_argument('--report_dir', type=str, default='report', help='The report directory') 14 | parser.add_argument('--density', type=str, default='0.5,1,1', help='The density of Input, Output, Weight Tenor') 15 | parser.add_argument('--save_chkpt', action='store_true', default=False, help='Create a checkpoint when finished') 16 | parser.add_argument('--use_sparse', action='store_true', default=False, help='Execute Map Space Exploration on sparse accelerator') 17 | parser.add_argument('--explore_bypass', action='store_true', default=False, 18 | help='Enable it can add bypass buffer option in to the search space') 19 | opt = parser.parse_args() 20 | opt.num_gens = opt.epochs 21 | fitness = [opt.fitness1] 22 | fitness.append(opt.fitness2) if opt.fitness2 is not None else None 23 | fitness.append(opt.fitness3) if opt.fitness3 is not None else None 24 | print(f'Fitness Objective: {fitness}') 25 | density = opt.density.split(',') 26 | density = {'Inputs': float(density[0]), 'Outputs': float(density[1]), 'Weights': float(density[2])} 27 | gamma_timeloop = GammaTimeloopEnv(fitness_obj=fitness, report_dir=opt.report_dir, use_pool=True, use_IO=False, 28 | debug=False, in_config_dir=opt.config_path, density=density, 29 | save_chkpt=opt.save_chkpt, use_sparse=opt.use_sparse, 30 | explore_bypass=opt.explore_bypass) 31 | 32 | 33 | gamma_timeloop.run(dimension=None, num_pops=opt.num_pops, num_gens=opt.num_gens) 34 | -------------------------------------------------------------------------------- /src/parse_timeloop_output.py: -------------------------------------------------------------------------------- 1 | ''' 2 | From https://github.com/NVlabs/timeloop/blob/master/scripts/parse_timeloop_output.py 3 | ''' 4 | import argparse 5 | import numpy as np 6 | import os 7 | import pickle 8 | import pprint 9 | import xml.etree.ElementTree as ET 10 | 11 | 12 | # Output file names. 13 | out_prefix = "timeloop-model." 14 | report_prefix = out_prefix + 'stats.txt' 15 | xml_file_name = out_prefix + "map+stats.xml" 16 | 17 | def set_out_prefix(new_out_prefix): 18 | global out_prefix, xml_file_name 19 | out_prefix = new_out_prefix 20 | xml_file_name = out_prefix + "map+stats.xml" 21 | 22 | def get_stat(stats, stat, cast): 23 | items = stats.findall(stat)[0].findall('PerDataSpace')[0].findall('item') 24 | count = len(items) 25 | out = np.array([0]*count, dtype=cast) 26 | for j in range(count): 27 | if stat == 'ingresses': 28 | value = sum([cast(i.text) for i in items[j].findall('item')]) 29 | else: 30 | value = cast(items[j].text) 31 | out[j] = value 32 | return out 33 | 34 | def parse_timeloop_stats(filename): 35 | if (os.path.isdir(filename)): 36 | filename = os.path.join(filename, xml_file_name) 37 | tree = ET.parse(filename) 38 | root = tree.getroot() 39 | 40 | # Parse out the problem shape 41 | problem_dims = root.findall('a')[0].findall('workload_')[0].findall('bounds_')[0].findall('item') 42 | problem = [ int(pd.findall('second')[0].text) for pd in problem_dims ] #FIXedME generalize for non-conv problems 43 | 44 | macs = np.prod(problem) 45 | 46 | topology = root.findall('engine')[0].findall('topology_')[0] 47 | 48 | # Get the list of storage/arithmetic levels 49 | levels = topology.findall('levels_')[0] 50 | num_levels = int(levels.findall('count')[0].text) 51 | level_ptrs = levels.findall('item') 52 | 53 | # Get the list of networks 54 | networks = topology.findall('networks_')[0] 55 | num_networks = int(networks.findall('count')[0].text) 56 | network_ptrs = networks.findall('item') 57 | 58 | # Initialize a dictionary that stores energy breakdown and other statistics 59 | energy_breakdown_pJ = {} 60 | 61 | arithmetic_level_found = False 62 | 63 | # felix================= 64 | cycles = 0 65 | # ========================= 66 | for level_id in range(len(level_ptrs)): 67 | 68 | level_ptr = level_ptrs[level_id] 69 | 70 | level = level_ptr.findall('px')[0] 71 | 72 | # The XML structure is interesting. Every Level gets a , but 73 | # only the first object of each type gets a full class_id descriptor. 74 | # For example, the first model::BufferLevel item will get: 75 | # 76 | # but subsequent levels will get something like: 77 | # 78 | # with increasing object_ids. We can keep a table of new class_ids as 79 | # we encounter them, but for now we'll just hack something that works. 80 | 81 | 82 | # felix 2022/03/08================================= 83 | try: 84 | cycles_cand = int(level.findall('stats_')[0].findall('cycles')[0].text) 85 | cycles =max(cycles, cycles_cand) 86 | except: 87 | pass 88 | 89 | # ===================================================== 90 | 91 | 92 | # Is this the Arithmetic level (the only one)? 93 | if 'class_id' in level.attrib and level.attrib['class_name'] == "model::ArithmeticUnits": 94 | assert arithmetic_level_found == False 95 | arithmetic_level_found = True 96 | # felix 2022/03/08================================= 97 | # cycles = int(level.findall('cycles_')[0].text) 98 | # ================================================ 99 | utilized_instances = float(level.findall('utilized_instances_')[0].text) 100 | total_instances_list = level.findall('specs_')[0].findall('instances')[0].findall('t_') 101 | if total_instances_list == []: # this happens when no mapping is returned by timeloop 102 | total_instances = 1 # dummy value 103 | else: 104 | total_instances = float(level.findall('specs_')[0].findall('instances')[0].findall('t_')[0].text) 105 | arithmetic_utilization = utilized_instances/total_instances 106 | energy_breakdown_pJ['MAC'] = {'energy': float(level.findall('energy_')[0].text), 'utilization': arithmetic_utilization} 107 | continue 108 | 109 | # If we are here, we are not an arithmetic level. 110 | 111 | # Level specifications and stats. 112 | specs = level.findall('specs_')[0] 113 | stats = level.findall('stats_')[0] 114 | 115 | generic_level_specs = specs.findall('LevelSpecs')[0] 116 | level_name = generic_level_specs.findall('level_name')[0].text 117 | 118 | # Storage access energy 119 | reads_per_instance = get_stat(stats, 'reads', int) 120 | updates_per_instance = get_stat(stats, 'updates', int) 121 | fills_per_instance = get_stat(stats, 'fills', int) 122 | accesses_per_instance = reads_per_instance + updates_per_instance + fills_per_instance 123 | 124 | utilized_capacity = get_stat(stats, 'utilized_capacity', int) 125 | instances = get_stat(stats, 'utilized_instances', int) 126 | clusters = get_stat(stats, 'utilized_clusters', int) 127 | 128 | total_instances_obj = specs.findall('instances')[0].findall('t_') 129 | if len(total_instances_obj) == 0: 130 | total_instances = sum(instances) 131 | else: 132 | total_instances = int(total_instances_obj[0].text) 133 | 134 | total_capacity_obj = specs.findall('size')[0].findall('t_') 135 | if len(total_capacity_obj) == 0: 136 | total_capacity = sum(utilized_capacity) 137 | else: 138 | total_capacity = int(total_capacity_obj[0].text) 139 | 140 | energy_per_access_per_instance = get_stat(stats, 'energy_per_access', float) 141 | storage_access_energy_in_pJ = energy_per_access_per_instance * accesses_per_instance * instances 142 | read_energy = energy_per_access_per_instance * reads_per_instance * instances 143 | 144 | # Find read-network connected to this storage level by looking at the first word 145 | # in the network's name. 146 | # FIXME: all this ugliness is because of legacy topology structure. We should 147 | # simply report networks independently. 148 | assert(level_id >= 1) 149 | for n in network_ptrs: 150 | network_name = n.findall('first')[0].text 151 | network_source = network_name.split(None, 1)[0] 152 | if network_source == level_name: 153 | network = n.findall('second')[0].findall('px')[0] 154 | break 155 | #network_ptr = network_ptrs[level_id-1] 156 | #network = network_ptr.findall('second')[0].findall('px')[0] 157 | 158 | # Network energy 159 | # network = level.findall('network_')[0] 160 | network_stats = network.findall('stats_')[0] 161 | 162 | #FIXedME when router energy !== zero, need to fetch total energy per instance 163 | num_hops = get_stat(network_stats, 'num_hops', float) 164 | energy_per_hop_per_instance = get_stat(network_stats, 'energy_per_hop', float) 165 | ingresses = get_stat(network_stats, 'ingresses', int) 166 | network_energy_per_instance_pJ = get_stat(network_stats, 'energy', float) 167 | network_energy_in_pJ = network_energy_per_instance_pJ * instances 168 | 169 | # Add multicast factors 170 | multicast = get_stat(network_stats, 'multicast_factor', int) 171 | dist_multicast = get_stat(network_stats, 'distributed_multicast', int) 172 | 173 | # Add energy 174 | spatial_add_energy_per_instance = get_stat(network_stats, 'spatial_reduction_energy', float) 175 | temporal_add_energy_per_instance = get_stat(stats, 'temporal_reduction_energy', float) 176 | temporal_add_energy = np.nansum(temporal_add_energy_per_instance * instances) 177 | spatial_add_energy = np.nansum(spatial_add_energy_per_instance * instances) 178 | 179 | # Address generation energy 180 | address_generation_energy_per_cluster = get_stat(stats, 'addr_gen_energy', float) 181 | address_generation_energy = np.nansum(address_generation_energy_per_cluster * clusters) 182 | 183 | # Special Case when the memory level is a dummy (capacity = 0) 184 | if total_capacity == 0: 185 | utilization = 0 186 | else: 187 | utilization = sum((utilized_capacity*instances)/(total_capacity*total_instances)) 188 | 189 | energy_breakdown_pJ[level_name] = {\ 190 | 'energy': np.nansum(storage_access_energy_in_pJ) + np.nansum(network_energy_in_pJ) + temporal_add_energy + spatial_add_energy + address_generation_energy,\ 191 | 'storage_access_energy': np.nansum(storage_access_energy_in_pJ),\ 192 | 'read_energy': np.nansum(read_energy),\ 193 | 'temporal_add_energy': temporal_add_energy,\ 194 | 'spatial_add_energy': spatial_add_energy,\ 195 | 'address_generation_energy': address_generation_energy,\ 196 | 'network_energy': np.nansum(network_energy_in_pJ),\ 197 | 'energy_per_access_per_instance': energy_per_access_per_instance,\ 198 | 'reads_per_instance': reads_per_instance,\ 199 | 'updates_per_instance': updates_per_instance,\ 200 | 'fills_per_instance': fills_per_instance,\ 201 | 'accesses_per_instance': accesses_per_instance,\ 202 | 'instances': instances,\ 203 | 'utilization': utilization,\ 204 | 'multicast': multicast,\ 205 | 'dist_multicast': dist_multicast,\ 206 | 'num_hops': num_hops,\ 207 | 'ingresses': ingresses,\ 208 | 'energy_per_hop_per_instance': energy_per_hop_per_instance} 209 | 210 | energy_pJ = sum([value['energy'] for key, value in energy_breakdown_pJ.items()]) 211 | 212 | # Crude check to find out if timeloop produced an output. 213 | if arithmetic_level_found: 214 | output = { 215 | 'problem': problem, 216 | 'utilization': arithmetic_utilization, 217 | 'cycles': cycles, 218 | 'energy_pJ': energy_pJ, 219 | 'energy_per_mac': energy_pJ/macs, 220 | 'macs': macs, 221 | 'energy_breakdown_pJ': energy_breakdown_pJ 222 | } 223 | else: 224 | output = {} 225 | 226 | return output 227 | 228 | 229 | 230 | def main(): 231 | parser = argparse.ArgumentParser( 232 | description='A simple tool for generating pickle files from timeloop output.') 233 | parser.add_argument('--infile', nargs='?', default=xml_file_name, type=str, 234 | help='raw Timeloop XML output file') 235 | parser.add_argument('--outfile', nargs='?', default='timeloop-output.pkl', type=argparse.FileType('wb'), 236 | help='write the output of infile to outfile') 237 | options = parser.parse_args() 238 | 239 | infile = options.infile 240 | outfile = options.outfile 241 | 242 | output = parse_timeloop_stats(infile) 243 | pprint.pprint(output) 244 | 245 | with outfile: 246 | pickle.dump(output, outfile, pickle.HIGHEST_PROTOCOL) 247 | print('Wrote output to %s.' % (outfile.name)) 248 | 249 | if __name__ == '__main__': 250 | # main() 251 | file_path = '/home/felix/Documents/my_code/gamma_timeloop/gamma_timeloop_src/report/timeloop-model.map+stats.xml' 252 | filename = '/home/felix/Documents/my_code/gamma_timeloop/gamma_timeloop_src/report/timeloop-model.stats.txt' 253 | # parse_timeloop_stats(file_path) 254 | get_perf(filename) 255 | -------------------------------------------------------------------------------- /src/report/Gamma-Timeloop.csv: -------------------------------------------------------------------------------- 1 | ,EDP (uJ cycles),Cycles,Energy (uJ) 2 | 0,-inf,-inf,-inf 3 | -------------------------------------------------------------------------------- /src/report/Gamma-Timeloop.txt: -------------------------------------------------------------------------------- 1 | Achieved Fitness: ['-4.29501e+03'] 2 | Statistics 3 | ['EDP (uJ cycles)', 'Cycles', 'Energy (uJ)'] 4 | [-inf, -inf, -inf] -------------------------------------------------------------------------------- /src/report/arch.yaml: -------------------------------------------------------------------------------- 1 | architecture: 2 | subtree: 3 | - local: 4 | - attributes: 5 | block-size: 4 6 | width: 64 7 | word-bits: 16 8 | class: DRAM 9 | name: MainMemory 10 | name: System 11 | subtree: 12 | - attributes: 13 | technology: 40nm 14 | local: 15 | - attributes: 16 | block-size: 4 17 | depth: 16384 18 | metadata_datawidth: 4 19 | metadata_storage_depth: 655360 20 | metadata_storage_width: 4 21 | width: 64 22 | word-bits: 16 23 | class: SRAM 24 | name: GlobalBuffer 25 | name: Chip 26 | subtree: 27 | - local: 28 | - attributes: 29 | block-size: 1 30 | depth: 256 31 | metadata_datawidth: 4 32 | metadata_storage_depth: 327680 33 | metadata_storage_width: 4 34 | width: 16 35 | word-bits: 16 36 | class: SRAM 37 | name: RegisterFile[0..255] 38 | - attributes: 39 | datawidth: 16 40 | class: intmac 41 | name: MACC[0..1023] 42 | name: PEarrray 43 | version: 0.2 44 | -------------------------------------------------------------------------------- /src/report/map.yaml: -------------------------------------------------------------------------------- 1 | mapping: 2 | - factors: N=1 M=1 C=18 P=2 Q=1 R=1 S=3 3 | permutation: QMRSNCP 4 | target: RegisterFile 5 | type: temporal 6 | - factors: N=1 M=1 C=1 P=1 Q=2 R=1 S=1 7 | permutation: QMRSNCP 8 | target: RegisterFile 9 | type: spatial 10 | - bypass: [] 11 | keep: 12 | - Inputs 13 | - Weights 14 | - Outputs 15 | target: RegisterFile 16 | type: bypass 17 | - factors: N=4 M=1 C=2 P=1 Q=1 R=3 S=1 18 | permutation: MPQRNSC 19 | target: GlobalBuffer 20 | type: temporal 21 | - factors: N=1 M=1 C=1 P=28 Q=2 R=1 S=1 22 | permutation: MPQRNSC 23 | target: GlobalBuffer 24 | type: spatial 25 | - bypass: [] 26 | keep: 27 | - Inputs 28 | - Weights 29 | - Outputs 30 | target: GlobalBuffer 31 | type: bypass 32 | - factors: N=4 M=1 C=4 P=1 Q=14 R=1 S=1 33 | permutation: SCQRMPN 34 | target: MainMemory 35 | type: temporal 36 | - bypass: [] 37 | keep: 38 | - Inputs 39 | - Weights 40 | - Outputs 41 | target: MainMemory 42 | type: bypass 43 | -------------------------------------------------------------------------------- /src/report/problem.yaml: -------------------------------------------------------------------------------- 1 | problem: 2 | instance: 3 | C: 144 4 | M: 1 5 | N: 16 6 | P: 56 7 | Q: 56 8 | R: 3 9 | S: 3 10 | shape: 11 | data-spaces: 12 | - name: Weights 13 | projection: 14 | - - - C 15 | - - - R 16 | - - - S 17 | - - - M 18 | - name: Inputs 19 | projection: 20 | - - - C 21 | - - - N 22 | - - - S 23 | - - Q 24 | - - - R 25 | - - P 26 | - name: Outputs 27 | projection: 28 | - - - N 29 | - - - P 30 | - - - Q 31 | - - - M 32 | read-write: true 33 | dimensions: 34 | - N 35 | - C 36 | - P 37 | - Q 38 | - R 39 | - S 40 | - M 41 | name: Conv2D 42 | -------------------------------------------------------------------------------- /src/report/sparse.yaml: -------------------------------------------------------------------------------- 1 | sparse_optimizations: 2 | targets: 3 | - action-optimization: 4 | - options: 5 | - condition-on: 6 | - Weights 7 | - Outputs 8 | target: Inputs 9 | type: skipping 10 | - options: 11 | - condition-on: 12 | - Inputs 13 | - Outputs 14 | target: Weights 15 | type: skipping 16 | - options: 17 | - condition-on: 18 | - Weights 19 | - Inputs 20 | target: Outputs 21 | type: skipping 22 | name: MainMemory 23 | representation-format: 24 | data-spaces: 25 | - name: Weights 26 | rank-application-order: inner-to-outer 27 | ranks: 28 | - format: UOP 29 | - format: UOP 30 | - format: UOP 31 | - format: UOP 32 | - format: UOP 33 | - format: UOP 34 | - format: UOP 35 | - format: UOP 36 | - format: UOP 37 | - format: UOP 38 | - format: UOP 39 | - format: UOP 40 | - format: UOP 41 | - format: UOP 42 | - format: UOP 43 | - format: UOP 44 | - format: UOP 45 | - format: UOP 46 | - format: UOP 47 | - format: UOP 48 | - format: UOP 49 | - format: UOP 50 | - format: UOP 51 | - format: UOP 52 | - format: UOP 53 | - format: UOP 54 | - format: UOP 55 | - format: UOP 56 | - format: UOP 57 | - format: UOP 58 | - format: UOP 59 | - format: UOP 60 | - format: UOP 61 | - format: UOP 62 | - format: UOP 63 | - format: UOP 64 | - format: UOP 65 | - format: UOP 66 | - format: UOP 67 | - format: UOP 68 | - format: UOP 69 | - format: UOP 70 | - format: UOP 71 | - format: UOP 72 | - format: UOP 73 | - format: CP 74 | - name: Inputs 75 | rank-application-order: inner-to-outer 76 | ranks: 77 | - format: UOP 78 | - format: UOP 79 | - format: UOP 80 | - format: UOP 81 | - format: UOP 82 | - format: UOP 83 | - format: UOP 84 | - format: UOP 85 | - format: UOP 86 | - format: UOP 87 | - format: UOP 88 | - format: UOP 89 | - format: UOP 90 | - format: UOP 91 | - format: UOP 92 | - format: UOP 93 | - format: UOP 94 | - format: UOP 95 | - format: UOP 96 | - format: UOP 97 | - format: UOP 98 | - format: UOP 99 | - format: UOP 100 | - format: UOP 101 | - format: UOP 102 | - format: UOP 103 | - format: UOP 104 | - format: UOP 105 | - format: UOP 106 | - format: UOP 107 | - format: UOP 108 | - format: UOP 109 | - format: UOP 110 | - format: UOP 111 | - format: UOP 112 | - format: UOP 113 | - format: UOP 114 | - format: UOP 115 | - format: UOP 116 | - format: UOP 117 | - format: UOP 118 | - format: UOP 119 | - format: UOP 120 | - format: UOP 121 | - format: UOP 122 | - format: UOP 123 | - format: UOP 124 | - format: UOP 125 | - format: UOP 126 | - format: CP 127 | - name: Outputs 128 | rank-application-order: inner-to-outer 129 | ranks: 130 | - format: UOP 131 | - format: UOP 132 | - format: UOP 133 | - format: UOP 134 | - format: UOP 135 | - format: UOP 136 | - format: UOP 137 | - format: UOP 138 | - format: UOP 139 | - format: UOP 140 | - format: UOP 141 | - format: UOP 142 | - format: UOP 143 | - format: UOP 144 | - format: UOP 145 | - format: UOP 146 | - format: UOP 147 | - format: UOP 148 | - format: UOP 149 | - format: UOP 150 | - format: UOP 151 | - format: UOP 152 | - format: UOP 153 | - format: UOP 154 | - format: UOP 155 | - format: UOP 156 | - format: UOP 157 | - format: UOP 158 | - format: UOP 159 | - format: UOP 160 | - format: UOP 161 | - format: UOP 162 | - format: UOP 163 | - format: UOP 164 | - format: UOP 165 | - format: UOP 166 | - format: UOP 167 | - format: UOP 168 | - format: UOP 169 | - format: UOP 170 | - format: CP 171 | - action-optimization: 172 | - options: 173 | - condition-on: 174 | - Weights 175 | - Outputs 176 | target: Inputs 177 | type: skipping 178 | - options: 179 | - condition-on: 180 | - Inputs 181 | - Outputs 182 | target: Weights 183 | type: skipping 184 | - options: 185 | - condition-on: 186 | - Weights 187 | - Inputs 188 | target: Outputs 189 | type: skipping 190 | name: GlobalBuffer 191 | representation-format: 192 | data-spaces: 193 | - name: Weights 194 | rank-application-order: inner-to-outer 195 | ranks: 196 | - format: UOP 197 | - format: UOP 198 | - format: UOP 199 | - format: UOP 200 | - format: UOP 201 | - format: UOP 202 | - format: UOP 203 | - format: UOP 204 | - format: UOP 205 | - format: UOP 206 | - format: UOP 207 | - format: UOP 208 | - format: UOP 209 | - format: UOP 210 | - format: UOP 211 | - format: UOP 212 | - format: UOP 213 | - format: UOP 214 | - format: UOP 215 | - format: UOP 216 | - format: UOP 217 | - format: UOP 218 | - format: UOP 219 | - format: UOP 220 | - format: UOP 221 | - format: UOP 222 | - format: UOP 223 | - format: UOP 224 | - format: UOP 225 | - format: UOP 226 | - format: UOP 227 | - format: UOP 228 | - format: UOP 229 | - format: UOP 230 | - format: UOP 231 | - format: UOP 232 | - format: UOP 233 | - format: UOP 234 | - format: UOP 235 | - format: CP 236 | - name: Inputs 237 | rank-application-order: inner-to-outer 238 | ranks: 239 | - format: UOP 240 | - format: UOP 241 | - format: UOP 242 | - format: UOP 243 | - format: UOP 244 | - format: UOP 245 | - format: UOP 246 | - format: UOP 247 | - format: UOP 248 | - format: UOP 249 | - format: UOP 250 | - format: UOP 251 | - format: UOP 252 | - format: UOP 253 | - format: UOP 254 | - format: UOP 255 | - format: UOP 256 | - format: UOP 257 | - format: UOP 258 | - format: UOP 259 | - format: UOP 260 | - format: UOP 261 | - format: UOP 262 | - format: UOP 263 | - format: UOP 264 | - format: UOP 265 | - format: UOP 266 | - format: UOP 267 | - format: UOP 268 | - format: UOP 269 | - format: UOP 270 | - format: UOP 271 | - format: UOP 272 | - format: UOP 273 | - format: UOP 274 | - format: UOP 275 | - format: UOP 276 | - format: UOP 277 | - format: UOP 278 | - format: UOP 279 | - format: UOP 280 | - format: CP 281 | - name: Outputs 282 | rank-application-order: inner-to-outer 283 | ranks: 284 | - format: UOP 285 | - format: UOP 286 | - format: UOP 287 | - format: UOP 288 | - format: UOP 289 | - format: UOP 290 | - format: UOP 291 | - format: UOP 292 | - format: UOP 293 | - format: UOP 294 | - format: UOP 295 | - format: UOP 296 | - format: UOP 297 | - format: UOP 298 | - format: UOP 299 | - format: UOP 300 | - format: UOP 301 | - format: UOP 302 | - format: UOP 303 | - format: UOP 304 | - format: UOP 305 | - format: UOP 306 | - format: UOP 307 | - format: UOP 308 | - format: UOP 309 | - format: UOP 310 | - format: UOP 311 | - format: UOP 312 | - format: UOP 313 | - format: UOP 314 | - format: UOP 315 | - format: UOP 316 | - format: UOP 317 | - format: UOP 318 | - format: UOP 319 | - format: UOP 320 | - format: UOP 321 | - format: CP 322 | - action-optimization: 323 | - options: 324 | - condition-on: 325 | - Weights 326 | - Outputs 327 | target: Inputs 328 | type: skipping 329 | - options: 330 | - condition-on: 331 | - Inputs 332 | - Outputs 333 | target: Weights 334 | type: skipping 335 | - options: 336 | - condition-on: 337 | - Weights 338 | - Inputs 339 | target: Outputs 340 | type: skipping 341 | name: RegisterFile 342 | representation-format: 343 | data-spaces: 344 | - name: Weights 345 | rank-application-order: inner-to-outer 346 | ranks: 347 | - format: UOP 348 | - format: UOP 349 | - format: UOP 350 | - format: UOP 351 | - format: UOP 352 | - format: UOP 353 | - format: UOP 354 | - format: UOP 355 | - format: UOP 356 | - format: UOP 357 | - format: UOP 358 | - format: UOP 359 | - format: UOP 360 | - format: UOP 361 | - format: UOP 362 | - format: UOP 363 | - format: UOP 364 | - format: UOP 365 | - format: UOP 366 | - format: UOP 367 | - format: UOP 368 | - format: UOP 369 | - format: UOP 370 | - format: UOP 371 | - format: UOP 372 | - format: UOP 373 | - format: UOP 374 | - format: UOP 375 | - format: UOP 376 | - format: UOP 377 | - format: UOP 378 | - format: UOP 379 | - format: UOP 380 | - format: UOP 381 | - format: CP 382 | - name: Inputs 383 | rank-application-order: inner-to-outer 384 | ranks: 385 | - format: UOP 386 | - format: UOP 387 | - format: UOP 388 | - format: UOP 389 | - format: UOP 390 | - format: UOP 391 | - format: UOP 392 | - format: UOP 393 | - format: UOP 394 | - format: UOP 395 | - format: UOP 396 | - format: UOP 397 | - format: UOP 398 | - format: UOP 399 | - format: UOP 400 | - format: UOP 401 | - format: UOP 402 | - format: UOP 403 | - format: UOP 404 | - format: UOP 405 | - format: UOP 406 | - format: UOP 407 | - format: UOP 408 | - format: UOP 409 | - format: UOP 410 | - format: UOP 411 | - format: UOP 412 | - format: UOP 413 | - format: UOP 414 | - format: UOP 415 | - format: UOP 416 | - format: UOP 417 | - format: UOP 418 | - format: UOP 419 | - format: CP 420 | - name: Outputs 421 | rank-application-order: inner-to-outer 422 | ranks: 423 | - format: UOP 424 | - format: UOP 425 | - format: UOP 426 | - format: UOP 427 | - format: UOP 428 | - format: UOP 429 | - format: UOP 430 | - format: UOP 431 | - format: UOP 432 | - format: UOP 433 | - format: UOP 434 | - format: UOP 435 | - format: UOP 436 | - format: UOP 437 | - format: UOP 438 | - format: UOP 439 | - format: UOP 440 | - format: UOP 441 | - format: UOP 442 | - format: UOP 443 | - format: UOP 444 | - format: UOP 445 | - format: UOP 446 | - format: UOP 447 | - format: UOP 448 | - format: UOP 449 | - format: UOP 450 | - format: UOP 451 | - format: UOP 452 | - format: UOP 453 | - format: UOP 454 | - format: UOP 455 | - format: UOP 456 | - format: UOP 457 | - format: CP 458 | - compute-optimization: 459 | - type: skipping 460 | name: MACC 461 | -------------------------------------------------------------------------------- /src/timeloop_env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | import yaml 5 | import os, sys 6 | import copy 7 | from subprocess import Popen, PIPE, call 8 | from parse_timeloop_output import parse_timeloop_stats 9 | from pytimeloop.app import Model 10 | from pytimeloop import ConfigDict 11 | from utils import * 12 | import re 13 | class TimeloopEnv(object): 14 | def __init__(self, config_path='./out_config', in_config_dir= './in_config', debug=False, use_sparse=False, density=None): 15 | 16 | self.config_path = config_path 17 | self.use_sparse = use_sparse 18 | with open(os.path.join(in_config_dir, 'arch.yaml'), 'r') as fd: 19 | self.arch = yaml.load(fd, Loader = yaml.SafeLoader) 20 | with open(os.path.join(in_config_dir, 'problem.yaml'), 'r') as fd: 21 | self.problem = yaml.load(fd,Loader = yaml.SafeLoader) 22 | if self.use_sparse: 23 | with open(os.path.join(in_config_dir, 'sparse.yaml'), 'r') as fd: 24 | self.sparse = yaml.load(fd,Loader = yaml.SafeLoader) 25 | 26 | buffer_name_list, buffer_size_list, buffer_spmap_cstr, user_specified_spmaps, num_buffer_levels, num_pes = self.get_buffer_info() 27 | self.buffer_name_list = buffer_name_list 28 | self.buffer_size_list = buffer_size_list 29 | self.buffer_spmap_cstr = buffer_spmap_cstr 30 | self.user_specified_spmaps = user_specified_spmaps 31 | self.buffers_with_spmap = set([key for key, value in self.buffer_spmap_cstr.items() if value > 1]) 32 | self.num_buffer_level = num_buffer_levels 33 | self.num_pes = num_pes 34 | self._executable = 'timeloop-model' 35 | self.debug = debug 36 | self.buf_energy_cost = self.get_default_buffer_energy_cost() 37 | self.density = density 38 | 39 | def get_default_buffer_energy_cost(self): 40 | buf_energy_cost = {'DRAM': 200, 41 | 'l2': 2.2, 42 | 'l1': 1.12, 43 | 'MAC': 1.0, 44 | } 45 | return buf_energy_cost 46 | 47 | def get_num_buffer_levels(self): 48 | return self.num_buffer_level 49 | 50 | def get_buffer_spmap_cstr(self): 51 | return self.buffer_spmap_cstr 52 | 53 | def get_buffers_with_spmap(self): 54 | return self.buffers_with_spmap 55 | 56 | 57 | def get_problem_info(self): 58 | dim_note = 'NKCYXRS' 59 | problem = copy.deepcopy(self.problem) 60 | dimension = [] 61 | dimension_dicts = {} 62 | for key in dim_note: 63 | value = problem['problem']['instance'][self.get_timeloop_notation(key)] 64 | dimension.append(value) 65 | dimension_dicts[key] = value 66 | return dimension, dimension_dicts 67 | 68 | def get_buffer_info(self): 69 | arch = copy.deepcopy(self.arch) 70 | num_instances = [] 71 | buffer_name_list = [] 72 | buffer_size_list = [] 73 | num_buffer_levels = 0 74 | user_specified_spmaps = [] 75 | arch = arch['architecture'] 76 | num_pe = 0 77 | while 1: 78 | try: 79 | user_specified_spmap = False 80 | instances = 1 81 | arch = arch['subtree'][0] 82 | attrubutes = arch['local'][0]['attributes'] 83 | depth = attrubutes['depth'] if 'depth' in attrubutes else float('Inf') 84 | word_bits = attrubutes['word-bits'] if 'word-bits' in attrubutes else 8 85 | width = attrubutes['width'] if 'width' in attrubutes else 8 86 | buffer_size = depth * width / word_bits 87 | buffer_name = arch['local'][0]['name'] 88 | macc = arch['local'][1]['name'] if len(arch['local'])>1 else None 89 | re_ret = re.search('.*\[', buffer_name) 90 | if re_ret: 91 | instances = int(buffer_name.split('..')[1].split(']')[0]) + 1 92 | buffer_name = re_ret.group(0)[:-1] 93 | user_specified_spmap = True 94 | buffer_name_list.append(buffer_name) 95 | buffer_size_list.append(buffer_size) 96 | num_instances.append(instances) 97 | user_specified_spmaps.append(user_specified_spmap) 98 | num_buffer_levels += 1 99 | except: 100 | instances = int(macc.split('..')[1].split(']')[0]) + 1 101 | num_pe = instances 102 | num_instances.append(instances) 103 | break 104 | sp_cstr = [] 105 | for i in range(len(num_instances)-1): 106 | allowed_sp_size = num_instances[i+1]//num_instances[i] 107 | sp_cstr.append(allowed_sp_size) 108 | if num_instances[i+1] % num_instances[i] !=0: 109 | raise ValueError('Invalid Architecture File. ' 110 | 'Buffer hierarchy not perfectly divisible.') 111 | user_specified_spmaps.pop(0) 112 | user_specified_spmaps.append(False) 113 | return {f'l{level}': name for level, name in zip(np.arange(num_buffer_levels, 0, -1), buffer_name_list)}, \ 114 | {f'l{level}': name for level, name in zip(np.arange(num_buffer_levels, 0, -1), buffer_size_list)}, \ 115 | {f'l{level}': name for level, name in zip(np.arange(num_buffer_levels, 0, -1), sp_cstr)}, \ 116 | set([f'l{level}' for level, user_sp in zip(np.arange(num_buffer_levels, 0, -1), user_specified_spmaps) if user_sp]), \ 117 | num_buffer_levels, \ 118 | num_pe 119 | 120 | def get_timeloop_notation(self, g): 121 | timeloop_dict = {'N': 'N', 'K': 'M', 'C': 'C', 'Y': 'P', 'X': 'Q', 'R': 'R', 'S': 'S'} 122 | return timeloop_dict[g] 123 | 124 | def get_gamma_notation(self, t): 125 | gamma_dict = {'N': 'N','M': 'K','C': 'C','P': 'Y','Q': 'X','R': 'R','S': 'S'} 126 | return gamma_dict[t] 127 | 128 | def get_dimension_dict(self, dim_value): 129 | dim_note = 'NKCYXRS' 130 | return {note: value for note, value in zip(dim_note, dim_value)} 131 | 132 | def init_tp_tile_size(self): 133 | series = [f'{self.get_timeloop_notation(note)}={1}' for note in 'NKCYXRS'] 134 | return ' '.join(series) 135 | 136 | def get_tp_tile_size(self, dim_value): 137 | series = [f'{self.get_timeloop_notation(note)}={value}' for note, value in dim_value.items()] 138 | return ' '.join(series) 139 | 140 | def get_tp_sp_tile_size(self, dim_value, sp_dim, timeloop_notation=True): 141 | if timeloop_notation: 142 | temporal_series = [f'{self.get_timeloop_notation(note)}={value if note not in sp_dim else 1}' for note, value in dim_value.items()] 143 | spatial_series = [f'{self.get_timeloop_notation(note)}={value if note in sp_dim else 1}' for note, value in dim_value.items()] 144 | return ' '.join(temporal_series), ' '.join(spatial_series) 145 | else: 146 | temporal_series = [dim_value[note] if note not in sp_dim else 1 for note in 'NKCYXRS'] 147 | spatial_series = [dim_value[note] if note in sp_dim else 1 for note in 'NKCYXRS'] 148 | return np.array(temporal_series), np.array(spatial_series) 149 | 150 | def get_loop_order(self, loop_order): 151 | series = [self.get_timeloop_notation(g) for g in loop_order] 152 | return ''.join(series) 153 | 154 | def get_implicit_l3_tile_size(self, dim_value, l2_tile_size, l1_tile_size): 155 | l3_tile_size = [int(d/(l2*l1)) for d, l2, l1 in zip(dim_value, l2_tile_size, l1_tile_size)] 156 | l3_tile_size_mode = [d%(l2*l1) for d, l2, l1 in zip(dim_value, l2_tile_size, l1_tile_size)] 157 | if np.sum(l3_tile_size_mode) == 0: 158 | return l3_tile_size 159 | else: 160 | print('Tile size not divisible') 161 | return None 162 | 163 | 164 | def create_pool_env(self, num_pools, dimension, indv, use_IO=False): 165 | os.makedirs(self.config_path, exist_ok=True) 166 | if use_IO: 167 | arch_paths, problem_paths, map_paths, sparse_paths, pool_paths = [], [], [], [], [] 168 | for i in range(num_pools): 169 | pool_dir = os.path.join(self.config_path, f'pool-{i}') 170 | os.makedirs(pool_dir, exist_ok=True) 171 | pool_paths.append(pool_dir) 172 | arch_paths.append(os.path.abspath(os.path.join(pool_dir, 'arch.yaml'))) 173 | problem_paths.append(os.path.abspath(os.path.join(pool_dir, 'problem.yaml'))) 174 | map_paths.append(os.path.abspath(os.path.join(pool_dir, 'map.yaml'))) 175 | sparse_paths.append(os.path.abspath(os.path.join(pool_dir, 'sparse.yaml'))) 176 | self.arch_path, self.problem_path, self.map_path, self.sparse_path, self.pool_path = arch_paths, problem_paths, map_paths, sparse_paths, pool_paths 177 | else: 178 | arch, problem, map = self.get_configs(dimension, indv) 179 | cfg = {} 180 | cfg.update(arch) 181 | cfg.update(map) 182 | cfg.update(problem) 183 | if self.use_sparse: 184 | cfg.update(self.sparse) 185 | # cfg.update({'sparse_optimizations': self.sparse}) 186 | config = ConfigDict(cfg) 187 | with stdout_redirected(): 188 | timeloop_app = Model(config, self.config_path) 189 | with open(os.path.join(self.config_path, 'timeloop-model.ART.yaml'), 'r') as fd: 190 | art = yaml.load(fd, Loader = yaml.SafeLoader) 191 | with open(os.path.join(self.config_path, 'timeloop-model.ERT.yaml'), 'r') as fd: 192 | ert = yaml.load(fd, Loader = yaml.SafeLoader) 193 | cfg.update(art) 194 | cfg.update(ert) 195 | self.art = art 196 | self.ert = ert 197 | self.shared_cfg = cfg 198 | 199 | def get_arch_configs(self, l2_size, l1_size, num_pes): 200 | arch = copy.deepcopy(self.arch) 201 | arch['architecture']['subtree'][0]['subtree'][0]['local'][0]['attributes']['depth'] = l2_size 202 | arch['architecture']['subtree'][0]['subtree'][0]['subtree'][0]['local'][0]['name']=f'RegisterFile[0..{num_pes}]' 203 | arch['architecture']['subtree'][0]['subtree'][0]['subtree'][0]['local'][0]['attributes']['depth'] = l1_size 204 | arch['architecture']['subtree'][0]['subtree'][0]['subtree'][0]['local'][1]['name']=f'MACC[0..{num_pes}]' 205 | return arch 206 | 207 | 208 | 209 | def get_problem_configs(self, dimension): 210 | problem = copy.deepcopy(self.problem) 211 | dimension_dict = self.get_dimension_dict(dimension) 212 | for key, value in dimension_dict.items(): 213 | problem['problem']['instance'][self.get_timeloop_notation(key)] = value 214 | if self.use_sparse: 215 | problem['problem']['instance']['densities'] = {} 216 | for key in ['Inputs', 'Weights', 'Outputs']: 217 | cur_density = self.density[key] 218 | if cur_density < 1: 219 | problem['problem']['instance']['densities'][key] = {} 220 | problem['problem']['instance']['densities'][key]['distribution'] = 'fixed-structured' 221 | # problem['problem']['instance']['densities'][key]['distribution'] = 'hypergeometric' 222 | problem['problem']['instance']['densities'][key]['density'] = cur_density 223 | return problem 224 | 225 | def get_prod(self, dicts): 226 | ret_value = 1 227 | for k, v in dicts.items(): 228 | ret_value *= ((int(k))**v) 229 | return ret_value 230 | 231 | def get_bypass(self, bypass): 232 | to_pass = [k for k, v in bypass.items() if v] 233 | to_keep = [k for k, v in bypass.items() if not v] 234 | return to_pass, to_keep 235 | 236 | def get_input_weight_output_tile(self, tiles): 237 | N, K, C, Y, X, R, S = tiles 238 | input_tile, weight_tile, output_tile = N*(Y+R-1)*(X+S-1)*C, K*R*S*C, Y*X*K*N 239 | return input_tile, weight_tile, output_tile 240 | 241 | def get_ideal_perf(self, dimension): 242 | N, K, C, Y, X, R, S = dimension 243 | input_size, weight_size, output_size = [N*Y*X*C, R*S*C*K, N*Y*X*K] # Input, weight, output 244 | num_flops = N*R*S*C*Y*X*K 245 | energys = {} 246 | for level in range(1, self.num_buffer_level+1): 247 | if level == 1: 248 | buf_energy_cost = self.buf_energy_cost['l1'] 249 | elif level == self.num_buffer_level: 250 | buf_energy_cost = self.buf_energy_cost['DRAM'] 251 | else: 252 | buf_energy_cost = self.buf_energy_cost['l2'] 253 | energys[f'l{level}-Inputs'] = input_size * buf_energy_cost 254 | energys[f'l{level}-Weights'] = weight_size * buf_energy_cost 255 | energys[f'l{level}-Outputs'] = output_size * buf_energy_cost 256 | energys['compute'] = num_flops * self.buf_energy_cost['MAC'] 257 | energy = sum(e for e in energys.values()) * 1e-6 # energy_uJ 258 | # cycles = num_flops/self.num_pes 259 | cycles = num_flops/(self.num_pes-1) 260 | edp = cycles * energy 261 | return edp, cycles, energy 262 | 263 | 264 | def check_tile_fit_buffer(self, indv): 265 | len_dim = len('NKCYXRS') 266 | tile_prods = {} 267 | tile_prod = np.ones((len_dim,)) 268 | for level in range(1, self.num_buffer_level): 269 | tile_sizes = {dim_note:self.get_prod(values) for dim_note, values in indv[f'l{level}']['tile_size'].items()} 270 | par_dims = indv[f'l{level}']['par_dims'] 271 | tp_tile_sizes, sp_tile_sizes = self.get_tp_sp_tile_size(tile_sizes, par_dims, timeloop_notation=False) 272 | tile_prod = (tile_prod * tp_tile_sizes * sp_tile_sizes) 273 | tile_prods[f'l{level}'] = tile_prod 274 | for level in range(1, self.num_buffer_level): 275 | input_tile, weight_tile, output_tile = self.get_input_weight_output_tile(tile_prods[f'l{level}']) 276 | total_tile = 0 277 | total_tile += input_tile if indv[f'l{level}']['bypass']['Inputs'] is False else 0 278 | total_tile += weight_tile if indv[f'l{level}']['bypass']['Weights'] is False else 0 279 | total_tile += output_tile if indv[f'l{level}']['bypass']['Outputs'] is False else 0 280 | if total_tile > self.buffer_size_list[f'l{level}']: 281 | return False 282 | return True 283 | 284 | def get_tile_buf_size(self, indv): 285 | len_dim = len('NKCYXRS') 286 | tile_prods = {} 287 | tile_prod = np.ones((len_dim,)) 288 | for level in range(1, self.num_buffer_level+1): 289 | tile_sizes = {dim_note:self.get_prod(values) for dim_note, values in indv[f'l{level}']['tile_size'].items()} 290 | par_dims = indv[f'l{level}']['par_dims'] 291 | tp_tile_sizes, sp_tile_sizes = self.get_tp_sp_tile_size(tile_sizes, par_dims, timeloop_notation=False) 292 | tile_prod = (tile_prod * tp_tile_sizes * sp_tile_sizes) 293 | tile_prods[f'l{level}'] = tile_prod 294 | ret = {} 295 | for level in range(1, self.num_buffer_level+1): 296 | input_tile, weight_tile, output_tile = self.get_input_weight_output_tile(tile_prods[f'l{level}']) 297 | total_tile = input_tile + weight_tile + output_tile 298 | ret[f'l{level}'] = {'Inputs': input_tile, 299 | 'Weights': weight_tile, 300 | 'Outputs':output_tile, 301 | 'Total':total_tile} 302 | # total_tile = 0 303 | # total_tile += input_tile if indv[f'l{level}']['bypass']['Inputs'] is False else 0 304 | # total_tile += weight_tile if indv[f'l{level}']['bypass']['Weights'] is False else 0 305 | # total_tile += output_tile if indv[f'l{level}']['bypass']['Outputs'] is False else 0 306 | # # total_tile = input_tile + weight_tile + output_tile 307 | # # print(f'Level-{level}: {total_tile}, {self.buffer_size_list[f"l{level}"]}') 308 | # ret[f'l{level}'] = total_tile 309 | return ret 310 | 311 | 312 | def check_tile_fit_buffer_temp(self, indv): 313 | 314 | len_dim = len('NKCYXRS') 315 | tile_prods = {} 316 | tile_prod = np.ones((len_dim,)) 317 | for level in range(1, self.num_buffer_level+1): 318 | tile_sizes = {dim_note:self.get_prod(values) for dim_note, values in indv[f'l{level}']['tile_size'].items()} 319 | par_dims = indv[f'l{level}']['par_dims'] 320 | tp_tile_sizes, sp_tile_sizes = self.get_tp_sp_tile_size(tile_sizes, par_dims, timeloop_notation=False) 321 | tile_prod = (tile_prod * tp_tile_sizes * sp_tile_sizes) 322 | tile_prods[f'l{level}'] = tile_prod 323 | ret = {} 324 | for level in range(1, self.num_buffer_level+1): 325 | input_tile, weight_tile, output_tile = self.get_input_weight_output_tile(tile_prods[f'l{level}']) 326 | total_tile = 0 327 | total_tile += input_tile if indv[f'l{level}']['bypass']['Inputs'] is False else 0 328 | total_tile += weight_tile if indv[f'l{level}']['bypass']['Weights'] is False else 0 329 | total_tile += output_tile if indv[f'l{level}']['bypass']['Outputs'] is False else 0 330 | # total_tile = input_tile + weight_tile + output_tile 331 | # print(f'Level-{level}: {total_tile}, {self.buffer_size_list[f"l{level}"]}') 332 | ret[f'l{level}'] = total_tile 333 | return ret 334 | 335 | 336 | def get_map_config(self, indv): 337 | mapping = [] 338 | for level in range(1, self.num_buffer_level+1): 339 | target = self.buffer_name_list[f'l{level}'] 340 | permutation = self.get_loop_order(indv[f'l{level}']['loop_order']) 341 | tile_sizes = {dim_note:self.get_prod(values) for dim_note, values in indv[f'l{level}']['tile_size'].items()} 342 | par_dims = indv[f'l{level}']['par_dims'] 343 | bypass = indv[f'l{level}']['bypass'] 344 | to_pass, to_keep = self.get_bypass(bypass) 345 | bypass_map = {'target': target, 346 | 'type': 'bypass', 347 | 'keep': to_keep, 348 | 'bypass': to_pass 349 | } 350 | # if 1c, axis=1)) and np.all(np.any(costs[i+1:]>c, axis=1)) 105 | return is_efficient 106 | 107 | 108 | # Fairly fast for many datapoints, less fast for many costs, somewhat readable 109 | def is_pareto_efficient_simple(costs): 110 | """ 111 | Find the pareto-efficient points 112 | :param costs: An (n_points, n_costs) array 113 | :return: A (n_points, ) boolean array, indicating whether each point is Pareto efficient 114 | """ 115 | is_efficient = np.ones(costs.shape[0], dtype = bool) 116 | for i, c in enumerate(costs): 117 | if is_efficient[i]: 118 | is_efficient[is_efficient] = np.any(costs[is_efficient]fitnesses[next_point_index], axis=1) 138 | nondominated_point_mask[next_point_index] = True 139 | is_efficient = is_efficient[nondominated_point_mask] # Remove dominated points 140 | fitnesses = fitnesses[nondominated_point_mask] 141 | next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1 142 | if return_mask: 143 | is_efficient_mask = np.zeros(n_points, dtype = bool) 144 | is_efficient_mask[is_efficient] = True 145 | return is_efficient_mask, len(is_efficient) 146 | else: 147 | return is_efficient --------------------------------------------------------------------------------