├── docs
│   ├── preview.adoc
│   ├── index.rst
│   ├── docs
│   │   └── plugins
│   │       └── instr_groups.rst
│   ├── conf.py
│   ├── adding_plugins.rst
│   ├── intro.rst
│   └── plugins.rst
├── riscv_application_profiler
│   ├── __init__.py
│   ├── requirements.txt
│   ├── isac_port.py
│   ├── dasm.sh
│   ├── verif.py
│   ├── main.py
│   ├── utils.py
│   ├── plugins
│   │   ├── csr_compute.py
│   │   ├── pattern.py
│   │   ├── dependency.py
│   │   ├── store_load_bypass.py
│   │   ├── register_compute.py
│   │   ├── jumps_ops.py
│   │   ├── instr_groups.py
│   │   └── branch_ops.py
│   ├── profiler.py
│   └── consts.py
├── .gitignore
├── setup.cfg
├── Makefile
├── CHANGELOG.md
├── make.bat
├── .readthedocs.yaml
├── sample_configs
│   ├── cycle_accurate
│   │   ├── config.yaml
│   │   └── L2_configs
│   │       └── config.yaml
│   └── profiler_config
│       ├── config.yaml
│       └── L2_configs
│           └── config.yaml
├── LICENSE
├── setup.py
├── CONTRIBUTING.md
├── README.md
└── CODE_OF_CONDUCT.md
/docs/preview.adoc:
--------------------------------------------------------------------------------
1 | RISC-V Application Profiler
2 | ===========================
3 | 
4 | This is the preface for the RISC-V Application Profiler (RAP) documentation.
--------------------------------------------------------------------------------
/riscv_application_profiler/__init__.py:
--------------------------------------------------------------------------------
1 | '''Top Level Package for riscv_application_profiler'''
2 | 
3 | __version__ = '1.0.0'
4 | 
--------------------------------------------------------------------------------
/riscv_application_profiler/requirements.txt:
--------------------------------------------------------------------------------
1 | click>=7.0.0
2 | riscv_isac==0.18.1
3 | pathlib
4 | riscv_config
5 | pycachesim==0.3.1
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Pycache files
 2 | *.pyc
 3 | 
 4 | # package information
 5 | *.egg-info/
 6 | dist/
 7 | .python-version
 8 | 
 9 | # build files
10 | build/
11 | 
12 | # riscv opcodes
13 | rvop_decoder
14 | 
15 | # files from isac build
16 | constants.py
17 | rvopcodesdecoder.py
18 | 
19 | # cubic, larger than 100MB
20 | tests/cubic
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [bumpversion]
 2 | current_version = 1.0.0
 3 | commit = True
 4 | tag = True
 5 | 
 6 | [bumpversion:file:setup.py]
 7 | search = version='{current_version}'
 8 | replace = version='{new_version}'
 9 | 
10 | [bumpversion:file:riscv_application_profiler/__init__.py]
11 | search = __version__ = '{current_version}'
12 | replace = __version__ = '{new_version}'
13 | 
14 | [bdist_wheel]
15 | universal = 1
16 | 
17 | [flake8]
18 | exclude = docs
19 | 
20 | [aliases]
--------------------------------------------------------------------------------
/riscv_application_profiler/isac_port.py:
--------------------------------------------------------------------------------
 1 | # See LICENSE for licensing information.
 2 | 
 3 | # This file is a porting mechanism for using riscv-isac in riscv-application-profiler.
 4 | 
 5 | from git import Repo
 6 | import os
 7 | import shutil
 8 | import sys
 9 | 
10 | def isac_setup_routine():
11 |     '''
12 |     Sets up the riscv-isac environment.
13 |     '''
14 |     if not os.path.exists('rvop_decoder'):
15 |         os.system('riscv_isac setup')
16 |     sys.path.append(os.path.join(os.getcwd(), 'rvop_decoder'))
17 | 
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. RISC-V Application Profiler documentation master file, created by
 2 |    sphinx-quickstart on Wed Sep 13 16:33:50 2023.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to RISC-V Application Profiler's documentation!
 7 | =======================================================
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Contents:
12 | 
13 |    intro
14 |    preview
15 |    adding_plugins
16 |    plugins
17 | 
18 | 
--------------------------------------------------------------------------------
/riscv_application_profiler/dasm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | cat > .original
 4 | cat .original | awk '{match($0, /0x[0-9a-fA-F]+/); print substr($0, RSTART, RLENGTH)}' > .original.tmp.swp ;
 5 | cat .original | awk -F'[()]' '{print "DASM(" $2 ")"}' | spike-dasm > .dasm.tmp.swp ;
 6 | exec 3<.original.tmp.swp
 7 | exec 4<.dasm.tmp.swp
 8 | echo "" > .merged.tmp.swp
 9 | while read -r line1 <&3 && read -r line2 <&4; do
10 |     echo "$line1 :: $line2" >> .merged.tmp.swp
11 | done
12 | exec 3<&-
13 | exec 4<&-
14 | cat .merged.tmp.swp > "$1"
15 | rm -f .original
16 | rm -f .original.tmp.swp
17 | rm -f .dasm.tmp.swp
18 | rm -f .merged.tmp.swp
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/docs/plugins/instr_groups.rst:
--------------------------------------------------------------------------------
 1 | Grouping Instructions
 2 | =====================
 3 | 
 4 | We iterate through all entries in the provided execution log and classify the instructions into groups. The groups are defined by the following rules:
 5 | 
 6 | * If the instruction is a branch, it is placed in a group of its own.
 7 | * If the instruction is a memory instruction, it is placed in a group of its own.
 8 | 
 9 | The remaining instructions are placed in groups based on the following rules:
10 | 
11 | * If the instruction is a load instruction, it is placed in a group of its own.
12 | * If the instruction is a store instruction, it is placed in a group of its own.
13 | * If the instruction is a call instruction, it is placed in a group of its own.
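14 | 
15 | A minimal sketch of this classification scheme is shown below. It is illustrative only: the mnemonic sets are assumptions covering the base RV32I/RV64I instructions, and ``instr_name`` mirrors the attribute carried by the profiler's log entries, not its exact grouping code.
16 | 
17 | .. code-block:: python
18 | 
19 |    BRANCHES = {'beq', 'bne', 'blt', 'bge', 'bltu', 'bgeu'}
20 |    LOADS = {'lb', 'lbu', 'lh', 'lhu', 'lw', 'lwu', 'ld'}
21 |    STORES = {'sb', 'sh', 'sw', 'sd'}
22 |    CALLS = {'jal', 'jalr'}
23 | 
24 |    def group_instructions(entries):
25 |        '''Place each executed instruction into exactly one group.'''
26 |        groups = {'branch': [], 'load': [], 'store': [], 'call': [], 'other': []}
27 |        for entry in entries:
28 |            name = entry.instr_name
29 |            if name in BRANCHES:
30 |                groups['branch'].append(entry)
31 |            elif name in LOADS:
32 |                groups['load'].append(entry)
33 |            elif name in STORES:
34 |                groups['store'].append(entry)
35 |            elif name in CALLS:
36 |                groups['call'].append(entry)
37 |            else:
38 |                groups['other'].append(entry)
39 |        return groups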
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # CHANGELOG
 2 | 
 3 | This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 4 | 
 5 | ## [1.0.1] - 2025-04-25
 6 | - minor fix
 7 | 
 8 | ## [1.0.0] - 2023-09-13
 9 | - Support including a config file
10 | - Changed store_load_bypass to track bytes
11 | - Improved build times
12 | - Added docs
13 | - Added the nsichneu benchmark from embench-iot to tests
14 | - Added macro op plugin
15 | 
16 | ## [0.2.0] - 2023-07-25
17 | - Added cache plugins
18 | - Added register_compute plugins
19 | - Added jumps_ops plugins
20 | - Added store_load_bypass plugins
21 | - Added csr_compute plugins
22 | - Added data dependency plugins
23 | 
24 | ## [0.1.0] - 2023-05-05
25 | - Added branch plugins
26 | - Added instruction groups plugin
27 | - Used riscv_isac for decoding
28 | - Used riscv_config for ISA validation
29 | 
30 | ## [0.0.1] - 2023-03-08
31 | - Added sample log file.
32 | 
--------------------------------------------------------------------------------
/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'RISC-V Application Profiler' 10 | copyright = '2023, Mahendra Vamshi A' 11 | author = 'Mahendra Vamshi A' 12 | release = '1.0.0' 13 | 14 | # -- General configuration --------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 16 | 17 | extensions = [] 18 | 19 | templates_path = ['_templates'] 20 | exclude_patterns = [] 21 | 22 | 23 | 24 | # -- Options for HTML output ------------------------------------------------- 25 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 26 | 27 | html_theme = 'alabaster' 28 | html_static_path = ['_static'] 29 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.7" 12 | # You can also specify other tool versions: 13 | # nodejs: "20" 14 | # rust: "1.70" 15 | # golang: "1.20" 16 | 17 | # Build documentation in the "docs/" directory with Sphinx 18 | sphinx: 19 | configuration: docs/conf.py 20 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 21 | # builder: "dirhtml" 22 | # Fail on all warnings to avoid broken references 23 | # fail_on_warning: true 24 | 25 | # Optionally build your docs in additional formats such as PDF and ePub 26 | # formats: 27 | # - pdf 28 | # - epub 29 | 30 | # Optional but recommended, declare the Python requirements required 31 | # to build your documentation 32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 33 | # python: 34 | # install: 35 | # - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /sample_configs/cycle_accurate/config.yaml: -------------------------------------------------------------------------------- 1 | # YAML without l2 cache congifuration 2 | cycles: 3 | # start_cycles: 100 4 | instructions_cycles: 5 | {add: 6 | {latency : 1, 7 | throughput : 1}, 8 | sub: 9 | {latency : 1, 10 | throughput : 1}, 11 | mul: 12 | {latency : 1, 13 | throughput : 1}, 14 | div: 15 | {latency : 1, 16 | throughput : 1}, 17 | rem: 18 | {latency : 1, 19 | throughput : 1}, 20 | csr: 21 | {latency : 1, 22 | throughput : 1}, 23 | s(a-z)l*: 24 | {latency : 1, 25 | throughput : 1}, 26 | s(a-z)r*: 27 | {latency : 1, 28 | throughput : 1}, 29 | j: 30 | {latency : 1, 31 | throughput : 1}, 32 | c.: 33 | {latency : 1, 34 | throughput : 1} 35 | } 36 | flush_cycles: 37 | csr: 1 38 | branch: 0 39 | fence.i: 1 40 | reset_cycles: 69 41 | pipeline_depth: 2 42 | bus_latency: # this is inaccurate 43 | data: 8 44 | instruction: 9 45 | bypass_latency: 1 46 | structural_hazards: 47 | data_cache: 1 48 | bus: 8 # bus capacity in terms of words 49 | # replacemnt_latency: 257 50 | mem_latency: 51 | cacheable: 52 | data: 53 | 
hit: 1 54 | miss: 8 55 | instruction: 56 | hit: 1 57 | miss: 8 58 | non_cacheable: 59 | data: 60 | miss: 8 #bus_latency 61 | instruction: 62 | miss: 9 #bus_latency 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) <2023> . 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 7 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 8 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /sample_configs/cycle_accurate/L2_configs/config.yaml: -------------------------------------------------------------------------------- 1 | # YAML with l2 cache congifuration 2 | cycles: 3 | # start_cycles: 100 4 | instructions_cycles: 5 | {add: 6 | {latency : 1, 7 | throughput : 1}, 8 | sub: 9 | {latency : 1, 10 | throughput : 1}, 11 | mul: 12 | {latency : 1, 13 | throughput : 1}, 14 | div: 15 | {latency : 1, 16 | throughput : 1}, 17 | rem: 18 | {latency : 1, 19 | throughput : 1}, 20 | csr: 21 | {latency : 1, 22 | throughput : 1}, 23 | s(a-z)l*: 24 | {latency : 1, 25 | throughput : 1}, 26 | s(a-z)r*: 27 | {latency : 1, 28 | throughput : 1}, 29 | j: 30 | {latency : 1, 31 | throughput : 1}, 32 | c.: 33 | {latency : 1, 34 | throughput : 1} 35 | } 36 | flush_cycles: 37 | csr: 1 38 | branch: 0 39 | fence.i: 1 40 | reset_cycles: 69 41 | pipeline_depth: 2 42 | bus_latency: # this is inaccurate 43 | data: 8 44 | instruction: 9 45 | bypass_latency: 1 46 | structural_hazards: 47 | data_cache: 1 48 | bus: 8 # bus capacity in terms of words 49 | # replacemnt_latency: 257 50 | mem_latency: 51 | cacheable: 52 | data: 53 | hit: 1 54 | miss: 1 55 | instruction: 56 | hit: 1 57 | miss: 1 58 | L2: 59 | hit: 1 60 | miss: 16 #bus_latency + (size of cache line in bytes /4) eg: 8 + (32/4) = 16 61 | non_cacheable: 62 | data: 63 | miss: 8 #bus_latency 64 | instruction: 65 | miss: 9 #bus_latency 66 | 67 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """The setup script.""" 2 | 3 | import os 4 | from setuptools import setup, find_packages 5 | import codecs 6 | 7 | # Base directory of package 8 | here = os.path.abspath(os.path.dirname(__file__)) 9 | 10 | 11 | def read(*parts): 12 | with codecs.open(os.path.join(here, *parts), 'r') as fp: 13 | return fp.read() 14 | def read_requires(): 15 | with open(os.path.join(here, "riscv_application_profiler/requirements.txt"), 16 | "r") as reqfile: 17 | return reqfile.read().splitlines() 18 | 19 | 20 | # Long Description 21 | with open("README.md", "r") as fh: 22 | readme = fh.read() 23 | 24 | setup_requirements = [] 25 | 26 | test_requirements = [] 27 | 28 | setup( 29 | name='riscv-application-profiler', 30 | version='1.0.0', 31 | description="RISC-V Application Profiler", 32 | long_description=readme + '\n\n', 33 | classifiers=[ 34 | "Programming Language :: Python :: 3.7", 35 | "License :: OSI Approved :: BSD License", 36 | "Development Status :: 4 - Beta" 37 | ], 38 | url='https://github.com/mahendraVamshi/riscv-application-profiler', 39 | author="PES University + InCore Semiconductors", 40 | author_email='', 41 | license="BSD-3-Clause", 42 | packages=find_packages(), 43 | package_dir={'riscv_application_profiler': 'riscv_application_profiler'}, 44 | package_data={'riscv_application_profiler': ['requirements.txt']}, 45 | install_requires=read_requires(), 46 | python_requires='>=3.7.0', 47 | entry_points={ 48 | 'console_scripts': ['riscv_application_profiler=riscv_application_profiler.main:cli'], 49 | }, 50 | include_package_data=True, 51 | keywords='riscv_application_profiler', 52 | tests_require=test_requirements, 53 | zip_safe=False, 54 | ) 55 | -------------------------------------------------------------------------------- /riscv_application_profiler/verif.py: -------------------------------------------------------------------------------- 1 | import importlib 
 2 | from riscv_application_profiler.consts import *
 3 | import riscv_application_profiler.consts as consts
 4 | from riscv_application_profiler.utils import Utilities
 5 | import re
 6 | import os
 7 | 
 8 | # Ad-hoc verification helpers: they compare the profiler's cycle estimates
 9 | # against a reference dump that carries '[ <cycles> ]' annotations per line.
10 | 
11 | def verify(check):
12 |     count = 0
13 |     # utils = Utilities(check)
14 |     # utils.metadata()
15 |     with open(check, 'r') as check_file:
16 | 
17 |         # Iterate through each line in the log file.
18 |         for line in check_file:
19 |             match = re.match(r'\[\s+(\d+)\]', line)
20 |             if match is not None:
21 | 
22 |                 x = int(match.group(1))
23 |                 if x is not None:
24 |                     count += x
25 | 
26 |     print('Actual number of cycles: ')
27 |     print(count//10)
28 | 
29 | def modi(check, mast_dict):
30 |     # Write an annotated copy of the dump to mine.txt and record any
31 |     # cycle-count mismatches (expected vs. actual) in error.txt.
32 |     with open(check, 'r') as check_file, open("mine.txt", 'w') as mine, open("error.txt", 'w') as error:
33 |         l = list(mast_dict.values())
34 |         l1 = list(mast_dict.keys())
35 |         for idx,line in enumerate(check_file):
36 |             line = line.strip()
37 |             entry = l1[idx]
38 |             n_line = line+ '\t'+ '-------- ' + entry.instr_name + '\t'+ '['+str(l[idx])+']'
39 |             mine.writelines(n_line+'\n')
40 |             match = re.match(r'\[\s+(\d+)\]', line)
41 |             if match is not None:
42 |                 x = (int(match.group(1)))//10
43 |                 if x is not None:
44 |                     if x != l[idx]:
45 |                         error.writelines(n_line+'\n')
46 |                         error.writelines('Expected: '+str(l[idx])+'\n')
47 |                         error.writelines('Actual: '+str(x)+'\n')
48 |                         error.writelines('Difference: '+str(l[idx]-x)+'\n')
49 |                         error.writelines('Line number: '+str(idx + 1)+'\n')
50 |                         error.writelines('-----------------------------\n')
51 | 
--------------------------------------------------------------------------------
/docs/adding_plugins.rst:
--------------------------------------------------------------------------------
 1 | How to Add Plugins to the Profiler
 2 | ==================================
 3 | 
 4 | Plugins can be added to the profiler to extend its functionality and analyze program behavior. This guide explains the steps to create and integrate a new plugin into the profiler.
 5 | 
 6 | Creating a New Plugin
 7 | ---------------------
 8 | 
 9 | To create a new plugin, follow these steps:
10 | 
11 | 1. Create a New File:
12 | 
13 |    Create a new Python file in the ``plugins`` folder of the project.
14 | 
15 | 2. Define Inputs:
16 | 
17 |    Your plugin should accept the following inputs:
18 | 
19 |    - ``master_inst_dict``: A dictionary with the ``InstructionEntry`` objects of all executed instructions as keys.
20 |    - ``ops_dict``: A dictionary containing the operations as keys and the instructions belonging to each operation as values.
21 |    - ``extension_used``: A list of the ISA extensions used by the application. This is used to determine whether to include extension-specific operations in the analysis.
22 |    - ``config``: The parsed profiler configuration YAML.
23 |    - ``cycle_accurate_config``: The parsed cycle accurate configuration YAML, or ``None`` when cycle accurate profiling is not requested.
24 | 
25 |    Any custom inputs must be defined in the configuration YAML file. These will be passed to the function as keyword arguments.
26 | 
27 | 3. Define Outputs:
28 | 
29 |    Your plugin should return a dictionary containing the results of your analysis. The keys of this dictionary should be operation names, and the values are the metrics computed in the plugin itself. All returned values will be tabulated. Eventually, this will become a custom class.
30 | 
31 | Adding a Plugin to the YAML File
32 | --------------------------------
33 | 
34 | To execute your plugin, you must add it to the configuration YAML file. To do this, follow these steps:
35 | 
36 | 1. Add ``plugin_name``:
37 | 
38 |    Add the plugin file name to your YAML file under ``profiles:cfg:metrics``. Under that, add your plugin's function name along with a header name for the report, as shown in the example below.
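39 | 
40 |    For example, a hypothetical plugin file ``my_plugin.py`` exposing a function ``my_metric`` (both names are illustrative, not part of the profiler) would be registered like this, mirroring the entries in ``sample_configs/profiler_config/config.yaml``:
41 | 
42 |    .. code-block:: yaml
43 | 
44 |       profiles:
45 |         cfg:
46 |           metrics:
47 |             my_plugin:
48 |               my_metric:
49 |                 - My metric description.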
50 | 
51 | 2. Execute the Plugin:
52 | 
53 |    Sit back and run the profiler as usual. Your plugin will be executed along with the rest of the analysis.
54 | 
--------------------------------------------------------------------------------
/riscv_application_profiler/main.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | import shutil
  3 | import click
  4 | from riscv_application_profiler import __version__
  5 | from riscv_application_profiler.profiler import run
  6 | from riscv_application_profiler.isac_port import isac_setup_routine
  7 | from riscv_isac.log import logger
  8 | import riscv_application_profiler.consts
  9 | import riscv_isac.plugins.spike as isac_spike_plugin
 10 | import os
 11 | from git import Repo
 12 | import yaml
 13 | # remove later
 14 | from riscv_application_profiler.verif import verify
 15 | 
 16 | @click.group()
 17 | @click.version_option(version=__version__)
 18 | def cli():
 19 |     '''Command Line Interface for riscv_application_profiler'''
 20 | 
 21 | @cli.command()
 22 | # CLI option 'log'.
 23 | # Expects a path to an execution log.
 24 | @click.option(
 25 |     '-l',
 26 |     '--log',
 27 |     help=
 28 |     'This option expects the path to an execution log.',
 29 |     required=True)
 30 | 
 31 | # CLI option 'output'.
 32 | # Expects a directory.
 33 | @click.option(
 34 |     '-o',
 35 |     '--output',
 36 |     help="Path to the output directory.",
 37 |     default='./build',
 38 |     show_default=True,
 39 |     required=False,
 40 | )
 41 | 
 42 | # CLI option 'config'.
 43 | # Expects a YAML file.
 44 | @click.option('-c', '--config', help="Path to the YAML configuration file.", required=True)
 45 | 
 46 | # CLI option 'cycle accurate config'.
 47 | # Expects a YAML file.
 48 | @click.option('-ca', '--cycle_accurate_config', help="Path to the YAML cycle accurate configuration file.", required=False)
 49 | 
 50 | # CLI option 'verbose'.
 51 | # Expects a string.
 52 | @click.option('-v', '--verbose', default='info', help='Set verbose level', type=click.Choice(['info','error','debug'],case_sensitive=False))
 53 | 
 54 | # If one has a log file annotated with cycle latencies, the 'check' option below
 55 | # can be enabled; the changes required for it are left commented out.
 56 | # @click.option('-ch', '--check', help="Path to the dump file which has cycle latency.", required=False)
 57 | 
 58 | def profile(config, log, output, verbose, cycle_accurate_config): #, check):
 59 |     '''
 60 |     Profiles an application using its execution log and generates reports.
 61 |     '''
 62 |     with open(config, 'r') as config_file:
 63 |         ia_config = yaml.safe_load(config_file)
 64 |     if cycle_accurate_config:
 65 |         with open(cycle_accurate_config, 'r') as cycle_accurate_config_file:
 66 |             ca_config = yaml.safe_load(cycle_accurate_config_file)
 67 |         # if check:
 68 |         #     check_file = str(Path(check).absolute())
 69 |         #     verify(check_file)
 70 |         # else:
 71 |         #     check_file = None
 72 |     else:
 73 |         ca_config = None
 74 |     default_commitlog_regex = ia_config['profiles']['cfg']['commitlog_regex']
 75 |     default_privilege_mode_regex = ia_config['profiles']['cfg']['privilege_mode_regex']
 76 |     isa = ia_config['profiles']['cfg']['isa']
 77 |     log_file = str(Path(log).absolute())
 78 |     output_dir = str(Path(output).absolute())
 79 | 
 80 |     # clone riscv_opcodes and copy decoder plugin
 81 | 
 82 |     isac_setup_routine()
 83 | 
 84 |     logger.level(verbose)
 85 |     logger.info("**********************************")
 86 |     logger.info(f"RISC-V Application Profiler v{__version__}")
 87 |     logger.info("**********************************")
 88 |     logger.info("ISA Extension used: " + isa)
 89 | 
 90 |     logger.info(f"\nLog file: {log_file}")
 91 |     logger.info(f"Output directory: {output_dir}")
 92 | 
 93 |     # Invoke the actual profiler
 94 |     if ca_config is not None:
 95 |         run(log_file, isa, output_dir, verbose, ia_config, ca_config)  # ,check_file)
 96 |     else:
 97 |         run(log_file, isa, output_dir, verbose, ia_config, None)  # ,None)
 98 |     logger.info("Done profiling.")
 99 |     logger.info(f"Reports in {output_dir}/reports.")
100 | 
101 | def main():
102 |     cli()
103 | 
104 | if __name__ == '__main__':
105 |     main()
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | .. See LICENSE for details
 2 | 
 3 | .. highlight:: shell
 4 | 
 5 | ============
 6 | Contributing
 7 | ============
 8 | 
 9 | Your inputs are welcome and greatly appreciated! We want to make contributing to this project as easy and transparent as possible, whether it's:
10 | 
11 | - Reporting a bug
12 | - Discussing the current state of the code
13 | - Submitting a bug fix
14 | - Proposing new features
15 | - Becoming a maintainer
16 | - Adding a new plugin
17 | 
18 | We develop with GitHub
19 | ----------------------
20 | 
21 | We use GitHub to host code, to track issues and feature requests, as well as to accept pull requests.
22 | 
23 | All changes happen through Pull Requests
24 | ----------------------------------------
25 | 
26 | Pull requests are the best way to propose changes to the codebase. We actively welcome your pull requests:
27 | 
28 | 1. Fork the repo and create your branch from `master` (see the example after this list).
29 | 2. If you have updated the docs, ensure that they render correctly in the respective format.
30 | 3. Make sure to create an entry in the CHANGELOG.md. Please refer to the section on versioning below
31 |    to choose an appropriate version number.
32 | 4. Ensure the existing framework is not broken and still passes the basic checks.
33 | 5. Please include a comment with the SPDX license identifier in all source files, for example:
34 |    ```
35 |    // SPDX-License-Identifier: BSD-3-Clause
36 |    ```
37 | 6. Bump the version of the tool to patch/minor/major as per the entry made in the CHANGELOG.md.
38 | 7. Issue that pull request!
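39 | 
40 | For example, the fork-and-branch flow from step 1 looks like this (replace `<your-username>` with your GitHub handle; the branch name is illustrative):
41 | 
42 | ```
43 | git clone https://github.com/<your-username>/riscv-application-profiler.git
44 | cd riscv-application-profiler
45 | git checkout -b my-feature master
46 | ```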
 47 | 
 48 | Checks for a PR
 49 | ---------------
 50 | 
 51 | Make sure your PR meets all the following requirements:
 52 | 
 53 | 1. You have made an entry in the CHANGELOG.md.
 54 | 2. You have bumped the version of the tool using the bumpversion utility described below.
 55 | 3. The commit messages are verbose.
 56 | 4. Your PR doesn't break the existing framework.
 57 | 
 58 | Versioning
 59 | ----------
 60 | 
 61 | When issuing pull requests, an entry in the CHANGELOG.md is mandatory. This repository adheres to
 62 | the [`Semantic Versioning`](https://semver.org/spec/v2.0.0.html) scheme. The following guidelines must
 63 | be followed while assigning a new version number:
 64 | 
 65 | - Patch-updates: all doc updates (like typos, more clarification, etc.) will be patches. Beautification enhancements will also be treated as patch updates. Certain bug fixes to existing code may be treated as patches as well.
 66 | - Minor-updates: Updates to code with new extensions, features, or run time optimizations can be
 67 |   treated as minor updates.
 68 | - Major-updates: Changes to the framework flow (backward compatible or incompatible) will be treated
 69 |   as major updates.
 70 | 
 71 | Note: You can have either a patch or minor or major update.
 72 | Note: In case of a conflict, the maintainers will decide the final version to be assigned.
 73 | 
 74 | All contributions will be under the permissive open-source License
 75 | ------------------------------------------------------------------
 76 | 
 77 | In short, when you submit code changes, your submissions are understood to be under a permissive open source license like BSD-3, Apache-2.0, CC, etc. that covers the project. Feel free to contact the maintainers if that's a concern.
 78 | 
 79 | Report bugs using GitHub's `issues `_
 80 | ------------------------------------------------------------------------------------
 81 | 
 82 | We use GitHub issues to track public bugs. Report a bug by `opening a new issue `_; it's that easy!
 83 | 
 84 | Write bug reports with detail, background, and sample code
 85 | ----------------------------------------------------------
 86 | 
 87 | **Great Bug Reports** tend to have:
 88 | 
 89 | - A quick summary and/or background
 90 | - Steps to reproduce
 91 |   - Be specific!
 92 |   - Give sample code if you can.
 93 | - What you expected would happen
 94 | - What actually happens
 95 | - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work)
 96 | 
 97 | 
 98 | Version Bumping made simple
 99 | ---------------------------
100 | 
101 | Each PR will require the tool's version to be bumped. This can be achieved using the following
102 | commands::
103 | 
104 |     $ bumpversion --allow-dirty --no-tag --config-file setup.cfg patch  # options: major / minor / patch
105 | 
106 | 
--------------------------------------------------------------------------------
/riscv_application_profiler/utils.py:
--------------------------------------------------------------------------------
 1 | # See LICENSE for license details.
 2 | 
 3 | from riscv_isac.log import *
 4 | import pytablewriter as ptw
 5 | import os
 6 | import riscv_application_profiler.consts as consts
 7 | import pprint as pp
 8 | 
 9 | class Utilities:
10 |     def __init__(self, log, output) -> None:
11 |         os.makedirs(f'{output}/reports', exist_ok=True)
12 |         self.log = log
13 |         self.tables_file = open(f'{output}/reports/tables.adoc', 'w')
14 |         self.tables_file.write(f'# Reports from the RISC-V Application Profiler\n\n')
15 | 
16 |     def metadata(self):
17 |         '''
18 |         Prints the metadata of the application being profiled.
19 |         '''
20 |         logger.debug("Printing metadata.")
21 |         self.tables_file.write('## Application metadata\n\n')
22 |         self.tables_file.write(f'Execution log file: {self.log}\n\n')
23 |         self.tables_file.write('\n\n')
24 |     def print_stats(self, op_dict, counts):
25 |         '''
26 |         Prints the statistics of the grouped instructions.
27 | 
28 |         Args:
29 |             - op_dict: A dictionary with the operations as keys and a list of InstructionEntry
30 |               objects as values.
31 |             - counts: A dictionary with the operations as keys and the number of instructions
32 |               in each group as values.
33 |         '''
34 |         logger.debug("Printing statistics.")
35 |         for op in op_dict.keys():
36 |             logger.info(f'{op}: {counts[op]}')
37 |         logger.debug("Done.")
38 | 
39 |     def tabulate_stats(self, in_dict, header_name):
40 | 
41 |         logger.debug("Tabulating statistics.")
42 |         table = []
43 |         if in_dict is None:
44 |             logger.error("Dictionary is empty.")
45 |             exit(1)
46 |         key_list = list(in_dict.keys())
47 |         length = len(key_list)
48 |         for i in range(len(in_dict[key_list[0]])):
49 |             l1 = []
50 |             for j in range(length):
51 |                 l1.append(in_dict[key_list[j]][i])
52 |             table.append(l1)
53 | 
54 |         # Default the header before writing it, so the report never shows 'None'.
55 |         if header_name is None:
56 |             header_name = "Name"
57 |         self.tables_file.write(f'## {header_name}\n')
58 |         writer = ptw.AsciiDocTableWriter()
59 |         writer.table_name = ""
60 |         writer.headers = key_list
61 |         writer.value_matrix = table
62 |         self.tables_file.write(writer.dumps())
63 | 
64 |         self.tables_file.write('\n\n')
65 |         logger.debug("Done.")
66 | 
67 |     def remove_dups(self, target: list) -> list:
68 |         '''
69 |         Removes duplicates from a list.
70 | 
71 |         Args:
72 |             - target: The list to remove duplicates from.
73 | 
74 |         Returns:
75 |             - A list with no duplicates.
76 |         '''
77 |         temp_list = []
78 |         for entry in target:
79 |             if entry not in temp_list:
80 |                 temp_list.append(entry)
81 |         return temp_list
82 | 
83 |     def compute_ops_dict(self, args_list: list, ext_list: list, isa_arg: str) -> dict:
84 |         '''
85 |         Computes the current ops dict out of the master ops db.
86 | 
87 |         Args:
88 |             - ext_list: A list of extensions to be supported.
89 |             - isa_arg: The ISA to be supported.
90 | 
91 |         Returns:
92 |             - A dictionary containing the supported operations.
93 | ''' 94 | 95 | temp_ops_dict = {entry:[] for entry in args_list} 96 | if isa_arg == 'RV32': 97 | master_ops_dict = consts.ops_dict['RV32'] 98 | elif isa_arg == 'RV64': 99 | master_ops_dict = consts.ops_dict['RV32'] 100 | for ext in ext_list: 101 | for op_type in args_list: 102 | master_ops_dict[ext][op_type].extend(consts.ops_dict['RV64'][ext][op_type]) 103 | else: 104 | logger.error(f'XLEN {isa_arg} not supported.') 105 | exit(1) 106 | for ext in ext_list: 107 | for op_type in args_list: 108 | temp_ops_dict[op_type] += master_ops_dict[ext][op_type] 109 | result_dict = {entry:self.remove_dups(temp_ops_dict[entry]) for entry in temp_ops_dict} 110 | return result_dict 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /sample_configs/profiler_config/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | # '''Create one cache level out of given configuration. 3 | 4 | # :param sets: total number of sets, if 1 cache will be full-associative 5 | # :param ways: total number of ways, if 1 cache will be direct mapped 6 | # :param cl_size: number of bytes that can be addressed individually 7 | # :param replacement_policy: FIFO, LRU (default), MRU or RR 8 | # :param write_back: if true (default), write back will be done on evict. 9 | # Otherwise write-through is used. If false, write through 10 | # :param write_allocate: if true (default), a load will be issued on a 11 | # write miss. If false, write no allocate 12 | # :param write_combining: if true, this cache will combine writes and 13 | # issue them on evicts(default is false) 14 | # :param subblock_size: the minimum blocksize that write-combining can 15 | # handle 16 | # :param load_from: the cache level to forward a load in case of a load 17 | # miss or write-allocate, if None, assumed to be main 18 | # memory 19 | # :param store_to: the cache level to forward a store to in case of 20 | # eviction of dirty lines, if None, assumed to be main 21 | # memory 22 | # :param victims_to: the cache level to forward any evicted lines to 23 | # (dirty or not) 24 | 25 | # The total cache size is the product of sets*ways*cl_size. 26 | # Internally all addresses are converted to cacheline indices. 27 | 28 | # Instantization has to happen from last level cache to first level 29 | # cache, since each subsequent level requires a reference of the other 30 | # level. 31 | # ''' 32 | 33 | # YAML without l2 cache congifuration 34 | 35 | profiles: 36 | cfg: 37 | isa: RV64IMFDCZicsr 38 | metrics: # Metrics such as grouping instructs by operation and privledge mode are hard coded in the profiler.py file as the input to these functions is the commit log. 39 | branch_ops: 40 | group_by_branch_offset: 41 | - Grouping Branchs by Offset Size. 42 | group_by_branch_sign: 43 | - Grouping Branchs by Direction. 44 | loop_compute: 45 | - Nested loop computation. 46 | register_compute: 47 | register_compute: 48 | - Register computation. 49 | fregister_compute: 50 | - Floating point register computation. 51 | jumps_ops: 52 | jumps_compute: 53 | - Grouping jumps by jump direction. 54 | jump_size: 55 | - Grouping jumps by jump size. 56 | cache: 57 | data_cache_simulator: 58 | - Data cache utilization. 59 | instruction_cache_simulator: 60 | - Instruction cache utilization. 61 | dependency: 62 | raw_compute: 63 | - RAW dependency Computation. 64 | csr_compute: 65 | csr_compute: 66 | - CSR computation. 
67 | store_load_bypass: 68 | store_load_bypass: 69 | - Store load bypass Computation. 70 | pattern: 71 | group_by_pattern: 72 | - Pattern computation. 73 | commitlog_regex : '^core\s+\d+:\s+(\d*)\s+(0x[0-9a-fA-F]+)\s+\((0x[0-9a-fA-F]+)\)\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?' 74 | privilege_mode_regex : '^core\s+\d+:\s+(\d+)' 75 | uarch: 76 | bypass_depth: 2 77 | data_cache: 78 | range: 79 | start: 0x80000000 80 | end: 0x8fffffff 81 | no_of_sets : 8 82 | no_of_ways : 4 83 | line_size : 32 84 | replacement_policy: "FIFO" #round robin 85 | write_back : True # false for write through 86 | write_allocate : True # false for write no allocate 87 | structural_latency: 7 #number of words - 1 88 | instr_cache: 89 | range: 90 | start: 0x80000000 91 | end: 0x8fffffff 92 | no_of_sets : 16 93 | no_of_ways : 2 94 | line_size : 32 95 | replacement_policy: "LFSR" 96 | write_back : True # false for write through 97 | write_allocate : True # false for write no allocate 98 | structural_latency: 7 #number of words - 1 99 | 100 | # interface: 101 | # L1: cache_line 102 | # L2: 32 bits -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/csr_compute.py: -------------------------------------------------------------------------------- 1 | from riscv_isac.log import * 2 | from riscv_application_profiler.consts import * 3 | import riscv_application_profiler.consts as consts 4 | 5 | def csr_compute(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 6 | ''' 7 | Computes the number of reads and writes to each CSR. 8 | 9 | Args: 10 | - master_inst_dict: A dictionary of InstructionEntry objects. 11 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 12 | - extension_used: A list of extensions used in the application. 13 | - config: A yaml with the configuration information. 14 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 15 | 16 | Returns: 17 | - A dictionary with the CSR names as keys and a list of reads and writes as values. 
18 | ''' 19 | 20 | # Initialize dictionaries and lists 21 | csr = {} 22 | csr_reg_list = [] 23 | ret_dict = {'CSR': [], 'Reads': [], 'Writes': []} 24 | prev_inst_csr = None 25 | 26 | # Logging the CSR computation process 27 | logger.info("Computing CSRs.") 28 | for entry in master_inst_dict: 29 | # Loop through CSR instructions 30 | if entry in ops_dict['csrs']: 31 | # If no CSR value is specified 32 | if entry.csr is None: 33 | if 'f' in entry.instr_name: 34 | csr_reg = entry.instr_name[0] + entry.instr_name[2:] 35 | 36 | if csr_reg not in csr_reg_list: 37 | # Create a new entry for the CSR 38 | csr_reg_list.append(csr_reg) 39 | csr[csr_reg] = {'read': 0, 'write': 0} 40 | 41 | # Update read/write counts for the corresponding CSR 42 | if 'fr' in entry.instr_name: 43 | csr[csr_reg]['read'] += 1 44 | elif 'fs' in entry.instr_name: 45 | csr[csr_reg]['write'] += 1 46 | #for flushing pipe checking if write is happening to CSR register 47 | prev_inst_csr = csr[csr_reg] 48 | # If a CSR value is specified 49 | else: 50 | csr_hex = hex(entry.csr) 51 | csr_reg = consts.csr_file.get(csr_hex) 52 | 53 | if csr_reg is not None and csr_reg not in csr_reg_list: 54 | # Create a new entry for the CSR 55 | csr_reg_list.append(csr_reg) 56 | csr[csr_reg] = {'read': 0, 'write': 0} 57 | 58 | if csr_reg is not None: 59 | # Update read/write counts for the corresponding CSR 60 | if 'rw' in entry.instr_name: 61 | rd = str(entry.rd[1]) + str(entry.rd[0]) 62 | if rd == 'x0': 63 | csr[csr_reg]['write'] += 1 64 | prev_inst_csr = csr[csr_reg] 65 | else: 66 | csr[csr_reg]['read'] += 1 67 | csr[csr_reg]['write'] += 1 68 | prev_inst_csr = csr[csr_reg] 69 | elif 'rs' in entry.instr_name or 'rc' in entry.instr_name: 70 | if entry.rs1 is not None: 71 | rs1 = str(entry.rs1[1]) + str(entry.rs1[0]) 72 | else: 73 | rs1 = None 74 | if entry.imm == 0 or rs1 == 'x0': 75 | csr[csr_reg]['read'] += 1 76 | else: 77 | csr[csr_reg]['read'] += 1 78 | csr[csr_reg]['write'] += 1 79 | prev_inst_csr = csr[csr_reg] 80 | elif cycle_accurate_config != None: 81 | # if there's a writing to a csr instr, then we have to flush the pipe 82 | # so we have to add those flush instr to the next instruction 83 | if prev_inst_csr != None: 84 | for op in ops_dict.keys(): 85 | if entry in ops_dict[op]: 86 | ops_dict[op][entry] = ops_dict[op][entry] + cycle_accurate_config['cycles']['flush_cycles']['csr'] 87 | master_inst_dict[entry] = ops_dict[op][entry] 88 | prev_inst_csr = None 89 | # Populate the ret_dict with CSR information 90 | for entry in csr_reg_list: 91 | ret_dict['CSR'].append(entry) 92 | ret_dict['Reads'].append(csr[entry]['read']) 93 | ret_dict['Writes'].append(csr[entry]['write']) 94 | 95 | logger.info("Done.") 96 | 97 | # Return the final results 98 | return ret_dict 99 | 100 | -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/pattern.py: -------------------------------------------------------------------------------- 1 | from riscv_isac.log import * 2 | from riscv_application_profiler.consts import * 3 | import re 4 | from riscv_application_profiler import consts 5 | 6 | def group_by_pattern(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 7 | ''' 8 | Groups instructions based on the operation. 9 | 10 | Args: 11 | - master_inst_dict: A dictionary of InstructionEntry objects. 12 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 
13 | - extension_used: A list of extensions used in the application. 14 | - config: A yaml with the configuration information. 15 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 16 | 17 | 18 | Returns: 19 | - A dictionary with pattern counts, instructions, PC, cycles, and cycles reduced as keys and lists of values. 20 | ''' 21 | # Log the start of the process for getting the pattern. 22 | logger.info("Getting Pattern.") 23 | 24 | 25 | # Initialize dictionaries to hold address counts, patterns, names, and cycle information. 26 | count_dict = {} 27 | pattern_dict = {} 28 | address_name_dict = {} 29 | address_pc_dict = {} 30 | address_cycle_dict = {} 31 | prev = None 32 | # Loop through each entry in the master_inst_dict. 33 | for entry in master_inst_dict: 34 | name = entry.instr_name+ ' ' 35 | if entry.rs1 is not None: 36 | name=name+'rs1: '+str(entry.rs1[1])+str(entry.rs1[0])+' ' 37 | if entry.rs2 is not None: 38 | name=name+'rs2: '+str(entry.rs2[1])+str(entry.rs2[0])+' ' 39 | if entry.rd is not None: 40 | name=name+'rd: '+str(entry.rd[1])+str(entry.rd[0])+' ' 41 | # Check if the instruction address is not in the count_dict. 42 | if hex(entry.instr_addr) not in count_dict: 43 | count_dict[hex(entry.instr_addr)] = 0 44 | address_name_dict[hex(entry.instr_addr)] = name 45 | address_pc_dict[hex(entry.instr_addr)] = hex(entry.instr_addr) 46 | address_cycle_dict[hex(entry.instr_addr)] = master_inst_dict[entry] 47 | count_dict[hex(entry.instr_addr)] += 1 48 | 49 | # Group instructions based on their occurrence count. 50 | for entry in count_dict: 51 | if count_dict[entry] not in pattern_dict: 52 | pattern_dict[count_dict[entry]] = list() 53 | pattern_dict[count_dict[entry]].append(entry) 54 | if 1 in pattern_dict: 55 | del pattern_dict[1] 56 | 57 | # Sort the patterns by occurrence count in descending order. 58 | sort_count_list = sorted(pattern_dict.items(), key=lambda x: x[0], reverse=True) 59 | 60 | # Remove single instructions or patterns with count 1. 61 | sort_count_list = [entry for entry in sort_count_list if len(entry[1]) > 1] 62 | 63 | # Initialize a dictionary to store sorted pattern information. 64 | s_dict = {'count': [], 'instr': [], 'PC': [], 'cycles': [], 'cycles_reduced': []} 65 | 66 | # Process sorted patterns. 67 | for entry in sort_count_list: 68 | adj_inst = [address_name_dict[entry[1][0]]] 69 | adj_pc = [address_pc_dict[entry[1][0]]] 70 | adj_cycles = [address_cycle_dict[entry[1][0]]] 71 | prev = entry[1][0] 72 | for i in entry[1][1:]: 73 | # Check if the difference between addresses is 4 or 2. 74 | if (int(i, 16) - int(prev, 16)) == 4 or (int(i, 16) - int(prev, 16)) == 2: 75 | adj_inst.append(address_name_dict[i]) 76 | adj_pc.append(address_pc_dict[i]) 77 | adj_cycles.append(address_cycle_dict[i]) 78 | elif len(adj_inst) > 1: 79 | # Store the current pattern information. 80 | # if adj_cycles in s_dict['cycles']: 81 | # continue 82 | s_dict['instr'].append(adj_inst) 83 | s_dict['PC'].append(adj_pc) 84 | s_dict['cycles'].append(adj_cycles) 85 | s_dict['count'].append(entry[0]) 86 | adj_inst = [address_name_dict[i]] 87 | adj_pc = [i] 88 | adj_cycles = [address_cycle_dict[i]] 89 | prev = i 90 | if len(adj_inst) > 1: 91 | s_dict['count'].append(entry[0]) 92 | s_dict['cycles'].append(adj_cycles) 93 | s_dict['instr'].append(adj_inst) 94 | s_dict['PC'].append(adj_pc) 95 | 96 | # Calculate improved performance for each pattern. 
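    # Assumption (illustrative reading of the formula below): each of the `count`
    # occurrences of a pattern costs sum(cycles); fusing the sequence into a single
    # one-cycle custom instruction would therefore save count * (sum(cycles) - 1) cycles.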
97 | for i in range(len(s_dict['count'])): 98 | imp_performance = s_dict['count'][i] * (sum(s_dict['cycles'][i]) - 1) 99 | s_dict['cycles_reduced'].append(imp_performance) 100 | 101 | # Log the completion of pattern computation. 102 | logger.info("Pattern computed.") 103 | # Return the computed pattern information. 104 | return s_dict -------------------------------------------------------------------------------- /sample_configs/profiler_config/L2_configs/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | # '''Create one cache level out of given configuration. 3 | 4 | # :param sets: total number of sets, if 1 cache will be full-associative 5 | # :param ways: total number of ways, if 1 cache will be direct mapped 6 | # :param cl_size: number of bytes that can be addressed individually 7 | # :param replacement_policy: FIFO, LRU (default), MRU or RR 8 | # :param write_back: if true (default), write back will be done on evict. 9 | # Otherwise write-through is used.If false, write through 10 | # :param write_allocate: if true (default), a load will be issued on a 11 | # write miss. If false, write no allocate 12 | # :param write_combining: if true, this cache will combine writes and 13 | # issue them on evicts(default is false) 14 | # :param subblock_size: the minimum blocksize that write-combining can 15 | # handle 16 | # :param load_from: the cache level to forward a load in case of a load 17 | # miss or write-allocate, if None, assumed to be main 18 | # memory 19 | # :param store_to: the cache level to forward a store to in case of 20 | # eviction of dirty lines, if None, assumed to be main 21 | # memory 22 | # :param victims_to: the cache level to forward any evicted lines to 23 | # (dirty or not) 24 | 25 | # The total cache size is the product of sets*ways*cl_size. 26 | # Internally all addresses are converted to cacheline indices. 27 | 28 | # Instantization has to happen from last level cache to first level 29 | # cache, since each subsequent level requires a reference of the other 30 | # level. 31 | # ''' 32 | 33 | # YAML with l2 cache congifuration 34 | 35 | profiles: 36 | cfg: 37 | isa: RV64IMFDCZicsr 38 | metrics: # Metrics such as grouping instructs by operation and privledge mode are hard coded in the profiler.py file as the input to these functions is the commit log. However, the metrics such as grouping instructs by CSRs and cache computation is mandatory for a cycle accurate profiling. 39 | branch_ops: 40 | group_by_branch_offset: 41 | - Grouping Branchs by Offset Size. 42 | group_by_branch_sign: 43 | - Grouping Branchs by Direction. 44 | loop_compute: 45 | - Nested loop computation. 46 | register_compute: 47 | register_compute: 48 | - Register computation. 49 | fregister_compute: 50 | - Floating point register computation. 51 | jumps_ops: 52 | jumps_compute: 53 | - Grouping jumps by jump direction. 54 | jump_size: 55 | - Grouping jumps by jump size. 56 | dependency: 57 | raw_compute: 58 | - RAW dependency Computation. 59 | csr_compute: 60 | csr_compute: 61 | - CSR computation. 62 | cache: 63 | data_cache_simulator: 64 | - Data cache utilization. 65 | instruction_cache_simulator: 66 | - Instruction cache utilization. 67 | # cache: 68 | # unified_L2_cache_simulator: 69 | # - Cache utilization. 70 | store_load_bypass: 71 | store_load_bypass: 72 | - Store load bypass Computation. 73 | pattern: 74 | group_by_pattern: 75 | - Pattern computation. 
76 | commitlog_regex : '^core\s+\d+:\s+(\d*)\s+(0x[0-9a-fA-F]+)\s+\((0x[0-9a-fA-F]+)\)\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?' 77 | privilege_mode_regex : '^core\s+\d+:\s+(\d+)' 78 | uarch: 79 | bypass_depth: 2 80 | data_cache: 81 | range: 82 | start: 0x80000000 83 | end: 0x8fffffff 84 | no_of_sets : 64 85 | no_of_ways : 8 86 | line_size : 32 87 | replacement_policy: "FIFO" #round robin 88 | write_back : True # false for write through 89 | write_allocate : True # false for write no allocate 90 | structural_latency: 7 #number of words - 1 91 | instr_cache: 92 | range: 93 | start: 0x80000000 94 | end: 0x8fffffff 95 | no_of_sets : 32 96 | no_of_ways : 2 97 | line_size : 32 98 | replacement_policy: "LFSR" 99 | write_back : True # false for write through 100 | write_allocate : True # false for write no allocate 101 | structural_latency: 7 #number of words - 1 102 | l2_cache: 103 | range: 104 | start: 0x80000000 105 | end: 0x8fffffff 106 | no_of_sets : 32 107 | no_of_ways : 2 108 | line_size : 32 109 | replacement_policy: "LFSR" 110 | write_back : True # false for write through 111 | write_allocate : True # false for write no allocate 112 | structural_latency: 7 #number of words - 1 113 | 114 | # interface: 115 | # L1: cache_line 116 | # L2: 32 bits -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/dependency.py: -------------------------------------------------------------------------------- 1 | from riscv_isac.log import * 2 | from riscv_application_profiler.consts import * 3 | import riscv_application_profiler.consts as consts 4 | import statistics 5 | 6 | def raw_compute(master_inst_dict: list, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 7 | ''' 8 | Groups instructions based on the branch offset. 9 | 10 | Args: 11 | - master_inst_dict: A dictonary of InstructionEntry objects. 12 | - ops_dict: A dictionary with the operations as keys and a list of InstructionEntry objects as values. 13 | - extension_used: A list of extensions used in the application. 14 | - config: A yaml with the configuration information. 15 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 16 | 17 | Returns: 18 | - A dictionary with the operations as keys and a list of InstructionEntry objects as values. 19 | ''' 20 | # Initialize the process of computing register reads after writes. 21 | logger.info("Computing register reads after writes.") 22 | 23 | # Get a list of all registers in the register file. 24 | reg_list = list(consts.reg_file.keys()) + list(consts.freg_file.keys()) 25 | 26 | # Initialize a dictionary to hold register information, initially all with a depth of 1. 27 | regs = {i: {'depth': 1} for i in reg_list} 28 | 29 | # Initialize dictionaries to store results and raw data. 30 | ret_dict = {'Instructions': [], 'Depth': [], 'Count': []} 31 | raw = {} 32 | 33 | # Initialize a list to store combined instructions. 34 | instruction_list = [] 35 | 36 | # Initialize a list to store names of previously encountered registers. 37 | prev_names = [] 38 | 39 | # Iterate through the list of instructions in master_inst_dict. 40 | for entry in master_inst_dict: 41 | # Check if the instruction uses rs1 register. 
42 | if entry.rs1 is not None: 43 | name = str(entry.rs1[1]) + str(entry.rs1[0]) 44 | instr = str(entry.instr_name) 45 | 46 | # Check if this register name was encountered before. 47 | if name in prev_names: 48 | instruction = prev_instr + ' ' + instr 49 | 50 | # Check if the combined instruction is in raw data. 51 | if instruction in raw: 52 | # Check if the register depth matches raw depth. 53 | if regs[name]['depth'] == raw[instruction]['depth']: 54 | raw[instruction]['count'] += 1 55 | prev_names.remove(name) 56 | regs[name]['depth'] = 1 57 | else: 58 | raw[instruction] = {'depth': regs[name]['depth'], 'count': 1} 59 | instruction_list.append(instruction) 60 | prev_names.remove(name) 61 | regs[name]['depth'] = 1 62 | else: 63 | regs[name]['depth'] += 1 64 | 65 | # Check if the instruction uses rs2 register. 66 | if entry.rs2 is not None: 67 | name = str(entry.rs2[1]) + str(entry.rs2[0]) 68 | instr = str(entry.instr_name) 69 | 70 | # Check if this register name was encountered before. 71 | if name in prev_names: 72 | instruction = prev_instr + ' ' + instr 73 | 74 | # Check if the combined instruction is in raw data. 75 | if instruction in raw: 76 | # Check if the register depth matches raw depth. 77 | if regs[name]['depth'] == raw[instruction]['depth']: 78 | raw[instruction]['count'] += 1 79 | prev_names.remove(name) 80 | regs[name]['depth'] = 1 81 | else: 82 | raw[instruction] = {'depth': regs[name]['depth'], 'count': 1} 83 | instruction_list.append(instruction) 84 | prev_names.remove(name) 85 | regs[name]['depth'] = 1 86 | else: 87 | regs[name]['depth'] += 1 88 | 89 | # Check if the instruction defines a destination register (rd). 90 | if entry.rd is not None: 91 | name = str(entry.rd[1]) + str(entry.rd[0]) 92 | prev_instr = str(entry.instr_name) 93 | 94 | # Check if this register name was encountered before. 95 | if name not in prev_names: 96 | prev_names.append(name) 97 | else: 98 | regs[name]['depth'] = 1 99 | 100 | # Populate the result dictionary from raw data. 101 | 102 | if cycle_accurate_config != None: 103 | for entry in raw: 104 | if raw[entry]['depth'] < int(cycle_accurate_config['cycles']['pipeline_depth']): 105 | if cycle_accurate_config['cycles']['bypass_latency'] == None: 106 | ret_dict['Instructions'].append(entry) 107 | ret_dict['Count'].append(raw[entry]['count']) 108 | ret_dict['Depth'].append(raw[entry]['depth']) 109 | else: 110 | for entry in raw: 111 | ret_dict['Instructions'].append(entry) 112 | ret_dict['Count'].append(raw[entry]['count']) 113 | ret_dict['Depth'].append(raw[entry]['depth']) 114 | 115 | # Log the completion of the computation. 116 | logger.info("Done") 117 | 118 | # Return the result dictionary. 119 | return ret_dict -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RISC-V Application Profiler 2 | 3 | The RISC-V Application Profiler is a Python-based tool designed to help software developers optimize the performance of their applications on RISC-V hardware. It works by parsing execution logs and providing insights about the application's behavior. The tool has a modular design, where performance metrics are added as plugins, allowing developers to customize the profiler to their specific needs. The profiler is highly accessible, easy to use, and can be adapted to capture different types of performance metrics. 
Overall, the RISC-V Application Profiler is a flexible and customizable solution for software developers who want to ensure optimal performance of their applications on RISC-V platforms. 4 | 5 | Detailed documentation can be found [here](https://riscv-application-profiler.readthedocs.io/en/latest/). 6 | 7 | ## Installation 8 | 9 | Install `pycachesim`. This is a requirement to use the `caches` plugin in the profiler. 10 | 11 | ```shell 12 | git clone https://github.com/mahendraVamshi/pycachesim.git 13 | cd pycachesim 14 | pip install -e . 15 | cd .. 16 | ``` 17 | 18 | Install `riscv_isac`. This is a development version of isac. 19 | ```shell 20 | git clone https://github.com/mahendraVamshi/riscv-isac.git 21 | cd riscv-isac 22 | pip install -e . 23 | cd .. 24 | ``` 25 | 26 | Finally, install the profiler itself. 27 | ```shell 28 | git clone https://github.com/mahendraVamshi/riscv-application-profiler.git 29 | cd riscv-application-profiler 30 | pip install -e . 31 | ``` 32 | 33 | ## Usage 34 | 35 | To display the help message, run: 36 | ```shell 37 | riscv_application_profiler --help 38 | riscv_application_profiler profile --help 39 | ``` 40 | 41 | To generate a log file, run: 42 | ```shell 43 | spike --log-commits 44 | ``` 45 | 46 | **NOTE**: You need to use ``--enable-commitlog`` while configuring [spike](https://github.com/riscv-software-src/riscv-isa-sim#build-steps). 47 | 48 | To profile an application, run: 49 | ```shell 50 | riscv_application_profiler profile --log --output --config config.yaml 51 | ``` 52 | To profile an application with cycle accurate simulation, run: 53 | ```shell 54 | riscv_application_profiler profile --log --output --config config.yaml --cycle_accurate_config config.yaml 55 | ``` 56 | **Info**: 57 | 58 | Path to the log file is mandatory. Example log files can be found in the `sample_artifacts/logs` directory. 59 | 60 | Path to the output directory is optional. If not provided, the profiler will create a directory named `build` in the current working directory. 61 | 62 | Path to the config file is mandatory. Example `config.yaml` is located in `sample_configs/profiler_config` directory. L2 cache config files are located in `sample_configs/profiler_config/L2_configs` directory. 63 | 64 | Path to the cycle accurate config file is optional. Example `config.yaml` is located in `sample_configs/cycle_accurate` directory. Use this option only if you want to profile an application with cycle accurate simulation. L2 cache config files are located in `sample_configs/cycle_accurate/L2_configs` directory. 65 | 66 | Command line options to the `profile` command: 67 | 68 | ```text 69 | Options: 70 | -l, --log TEXT This option expects the path to an execution 71 | log. [required] 72 | -o, --output TEXT Path to the output file. [default: ./build] 73 | -c, --config TEXT Path to the YAML configuration file. 74 | [required] 75 | -ca, --cycle_accurate_config TEXT 76 | Path to the YAML cycle accurate 77 | configuration file. 78 | -v, --verbose [info|error|debug] 79 | Set verbose level 80 | --help Show this message and exit. 
81 | ```
82 | 
83 | Example:
84 | 
85 | To profile an application, run:
86 | 
87 | ```shell
88 | riscv_application_profiler profile --log ./sample_artifacts/logs/hello.log --output ./build --config ./sample_configs/profiler_config/config.yaml
89 | ```
90 | To profile an application with cycle accurate simulation, run:
91 | 
92 | ```shell
93 | riscv_application_profiler profile --log ./sample_artifacts/logs/hello.log --output ./build --config ./sample_configs/profiler_config/L2_configs/config.yaml --cycle_accurate_config ./sample_configs/cycle_accurate/L2_configs/config.yaml
94 | ```
95 | 
96 | **Note**: As of today, the log file must be an execution log generated using spike. Support for configuring log formats will be added in the future.
97 | 
98 | **Note**: Metrics such as grouping instructions by operation and privilege mode are hard-coded in the profiler.py file, as the input to these functions is the commit log. However, metrics such as grouping instructions by CSRs and the cache computation are mandatory for cycle-accurate profiling.
99 | 
100 | ## Features
101 | 
102 | The profiler supports the following list of features as plugins:
103 | 
104 | Grouping instructions by:
105 | - Type of operation performed.
106 | - Privilege mode used for execution.
107 | - Directions and Sizes (for jumps/branches).
108 | 
109 | Lists:
110 | - Presence of Nested Loops.
111 | - Store-Load bypass.
112 | - Presence of RAW dependencies.
113 | - Pattern detection for custom instructions.
114 | 
115 | Histogram for:
116 | - RegisterFile (XRF/FRF) usage.
117 | - CSR accesses.
118 | - D$/I$ Hits/Misses/Usage/Utilization.
119 | - Unified L2 Cache Hits/Misses/Usage/Utilization.
120 | 
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 | 
3 | ## Our Pledge
4 | 
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 | 
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | . 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. 
Violating these terms may lead to a temporary or
93 | permanent ban.
94 | 
95 | ### 3. Temporary Ban
96 | 
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 | 
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 | 
106 | ### 4. Permanent Ban
107 | 
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 | 
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 | 
115 | ## Attribution
116 | 
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 | 
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 | 
124 | [homepage]: https://www.contributor-covenant.org
125 | 
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 | 
--------------------------------------------------------------------------------
/docs/intro.rst:
--------------------------------------------------------------------------------
1 | RISC-V Application Profiler
2 | ===========================
3 | 
4 | The RISC-V Application Profiler is a Python-based tool designed to help software developers optimize the performance of their applications on RISC-V hardware. It works by parsing execution logs and providing insights about the application's behavior. The tool has a modular design, where performance metrics are added as plugins, allowing developers to customize the profiler to their specific needs. The profiler is highly accessible, easy to use, and can be adapted to capture different types of performance metrics. Overall, the RISC-V Application Profiler is a flexible and customizable solution for software developers who want to ensure optimal performance of their applications on RISC-V platforms.
5 | 
6 | Installation
7 | ------------
8 | 
9 | Install `pycachesim`. This is a requirement to use the `caches` plugin in the profiler.
10 | 
11 | .. code-block:: shell
12 | 
13 | git clone https://github.com/mahendraVamshi/pycachesim.git
14 | cd pycachesim
15 | pip install -e .
16 | cd ..
17 | 
18 | Install `riscv_isac`. This is a development version of isac.
19 | 
20 | git clone https://github.com/mahendraVamshi/riscv-isac.git
21 | cd riscv-isac
22 | pip install -e .
23 | cd ..
24 | 
25 | Finally, install the profiler itself.
26 | 
27 | git clone https://github.com/mahendraVamshi/riscv-application-profiler.git
28 | cd riscv-application-profiler
29 | pip install -e .
30 | 
31 | Usage
32 | -----
33 | 
34 | To display the help message, run:
35 | 
36 | .. code-block:: shell
37 | 
38 | riscv_application_profiler --help
39 | riscv_application_profiler profile --help
40 | 
41 | To generate a log file, run:
42 | 
43 | .. code-block:: shell
44 | 
45 | spike --log-commits
46 | 
47 | **NOTE**: You need to use `--enable-commitlog` while configuring `spike` (https://github.com/riscv-software-src/riscv-isa-sim#build-steps).
48 | 
49 | To profile an application, run:
50 | 
51 | .. code-block:: shell
52 | 
53 | riscv_application_profiler profile --log --output --config config.yaml
54 | 
55 | To profile an application with cycle accurate simulation, run:
56 | 
57 | .. code-block:: shell
58 | 
59 | riscv_application_profiler profile --log --output --config config.yaml --cycle_accurate_config config.yaml
60 | 
61 | **Info**:
62 | 
63 | Path to the log file is mandatory. Example log files can be found in the `sample_artifacts/logs` directory.
64 | 
65 | Path to the output directory is optional. If not provided, the profiler will create a directory named `build` in the current working directory.
66 | 
67 | Path to the config file is mandatory. Example `config.yaml` is located in `sample_configs/profiler_config` directory. L2 cache config files are located in `sample_configs/profiler_config/L2_configs` directory.
68 | 
69 | Path to the cycle accurate config file is optional. Example `config.yaml` is located in `sample_configs/cycle_accurate` directory. Use this option only if you want to profile an application with cycle accurate simulation. L2 cache config files are located in `sample_configs/cycle_accurate/L2_configs` directory.
70 | 
71 | **Command line options to the `profile` command:**
72 | 
73 | Options:
74 | -l, --log TEXT This option expects the path to an execution
75 | log. [required]
76 | -o, --output TEXT Path to the output file. [default: ./build]
77 | -c, --config TEXT Path to the YAML configuration file.
78 | [required]
79 | -ca, --cycle_accurate_config TEXT
80 | Path to the YAML cycle accurate
81 | configuration file.
82 | -v, --verbose [info|error|debug]
83 | Set verbose level
84 | --help Show this message and exit.
85 | 
86 | **Example:**
87 | 
88 | To profile an application, run:
89 | 
90 | .. code-block:: shell
91 | 
92 | riscv_application_profiler profile --log ./sample_artifacts/logs/hello.log --output ./build --config ./sample_configs/profiler_config/config.yaml
93 | 
94 | To profile an application with cycle accurate simulation, run:
95 | 
96 | .. code-block:: shell
97 | 
98 | riscv_application_profiler profile --log ./sample_artifacts/logs/hello.log --output ./build --config ./sample_configs/profiler_config/L2_configs/config.yaml --cycle_accurate_config ./sample_configs/cycle_accurate/L2_configs/config.yaml
99 | 
100 | **Note**: As of today, the log file must be an execution log generated using `spike`. Support for configuring log formats will be added in the future.
101 | 
102 | **Note**: Metrics such as grouping instructions by operation and privilege mode are hard-coded in the profiler.py file, as the input to these functions is the commit log. However, metrics such as grouping instructions by CSRs and the cache computation are mandatory for cycle-accurate profiling.
103 | 
104 | Features
105 | --------
106 | 
107 | The profiler supports the following list of features as plugins:
108 | 
109 | - Grouping instructions by:
110 | - Type of operation performed.
111 | - Privilege mode used for execution.
112 | - Directions and Sizes (for jumps/branches).
113 | 
114 | - Lists:
115 | - Presence of Nested Loops.
116 | - Store-Load bypass.
117 | - Presence of RAW dependencies.
118 | - Pattern of repeated instructions.
119 | 
120 | - Histogram for:
121 | - RegisterFile (XRF/FRF) usage.
122 | - CSR accesses.
123 | - D$/I$ Hits/Misses/Usage/Utilization.
124 | - Unified L2 Cache Hits/Misses/Usage/Utilization.
125 | 
126 | 
--------------------------------------------------------------------------------
/riscv_application_profiler/plugins/store_load_bypass.py:
--------------------------------------------------------------------------------
1 | from riscv_isac.log import *
2 | from riscv_application_profiler.consts import *
3 | import riscv_application_profiler.consts as consts
4 | from pprint import pprint
5 | 
6 | def store_load_bypass(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config):
7 | '''
8 | Computes the number of instances of store load bypass.
9 | 
10 | Args:
11 | - master_inst_dict: A dictionary of InstructionEntry objects.
12 | - ops_dict: A dictionary containing the operations as keys and a list of
13 | InstructionEntry objects as values.
14 | - extension_used: A list of extensions used in the application.
15 | - config: A yaml with the configuration information.
16 | - cycle_accurate_config: A yaml with the cycle accurate configuration information.
17 | 
18 | Returns:
19 | - A dictionary with 'Address', 'Counts', 'Depth' and 'Bypass Width' as keys and lists of their values as values.
20 | 
21 | '''
22 | 
23 | # Log the start of the process for computing store-load bypass.
24 | logger.info("Computing store load bypass.")
25 | 
26 | # make a bypass dict
27 | bypass_dict = {}
28 | tracking = {}
29 | eff_addr = []
30 | ret_dict = {'Address': [], 'Counts': [], 'Depth': [], 'Bypass Width': []}
31 | 
32 | # iterate through master inst list
33 | # if a store is encountered, make a set of bytes touched and look out for loads from these bytes else continue
34 | # upon encountering a load that touches these bytes, freeze the depth and reset counts/depths
35 | 
36 | for entry in master_inst_dict:
37 | if entry in ops_dict['stores']: # this is a store
38 | # Determine the base address for the memory access.
39 | reg_name = 'x2' if 'sp' in entry.instr_name else f'x{entry.rs1[0]}'
40 | base = int(consts.reg_file[reg_name], 16)
41 | address = hex(base + entry.imm) if entry.imm is not None else hex(base)
42 | access_sz = 8 if 'd' in entry.instr_name \
43 | else 4 if 'w' in entry.instr_name \
44 | else 2 if 'h' in entry.instr_name \
45 | else 1 if 'b' in entry.instr_name \
46 | else None
47 | 
48 | # sanity check
49 | if access_sz is None:
50 | raise Exception(f'Invalid access size encountered: {entry.instr_name}')
51 | # make a set of all bytes touched by this store
52 | bytes_touched = {hex(int(address, 16) + i) for i in range(0, access_sz, 1)}
53 | for _entry in bytes_touched:
54 | tracking[_entry] = {}
55 | tracking[_entry]['depth'] = 0
56 | tracking[_entry]['s_access_sz'] = access_sz
57 | 
58 | # look for loads
59 | if entry in ops_dict['loads']:
60 | # Determine the base address for the memory access.
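# As with stores, sp-relative compressed loads implicitly use x2 as the base register; the base value itself comes from the reg_file snapshot kept in consts.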
61 | reg_name = 'x2' if 'sp' in entry.instr_name else f'x{entry.rs1[0]}' 62 | base = int(consts.reg_file[reg_name], 16) 63 | address = hex(base + entry.imm) if entry.imm is not None else hex(base) 64 | eff_addr.append(address) 65 | access_sz = 8 if 'd' in entry.instr_name \ 66 | else 4 if 'w' in entry.instr_name \ 67 | else 2 if 'h' in entry.instr_name \ 68 | else 1 if 'b' in entry.instr_name \ 69 | else None 70 | if access_sz is None: 71 | raise Exception(f'Invalid access size encountered: {entry.instr_name}') 72 | count = 0 73 | bytes_touched = {hex(int(address, 16) + i) for i in range(0, access_sz, 1)} 74 | for byte_entry in bytes_touched: 75 | if byte_entry in tracking: 76 | count += 1 77 | for _entry in bytes_touched: 78 | if _entry in tracking: 79 | if _entry in bypass_dict: 80 | if bypass_dict[_entry]['depth'] == tracking[_entry]['depth']: 81 | bypass_dict[_entry]['counts'] += 1 82 | 83 | else: 84 | bypass_dict[_entry] = {'counts': 1, 'depth': tracking[_entry]['depth'], 'bypass_width': count} 85 | tracking.pop(_entry) 86 | 87 | if entry.instr_name not in ops_dict['loads']: # this is a regular instruction which causes a deeper bypass 88 | for _entry in tracking: 89 | tracking[_entry]['depth'] += 1 90 | 91 | # Update register values based on commit information. 92 | if (entry.reg_commit is not None): 93 | if (entry.reg_commit[1] != '0'): 94 | consts.reg_file[f'x{int(entry.reg_commit[1])}'] = entry.reg_commit[2] 95 | 96 | keys_to_remove = [] 97 | 98 | # Iterate over the dictionary and identify keys to remove. 99 | for entry in bypass_dict: 100 | if entry not in eff_addr: 101 | keys_to_remove.append(entry) 102 | 103 | # Remove the identified keys from the dictionary. 104 | for key in keys_to_remove: 105 | bypass_dict.pop(key) 106 | 107 | 108 | 109 | 110 | # Reset register values. 111 | consts.reg_file = {f'x{i}': '0x00000000' for i in range(32)} 112 | 113 | # Populate the result dictionary with store-load bypass information. 114 | for address in bypass_dict: 115 | ret_dict['Address'].append(address) 116 | ret_dict['Counts'].append(bypass_dict[address]['counts']) 117 | ret_dict['Depth'].append(bypass_dict[address]['depth']) 118 | ret_dict['Bypass Width'].append(bypass_dict[address]['bypass_width']) 119 | 120 | # Log the completion of the store-load bypass computation. 121 | logger.info('Done.') 122 | 123 | # Return the resulting dictionary containing store-load bypass data. 124 | return ret_dict 125 | 126 | 127 | -------------------------------------------------------------------------------- /riscv_application_profiler/profiler.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from riscv_application_profiler.consts import * 3 | import riscv_application_profiler.consts as consts 4 | from riscv_isac.log import * 5 | from riscv_isac.plugins.spike import * 6 | from riscv_application_profiler.plugins import instr_groups 7 | from riscv_application_profiler import verif 8 | from riscv_application_profiler import plugins 9 | import riscv_config.isa_validator as isaval 10 | from riscv_application_profiler.utils import Utilities 11 | import os 12 | import yaml 13 | 14 | # script_dir = os.path.dirname(os.path.abspath(__file__)) 15 | # config_path = os.path.join(script_dir, 'config.yaml') 16 | # with open(consts.config_path, 'r') as config_file: 17 | # config = yaml.safe_load(config_file) 18 | 19 | def print_stats(op_dict, counts): 20 | ''' 21 | Prints the statistics of the grouped instructions. 
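One INFO line is logged per operation group, in the order the groups appear in op_dict.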
22 | 23 | Args: 24 | - op_dict: A dictionary with the operations as keys and a list of InstructionEntry 25 | objects as values. 26 | - counts: A dictionary with the operations as keys and the number of instructions 27 | in each group as values. 28 | ''' 29 | logger.info("Printing statistics.") 30 | for op in op_dict.keys(): 31 | logger.info(f'{op}: {counts[op]}') 32 | logger.info("Done.") 33 | 34 | def run(log, isa, output, verbose, config, cycle_accurate_config): #, check): 35 | from rvop_decoder.rvopcodesdecoder import disassembler 36 | spike_parser = spike() 37 | spike_parser.setup(trace=str(log), arch='rv64') 38 | iter_commitlog = spike_parser.__iter__() 39 | with open(log, 'r') as logfile: 40 | # Read the log file 41 | lines = logfile.readlines() 42 | cl_matches_list = [iter_commitlog.__next__() for i in range(len(lines))] 43 | isac_decoder = disassembler() 44 | isac_decoder.setup(arch='rv64') 45 | # master_inst_list = [] 46 | master_inst_dict = {} 47 | for entry in cl_matches_list: 48 | if entry.instr is None: 49 | continue 50 | temp_entry = isac_decoder.decode(entry) 51 | # master_inst_list.append(temp_entry) 52 | master_inst_dict[temp_entry] = 1 53 | # master_inst_dict = {entry: 1 for entry in master_inst_list} 54 | logger.info(f'Parsed {len(master_inst_dict)} instructions.') 55 | logger.info("Decoding...") 56 | logger.info("Done decoding instructions.") 57 | logger.info("Starting to profile...") 58 | 59 | utils = Utilities(log, output) 60 | utils.metadata() 61 | 62 | # Grouping by operations 63 | groups = [ 64 | 'loads', 65 | 'stores', 66 | 'imm computes', 67 | 'imm shifts', 68 | 'reg computes', 69 | 'reg shifts', 70 | 'jumps', 71 | 'branches', 72 | "compares", 73 | "conversions", 74 | "moves", 75 | "classifies", 76 | "csrs", 77 | "fence", 78 | ] 79 | 80 | (extension_list, err, err_list) = isaval.get_extension_list(isa) 81 | 82 | for e in err_list: 83 | logger.error(e) 84 | if err: 85 | raise SystemExit(1) 86 | 87 | isa_arg = isa.split('I')[0] 88 | 89 | ret_dict, extension_instruction_list, op_dict = instr_groups.group_by_operation(groups, isa_arg, extension_list, master_inst_dict, config, cycle_accurate_config) 90 | if (len(extension_instruction_list)<=len(master_inst_dict)): 91 | # left_out=[] 92 | # for i in master_inst_list: 93 | # if i not in extension_instruction_list: 94 | # left_out.append(i) 95 | # print(i) 96 | logger.warning("Check the extension input.") 97 | 98 | curr_ops_dict = utils.compute_ops_dict(args_list=groups, isa_arg=isa_arg, ext_list=extension_list) 99 | 100 | if 'C' in extension_list: 101 | logger.warning("riscv-isac does not decode immediate fields for compressed instructions. \ 102 | Value based metrics on branch ops may be inaccurate.") 103 | 104 | 105 | utils.tabulate_stats(ret_dict, header_name='Grouping instructions by Operation') 106 | ret_dict = instr_groups.privilege_modes(log,config) 107 | utils.tabulate_stats(ret_dict, header_name='Privilege Mode') 108 | 109 | if cycle_accurate_config != None: 110 | 111 | if 'cache' not in config['profiles']['cfg']['metrics'] or 'csr_compute' not in config['profiles']['cfg']['metrics']: 112 | logger.error("Cache and CSR compute metrics are not enabled. 
Please enable them for cycle accurate profiling.") 113 | raise SystemExit(1) 114 | 115 | for metric in config['profiles']['cfg']['metrics']: 116 | # Finding the new plugin file mentioned in the yaml file 117 | spec = importlib.util.spec_from_file_location("plugins", f"riscv_application_profiler/plugins/{metric}.py") 118 | # Converting file to a module 119 | metric_module = importlib.util.module_from_spec(spec) 120 | # Importing the module 121 | spec.loader.exec_module(metric_module) 122 | 123 | for funct in config['profiles']['cfg']['metrics'][metric]: 124 | funct_to_call = getattr(metric_module, funct) 125 | ret_dict1 = funct_to_call(master_inst_dict, ops_dict=op_dict, extension_used=extension_list, config= config, cycle_accurate_config=cycle_accurate_config) 126 | utils.tabulate_stats(ret_dict1, header_name=funct) 127 | 128 | # total_cycles = op_dict['total_cycles'] 129 | total_cycles = sum([master_inst_dict[entry] for entry in master_inst_dict]) + cycle_accurate_config['cycles']['reset_cycles'] 130 | ret_dict = {"Total Cycles": [total_cycles]} 131 | utils.tabulate_stats(ret_dict, header_name='Total Cycles') 132 | 133 | else: 134 | for metric in config['profiles']['cfg']['metrics']: 135 | # Finding the new plugin file mentioned in the yaml file 136 | spec = importlib.util.spec_from_file_location("plugins", f"riscv_application_profiler/plugins/{metric}.py") 137 | # Converting file to a module 138 | metric_module = importlib.util.module_from_spec(spec) 139 | # Importing the module 140 | spec.loader.exec_module(metric_module) 141 | 142 | for funct in config['profiles']['cfg']['metrics'][metric]: 143 | funct_to_call = getattr(metric_module, funct) 144 | ret_dict1 = funct_to_call(master_inst_dict, ops_dict=op_dict, extension_used=extension_list, config= config, cycle_accurate_config=cycle_accurate_config) 145 | utils.tabulate_stats(ret_dict1, header_name=funct) 146 | -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/register_compute.py: -------------------------------------------------------------------------------- 1 | from riscv_isac.log import * 2 | from riscv_application_profiler.consts import * 3 | import riscv_application_profiler.consts as consts 4 | import statistics 5 | 6 | def register_compute(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 7 | ''' 8 | Computes the number of reads and writes to each register. 9 | Args: 10 | - master_inst_dict: A dictionary of InstructionEntry objects. 11 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 12 | - extension_used: A list of extensions used in the application. 13 | - config: A yaml with the configuration information. 14 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 15 | 16 | 17 | Returns: 18 | - A dictionary with the registers as keys and a list of reads and writes as values. 19 | ''' 20 | # Log the start of the process for computing register read and write counts. 21 | logger.info("Computing register read writes.") 22 | 23 | # Get a list of all registers in the register file. 24 | reg_list = list(consts.reg_file.keys()) 25 | 26 | # Initialize a dictionary to track read and write counts for each register. 27 | regs = {i: {'write_count': 0, 'read_count': 0} for i in reg_list} 28 | 29 | # Initialize dictionaries to hold the resulting data. 
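# ret_dict holds parallel column lists (one row per register) so the caller can tabulate it directly.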
30 | ret_dict = {'Register': [], 'Reads': [], 'Writes': []} 31 | 32 | # Iterate through the list of instructions in master_inst_dict. 33 | for entry in master_inst_dict: 34 | inst_name = str(entry.instr_name) 35 | if 'f' in inst_name: 36 | continue 37 | # Check if the instruction uses rs1 register. 38 | if entry.rs1 is not None: 39 | name = str(entry.rs1[1]) + str(entry.rs1[0]) 40 | regs[name]['read_count'] += 1 41 | # Check if the instruction uses rs2 register. 42 | if entry.rs2 is not None: 43 | name = str(entry.rs2[1]) + str(entry.rs2[0]) 44 | regs[name]['read_count'] += 1 45 | # Check if the instruction defines a destination register (rd). 46 | if entry.rd is not None: 47 | name = str(entry.rd[1]) + str(entry.rd[0]) 48 | regs[name]['write_count'] += 1 49 | # if (entry.reg_commit is None): 50 | # if 'fence' in entry.instr_name or 'j' in entry.instr_name: 51 | # continue 52 | # # print(entry) 53 | # else: 54 | # if 'l' in entry.instr_name or 's' in entry.instr_name: 55 | # continue 56 | # print(entry) 57 | 58 | # Populate the result dictionary with register read and write counts. 59 | for reg in reg_list: 60 | ret_dict['Register'].append(reg) 61 | ret_dict['Reads'].append(regs[reg]['read_count']) 62 | ret_dict['Writes'].append(regs[reg]['write_count']) 63 | 64 | logger.info('Done.') 65 | 66 | # Return the resulting dictionary containing register read and write counts. 67 | return ret_dict 68 | 69 | 70 | def fregister_compute(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 71 | ''' 72 | Computes the number of reads and writes to each floating point register. 73 | Args: 74 | - master_inst_dict: A dictionary of InstructionEntry objects. 75 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 76 | - extension_used: A list of extensions used in the application. 77 | - config: A yaml with the configuration information. 78 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 79 | 80 | 81 | Returns: 82 | - A dictionary with the registers as keys and a list of reads and writes as values. 83 | ''' 84 | # Log the start of the process for computing F_register read and write counts. 85 | logger.info("Computing F_register read writes.") 86 | 87 | # Initialize an empty list to store F_register names and a dictionary to track counts. 88 | reg_list = [] 89 | regs = {} 90 | 91 | # Initialize dictionaries to hold the resulting data. 92 | ret_dict = {'F_Register': [], 'Reads': [], 'Writes': []} 93 | 94 | # Check if 'F' and 'D' extensions are present, if not, return empty lists and dictionary. 95 | if 'F' not in extension_used or 'D' not in extension_used: 96 | return (ret_dict) 97 | 98 | # Log that the process of computing register read and write counts is starting. 99 | logger.info("Computing register read writes.") 100 | 101 | # Get a list of all F_registers in the F_register file. 102 | reg_list = list(consts.freg_file.keys()) 103 | 104 | # Initialize a dictionary to track read and write counts for each F_register. 105 | regs = {i: {'write_count': 0, 'read_count': 0} for i in reg_list} 106 | 107 | # Initialize dictionaries to hold the resulting data. 108 | ret_dict = {'F_Register': [], 'Reads': [], 'Writes': []} 109 | 110 | # Iterate through the list of instructions in master_inst_dict. 111 | for entry in master_inst_dict: 112 | inst_name = str(entry.instr_name) 113 | # Check if the instruction involves F_registers. 
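# Floating-point mnemonics start with 'f' (e.g. fadd.d, flw); the 'x' checks below then exclude any integer operands such instructions carry.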
114 | if 'f' in inst_name: 115 | # Check if the instruction uses rs1 F_register. 116 | if entry.rs1 is not None and 'x' not in entry.rs1[1]: 117 | name = str(entry.rs1[1]) + str(entry.rs1[0]) 118 | regs[name]['read_count'] += 1 119 | # Check if the instruction uses rs2 F_register. 120 | if entry.rs2 is not None and 'x' not in entry.rs2[1]: 121 | name = str(entry.rs2[1]) + str(entry.rs2[0]) 122 | regs[name]['read_count'] += 1 123 | # Check if the instruction defines a destination F_register (rd). 124 | if entry.rd is not None and 'x' not in entry.rd[1]: 125 | name = str(entry.rd[1]) + str(entry.rd[0]) 126 | regs[name]['write_count'] += 1 127 | 128 | # Populate the result dictionary with F_register read and write counts. 129 | for reg in reg_list: 130 | ret_dict['F_Register'].append(reg) 131 | ret_dict['Reads'].append(regs[reg]['read_count']) 132 | ret_dict['Writes'].append(regs[reg]['write_count']) 133 | 134 | # Log the completion of F_register read and write computation. 135 | logger.info('Done.') 136 | 137 | # Return the resulting dictionary containing F_register read and write counts. 138 | return ret_dict -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/jumps_ops.py: -------------------------------------------------------------------------------- 1 | from riscv_isac.log import * 2 | from riscv_application_profiler.consts import * 3 | import riscv_application_profiler.consts as consts 4 | 5 | def jumps_compute(master_inst_dict: dict ,ops_dict: dict, extension_used: list,config, cycle_accurate_config): 6 | ''' 7 | Computes the number of jumps in the program. 8 | 9 | Args: 10 | - master_inst_dict: A dictionary of InstructionEntry objects. 11 | - ops_dict: A dictionary containing the operations as keys and a list of 12 | InstructionEntry objects as values. 13 | - extension_used: A list of extensions used in the application. 14 | - config: A yaml with the configuration information. 15 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 16 | 17 | 18 | Returns: 19 | - A dictionary with the jumps as keys and the number of jumps which are forward and backward. 20 | ''' 21 | # Log the start of the process for computing jumps. 22 | logger.info("Computing jumps.") 23 | 24 | # Initialize dictionaries to hold jump data and direction information. 25 | op_dict = {'forward': [], 'backward': []} 26 | direc_list = ['forward', 'backward'] 27 | direc_dict = {'forward': {'count': 0}, 'backward': {'count': 0}} 28 | 29 | # Initialize a dictionary to hold the resulting direction and count data. 30 | ret_dict = {'Direction': direc_list, 'Count': []} 31 | 32 | # Iterate through each instruction in master_inst_dict. 33 | for entry in master_inst_dict: 34 | 35 | # Check if the instruction is a jump operation. 36 | if entry in ops_dict['jumps']: 37 | if str(entry.instr_name) == 'jalr': 38 | rs1 = str(entry.rs1[1]) + str(entry.rs1[0]) 39 | rd = str(entry.rd[1]) + str(entry.rd[0]) 40 | jump_value = entry.imm + int(consts.reg_file[rs1], 16) 41 | consts.reg_file[rd] = hex(int(entry.instr_addr) + 4) 42 | else: 43 | jump_value = entry.imm 44 | 45 | # Handle the case where jump_value is None or negative. 
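# c.jr/c.jalr carry no decoded immediate, so their target is reconstructed from rs1 using the tracked register-file snapshot.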
46 | if jump_value is None: 47 | if 'c.jr' in entry.instr_name or 'c.jalr' in entry.instr_name: 48 | rs1 = str(entry.rs1[1]) + str(entry.rs1[0]) 49 | jump_value = int(entry.instr_addr) + int(consts.reg_file[rs1], 16) 50 | if 'c.jalr' in entry.instr_name: 51 | consts.reg_file['x1'] = hex(int(entry.instr_addr) + 2) 52 | if jump_value < 0: 53 | op_dict['backward'].append(entry) 54 | direc_dict['backward']['count'] += 1 55 | else: 56 | op_dict['forward'].append(entry) 57 | direc_dict['forward']['count'] += 1 58 | 59 | 60 | # Update register values based on commit information. 61 | if (entry.reg_commit is not None): 62 | name = str(entry.reg_commit[0]) + str(entry.reg_commit[1]) 63 | if (name != 'x0'): 64 | consts.reg_file[name] = entry.reg_commit[2] 65 | 66 | # Reset register values. 67 | consts.reg_file = {f'x{i}': '0x00000000' for i in range(32)} 68 | 69 | # Log the completion of jump computation. 70 | logger.info('Done.') 71 | 72 | # Populate the result dictionary with direction and count information. 73 | ret_dict['Count'].append(direc_dict['forward']['count']) 74 | ret_dict['Count'].append(direc_dict['backward']['count']) 75 | 76 | # Return the resulting dictionary containing jump direction and count data. 77 | return ret_dict 78 | 79 | 80 | def jump_size(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 81 | ''' 82 | Computes the number of jumps in the program. 83 | 84 | Args: 85 | - master_inst_dict: A dict of InstructionEntry objects. 86 | - ops_dict: A dictionary containing the operations as keys and a list of 87 | InstructionEntry objects as values. 88 | - extension_used: A list of extensions used in the application. 89 | - config: A yaml with the configuration information. 90 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 91 | 92 | 93 | Returns: 94 | - A dictionary with the jumps as keys and the number of jumps and jump size. 95 | 96 | ''' 97 | # Log the start of the process for computing jump size. 98 | logger.info("Computing jump size.") 99 | 100 | # Initialize dictionaries and lists to hold jump instruction data. 101 | jump_instr = {} # Dictionary to store information about jump instructions. 102 | target_address = [] # List to store target addresses for jumps. 103 | ret_dict = {'Instruction name':[],'count':[],'size':[]} # Dictionary to store return data. 104 | 105 | # Iterate through each instruction in master_inst_dict. 106 | for entry in master_inst_dict: 107 | # Check if the instruction is a jump operation. 108 | if entry in ops_dict['jumps']: 109 | instr = '' # Initialize instruction string. 110 | size = 0 # Initialize size of the jump. 111 | 112 | # Calculate the target address for the jump. 
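# jalr targets are register-relative (rs1 + imm); jal/c.j/c.jal are PC-relative (instr_addr + imm); c.jr/c.jalr take the target straight from rs1.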
113 | if entry.imm is not None: 114 | if entry.instr_name == 'jalr': 115 | rs1 = f"{entry.rs1[1]}{entry.rs1[0]}" 116 | rd = f"{entry.rd[1]}{entry.rd[0]}" 117 | ta = int(consts.reg_file[rs1], 16) + int(entry.imm) 118 | instr = f"{entry.instr_name} {rd}, {entry.imm}({rs1})" 119 | consts.reg_file[rd] = hex(int(entry.instr_addr) + 4) 120 | else: 121 | jump_value = entry.imm 122 | ta = int(entry.instr_addr) + int(jump_value) 123 | if entry.instr_name == 'c.jal': 124 | instr = f"{entry.instr_name} {entry.imm}" 125 | consts.reg_file['x1'] = hex(int(entry.instr_addr) + 2) 126 | elif entry.instr_name == 'c.j': 127 | instr = f"{entry.instr_name} {entry.imm}" 128 | elif entry.instr_name == 'jal': 129 | rd = f"{entry.reg_commit[1]}{entry.reg_commit[0]}" 130 | instr = f"{entry.instr_name} {rd}, {entry.imm}" 131 | consts.reg_file['x1'] = hex(int(entry.instr_addr) + 4) 132 | elif entry.instr_name in {'c.jr', 'c.jalr'}: 133 | rs1 = f"{entry.rs1[1]}{entry.rs1[0]}" 134 | ta = int(consts.reg_file[rs1], 16) 135 | if 'c.jalr' in entry.instr_name: 136 | consts.reg_file['x1'] = hex(int(entry.instr_addr) + 2) 137 | instr = f"{entry.instr_name} {rs1}" 138 | else: 139 | logger.debug(f"Immediate value not found for: {entry}") 140 | 141 | # Calculate the size of the jump instruction. 142 | size = abs(int(entry.instr_addr) - ta) 143 | 144 | # Update jump_instr dictionary with jump information. 145 | if instr not in jump_instr or (hex(ta) not in target_address and str(size) not in jump_instr[instr]['size(bytes)']): 146 | jump_instr[instr] = {'count': 1, 'size(bytes)': str(size)} 147 | target_address.append(hex(ta)) 148 | else: 149 | jump_instr[instr]['count'] += 1 150 | 151 | # Update register values based on commit information. 152 | if entry.reg_commit is not None and entry.rd is not None: 153 | name = f"{entry.rd[1]}{entry.rd[0]}" 154 | if name != 'x0': 155 | consts.reg_file[name] = entry.reg_commit[2] 156 | 157 | # Reset register values. 158 | consts.reg_file = {f'x{i}': '0x00000000' for i in range(32)} 159 | # Populate the return dictionary with jump instruction data. 160 | ret_dict['Instruction name'] = list(jump_instr.keys()) 161 | ret_dict['count'] = [jump_instr[key]['count'] for key in jump_instr.keys()] 162 | ret_dict['size'] = [jump_instr[key]['size(bytes)'] for key in jump_instr.keys()] 163 | 164 | # Log the completion of jump size computation. 165 | logger.info('Done.') 166 | 167 | # Return the dictionary. 168 | return ret_dict 169 | 170 | 171 | 172 | -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/instr_groups.py: -------------------------------------------------------------------------------- 1 | # See LICENSE for licensing information. 2 | 3 | # this file is a plugin for riscv_application_profiler 4 | # this file classifies instructions into groups based on the conditions defined by the user. 5 | 6 | from riscv_isac.log import * 7 | from riscv_application_profiler.consts import * 8 | import re 9 | from riscv_application_profiler import consts 10 | 11 | def group_by_operation(operations: list, isa, extension_list, master_inst_dict: dict, config, cycle_accurate_config): 12 | 13 | 14 | ''' 15 | Groups instructions based on the operation. 16 | 17 | Args: 18 | - operations: A list of operations to group by. 19 | - master_inst_dict: A dictionary of InstructionEntry objects. 20 | - isa: The ISA used in the application. 21 | - extension_list: A list of extensions used in the application. 
22 | - config: A yaml with the configuration information. 23 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 24 | 25 | Returns: 26 | - dictionaries containing grouped instructions and counts. 27 | 28 | ''' 29 | # Log the start of the process for grouping instructions by operation. 30 | logger.info("Grouping instructions by operation.") 31 | 32 | # Create a dictionary to hold instructions grouped by operation. 33 | op_dict = {f'{op}': {} for op in operations} 34 | 35 | # Create a dictionary to keep track of instruction counts per operation. 36 | ops_count = {f'{op}': {'counts': 0} for op in operations} 37 | 38 | # Create a dictionary to hold the resulting counts and operation names. 39 | ret_dict = {'Operation': [f'{op}' for op in operations], 'Counts': []} 40 | 41 | # Initialize a list to store extension-related instructions. 42 | extension_instruction_list = [] 43 | 44 | prev_instr_name = None 45 | prev_instr_addr = None 46 | # Iterate through the list of instructions in master_inst_dict. 47 | for entry in master_inst_dict: 48 | for extension in extension_list: 49 | for op in operations: 50 | try: 51 | # Check if the current instruction belongs to the specified operation. 52 | if entry.instr_name in ops_dict[isa][extension][op]: 53 | # Append the instruction to the corresponding operation group. 54 | if cycle_accurate_config != None: 55 | matched = False 56 | for inst in cycle_accurate_config['cycles']['instructions_cycles']: 57 | if re.match(inst, entry.instr_name) != None: 58 | # assigning latency to instructions 59 | op_dict[op][entry] = cycle_accurate_config['cycles']['instructions_cycles'][inst]['latency'] 60 | master_inst_dict[entry] = cycle_accurate_config['cycles']['instructions_cycles'][inst]['latency'] 61 | 62 | if prev_instr_addr != entry.instr_addr and prev_instr_name == entry.instr_name: 63 | # checking if curent instr is equal to prev instr in case it can be parallelised 64 | if (op_dict[op][prev_instr] - cycle_accurate_config['cycles']['instructions_cycles'][inst]['throughput'] > 0): 65 | op_dict[op][entry] -= op_dict[op][prev_instr] - cycle_accurate_config['cycles']['instructions_cycles'][inst]['throughput'] 66 | master_inst_dict[entry] -= master_inst_dict[prev_instr] - cycle_accurate_config['cycles']['instructions_cycles'][inst]['throughput'] 67 | 68 | #DEBUG 69 | # if 'rem' in prev_instr_name or 'div' in prev_instr_name: 70 | # op_dict[op][entry] += 1 71 | # master_inst_dict[entry] += 1 72 | 73 | prev_instr = entry 74 | prev_instr_name = entry.instr_name 75 | prev_instr_addr = entry.instr_addr 76 | 77 | matched = True 78 | break 79 | if matched == False: 80 | op_dict[op][entry] = 1 81 | master_inst_dict[entry] = 1 82 | 83 | #DEBUG 84 | # if 'rem' in prev_instr_name or 'div' in prev_instr_name: 85 | # op_dict[op][entry] += 1 86 | # master_inst_dict[entry] += 1 87 | 88 | prev_instr = entry 89 | prev_instr_name = entry.instr_name 90 | prev_instr_addr = entry.instr_addr 91 | 92 | 93 | else: 94 | op_dict[op][entry]=1 95 | 96 | # Increment the instruction count for the operation. 97 | ops_count[op]['counts'] += 1 98 | 99 | # Append the instruction to the extension instruction list. 100 | extension_instruction_list.append(entry) 101 | except KeyError as e: 102 | # Handle the case where the extension is not supported. 103 | logger.error(f'Extension {e} not supported.') 104 | exit(1) 105 | 106 | # Populate the 'Counts' field in the ret_dict with the instruction counts per operation. 
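# Each count is the number of distinct instruction entries in the group, not a cycle-weighted total.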
107 | ret_dict['Counts'] = [len(op_dict[op]) for op in operations] 108 | # Log the completion of the computation. 109 | logger.info("Done") 110 | 111 | # Return the resulting dictionaries containing grouped instructions and counts. 112 | return (ret_dict,extension_instruction_list,op_dict) 113 | 114 | 115 | def privilege_modes(log,config): 116 | ''' 117 | Computes the privilege modes. 118 | 119 | Args: 120 | - log: The path to the log file. 121 | 122 | Returns: 123 | - A list of privilege modes. 124 | - A dictionary with the privilege modes as keys and the number of instructions in each group as values. 125 | ''' 126 | # Log the start of the process for computing privilege modes. 127 | logger.info("Computing privilege modes.") 128 | privilege_mode_regex = config['profiles']['cfg']['privilege_mode_regex'] 129 | 130 | # List of privilege modes to track: user, supervised, and machine. 131 | mode_list = ['user', 'supervised', 'machine'] 132 | 133 | # Initialize a dictionary to track the counts of privilege modes. 134 | mode_dict = {'user': {'count': 0}, 'supervised': {'count': 0}, 'machine': {'count': 0}} 135 | 136 | # Initialize a dictionary to hold the resulting counts and privilege mode names. 137 | ret_dict = {'Privilege Mode': mode_list, 'Counts': []} 138 | 139 | # Open the specified log file for reading. 140 | with open(log, 'r') as log_file: 141 | # Iterate through each line in the log file. 142 | for line in log_file: 143 | # Attempt to match the line against the privilege mode regex pattern. 144 | match = re.match(privilege_mode_regex, line) 145 | if match is not None: 146 | # Extract the privilege mode value from the regex match. 147 | x = int(match.group(1)) 148 | if x is not None: 149 | # Update the counts for each privilege mode based on the extracted value. 150 | if x == 0: 151 | mode_dict['user']['count'] += 1 152 | elif x == 1: 153 | mode_dict['supervised']['count'] += 1 154 | elif x == 3: 155 | mode_dict['machine']['count'] += 1 156 | 157 | # Populate the 'Counts' field in the ret_dict with the privilege mode counts. 158 | ret_dict['Counts'] = [mode_dict[mode]['count'] for mode in mode_list] 159 | 160 | # Log the completion of the privilege mode computation. 161 | logger.info('Done.') 162 | 163 | # Return the resulting dictionary containing privilege mode counts. 164 | return ret_dict -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/branch_ops.py: -------------------------------------------------------------------------------- 1 | # See LICENSE for licensing information. 2 | 3 | # this file is a plugin for riscv_application_profiler 4 | # this file classifies instructions into groups based on +ve/-ve branch offsets. 5 | # this file classifies instructions into 'long' and 'short' branches based on branch offsets. 6 | 7 | from riscv_isac.log import * 8 | from riscv_application_profiler.consts import * 9 | import riscv_application_profiler.consts as consts 10 | import statistics 11 | import pprint as pp 12 | 13 | def compute_threshold(master_inst_dict: dict, ops_dict: dict) -> int: 14 | ''' 15 | compute the mean plus two standard deviations as the threshold 16 | 17 | Args: 18 | - master_inst_dict: A dictionary of InstructionEntry objects. 19 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 
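
Returns:
- The computed threshold as an integer; 0 when no branch offsets are available.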
20 | ''' 21 | 22 | # compute the list of branch offsets from the master_inst_dict where each entry has an imm field 23 | branch_offsets = [entry.imm for entry in ops_dict['branches'] if entry.imm is not None] 24 | 25 | # compute the mean and standard deviation of the branch offsets 26 | if len(branch_offsets) == 0: 27 | return 0 28 | mean = statistics.mean(branch_offsets) 29 | std_dev = statistics.stdev(branch_offsets) 30 | 31 | # compute the threshold as the mean plus two standard deviations 32 | threshold = mean + 2*std_dev 33 | 34 | return int(threshold) 35 | 36 | def group_by_branch_offset(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 37 | ''' 38 | Groups instructions based on the branch offset. 39 | 40 | Args: 41 | - master_inst_dict: A dictionary of InstructionEntry objects. 42 | - branch_threshold: The threshold for a branch to be considered 'long'. 43 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 44 | - extension_used: A list of extensions used in the application. 45 | - config: A yaml with the configuration information. 46 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 47 | 48 | Returns: 49 | - A dictionary with the branch offset sizes and count as keys and values respectively. 50 | ''' 51 | # Logging the grouping process 52 | logger.info("Grouping instructions by branch offset.") 53 | 54 | branch_threshold = compute_threshold(master_inst_dict, ops_dict) 55 | 56 | # Initializing dictionaries and lists 57 | size_list = ['long', 'short'] 58 | size_dict = {size: {'count': 0} for size in size_list} 59 | ret_dict = {'Offset Size': size_list, 'Count': []} 60 | 61 | # loop though the branch instructions 62 | for entry in ops_dict['branches']: 63 | if entry.imm is None: 64 | continue 65 | # Determine whether the branch is long or short based on the threshold 66 | size = 'short' if entry.imm < branch_threshold else 'long' 67 | size_dict[size]['count'] += 1 68 | 69 | # Logging completion of the grouping process 70 | logger.info('Done.') 71 | 72 | # Appending the counts to the result dictionary 73 | ret_dict['Count'].append(size_dict['long']['count']) 74 | ret_dict['Count'].append(size_dict['short']['count']) 75 | 76 | # Return the final results 77 | return ret_dict 78 | 79 | 80 | def group_by_branch_sign(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 81 | ''' 82 | Groups instructions based on the sign bit of the branch offset. 83 | 84 | Args: 85 | - master_inst_dict: A dictionary of InstructionEntry objects. 86 | - ops_dict: A dictionary with the operations as keys and a list of InstructionEntry. 87 | - extension_used: A list of extensions used in the application. 88 | - config: A yaml with the configuration information. 89 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 90 | 91 | 92 | Returns: 93 | -A list of directions, which in this case are 'positive' and 'negative'. 94 | A dictionary direc_dict containing the counts of instructions in each direction. 95 | The keys are 'positive' and 'negative', and the values are dictionaries containing the 96 | 'count' of instructions with positive and negative branch offsets. 
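Entries whose immediate field is None (e.g. compressed branches whose immediates are not decoded) are skipped.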
97 | 98 | ''' 99 | # Logging the grouping process 100 | logger.info("Grouping instructions by branch offset sign.") 101 | 102 | # Initializing dictionaries and lists 103 | direc_list = ['positive', 'negative'] 104 | direc_dict = {direc: {'count': 0} for direc in direc_list} 105 | ret_dict = {'Direction': direc_list, 'Count': []} 106 | 107 | # Loop through branch instructions 108 | for entry in ops_dict['branches']: 109 | if entry.imm is None: 110 | continue 111 | # Determine whether the branch offset is positive or negative 112 | direction = 'positive' if entry.imm >= 0 else 'negative' 113 | direc_dict[direction]['count'] += 1 114 | 115 | # Logging completion of the grouping process 116 | logger.info('Done.') 117 | 118 | # Appending the counts to the result dictionary 119 | ret_dict['Count'].append(direc_dict['positive']['count']) 120 | ret_dict['Count'].append(direc_dict['negative']['count']) 121 | 122 | # Return the final results 123 | return ret_dict 124 | 125 | 126 | 127 | def loop_compute (master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 128 | ''' 129 | Groups instructions based on the branch offset. 130 | 131 | Args: 132 | - master_inst_dict: A dictionary of InstructionEntry objects. 133 | - ops_dict: A dictionary with the operations as keys and a list of InstructionEntry. 134 | - extension_used: A list of extensions used in the application. 135 | - config: A yaml with the configuration information. 136 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 137 | 138 | Returns: 139 | - A dictionary loop_instr containing the counts of instructions in each loop. 140 | The keys are the branch instructions, and the values are dictionaries containing the 141 | 'target address', 'depth', 'count' and 'size' of the loop. 
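Note: depth is estimated heuristically by comparing consecutive recorded branches, not by full control-flow analysis.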
142 | ''' 143 | # Logging the loop computation process 144 | logger.info("Computing loops.") 145 | 146 | # Initializing dictionaries, lists, and result dictionary 147 | loop_instr = {} 148 | target_address = {} 149 | loop_list = [] 150 | ret_dict = {'Branch Instruction': loop_list, 'Depth': [], 'Count': [], 'Size(bytes)': []} 151 | 152 | # Loop through branch instructions 153 | for entry in ops_dict['branches']: 154 | if entry.imm is None: 155 | continue 156 | # Determine the instruction and its target address 157 | if entry.rs2 is not None: 158 | instr = f"{entry.instr_name} {entry.rs1[1]}{entry.rs1[0]},{entry.rs2[1]}{entry.rs2[0]}" 159 | else: 160 | instr = f"{entry.instr_name} {entry.rs1[1]}{entry.rs1[0]}" 161 | ta = int(entry.instr_addr) + int(entry.imm) 162 | 163 | # Update loop information in the dictionaries 164 | if instr not in loop_instr or hex(ta) not in target_address.get(instr, []): 165 | loop_instr[instr] = {'depth': 1, 'count': 1, 'size(bytes)': abs(int(entry.instr_addr) - ta)} 166 | target_address.setdefault(instr, []).append(hex(ta)) 167 | else: 168 | loop_instr[instr]['count'] = loop_instr[instr]['count'] + 1 169 | 170 | # Calculate the number of loops 171 | number_of_loops = len(loop_instr) 172 | 173 | # Initialize loop_list based on conditions 174 | loop_list = list(loop_instr.keys()) 175 | for i in range(number_of_loops - 1): 176 | if loop_list[i + 1] < loop_list[i]: 177 | loop_instr[loop_list[i + 1]]['depth'] = loop_instr[loop_list[i]]['depth'] + 1 178 | 179 | # Populate the ret_dict with loop information 180 | for i in range(number_of_loops): 181 | ret_dict['Branch Instruction'].append(loop_list[i]) 182 | ret_dict['Depth'].append(loop_instr[loop_list[i]]['depth']) 183 | ret_dict['Count'].append(loop_instr[loop_list[i]]['count']) 184 | ret_dict['Size(bytes)'].append(loop_instr[loop_list[i]]['size(bytes)']) 185 | 186 | # Logging completion of the loop computation process 187 | logger.info('Done.') 188 | 189 | # Return the final results 190 | return ret_dict 191 | 192 | -------------------------------------------------------------------------------- /docs/plugins.rst: -------------------------------------------------------------------------------- 1 | Plugins Functions 2 | =========================== 3 | 4 | Instruction Groups 5 | ------------ 6 | 7 | By categorizing instructions based on their type, 8 | developers can identify which types of instructions are 9 | executed most frequently. This information can help 10 | identify performance bottlenecks in the code and guide 11 | optimization efforts 12 | 13 | By analyzing the instruction mix, developers can identify 14 | areas of the code where optimizations can be made. For 15 | example, if load instructions are executed more 16 | frequently than store instructions, it may be possible to 17 | optimize the code by reducing the number of load 18 | instructions or by using more efficient data structures 19 | 20 | By comparing instruction mixes from different runs of the 21 | same code, developers can track changes in performance 22 | over time. This can help identify performance regressions 23 | and ensure that optimizations are having the desired 24 | effect 25 | 26 | In some architectures, such as RISC-V, profiling hardware 27 | events can provide insight into the code execution 28 | behavior on various micro-architectural units. 
By 29 | categorizing instructions based on their type, developers 30 | can gain insights into how different types of instructions 31 | affect hardware performance 32 | 33 | 34 | Privilege Modes 35 | ------------ 36 | 37 | By categorizing instructions based on their privilege 38 | mode, developers can identify which privilege modes are 39 | executed most frequently and which ones take the 40 | longest time to execute. This information can help 41 | identify performance bottlenecks in the code and guide 42 | optimization efforts. 43 | 44 | By analyzing the instruction mix based on privilege 45 | modes, developers can identify areas of the code where 46 | optimizations can be made. For example, if a large 47 | number of instructions are executed in machine mode, it 48 | may be possible to optimize the code by reducing the 49 | number of machine mode instructions or by using more 50 | efficient algorithms 51 | 52 | In RISC-V, profiling hardware events can provide insight 53 | into the code execution behavior on various microarchitectural units. By categorizing instructions based on 54 | their privilege mode, developers can gain insights into 55 | how different privilege modes affect hardware 56 | performance 57 | 58 | By providing a clear separation between privileged and 59 | non-privileged instructions, developers can identify and 60 | fix issues more quickly and easily. This can help in 61 | debugging and diagnosing problems with the operating 62 | system and applications 63 | 64 | 65 | Grouping Branches by Offset Size 66 | ------------ 67 | 68 | The "Grouping Branches by Offset Size" serves as a 69 | valuable profiling tool for understanding the behavior of 70 | branch instructions within a program. This metric 71 | essentially categorizes branches into different groups 72 | based on the size of their offset, which is the numerical 73 | distance between the branch instruction and its target 74 | destination. 75 | 76 | For instance, a scenario where a program exhibits a high 77 | frequency of branches with small offset sizes could imply 78 | that the program frequently jumps to nearby instructions. 79 | This pattern might lead to increased pipeline stalls and 80 | reduced overall execution efficiency, as the processor has 81 | to frequently change its execution path. Conversely, when 82 | a program has a notable number of branches with large 83 | offset sizes, it suggests that the program is frequently 84 | making longer jumps to more distant instructions. This 85 | behavior can also negatively influence performance due 86 | to the potential disruption of the processor's instruction 87 | fetching and execution pipelines. 88 | 89 | Analyzing the "Grouping Branches by Offset Size" metric 90 | offers developers a window into areas of the code that 91 | might benefit from optimization. For example, if a 92 | substantial number of small offset branches are detected, 93 | it could indicate opportunities to consolidate code 94 | segments or use techniques like loop unrolling to reduce 95 | the frequency of branching. 96 | 97 | Similarly, addressing excessive large offset branches 98 | might prompt developers to reorganize the code to 99 | minimize the need for distant jumps, thus enhancing 100 | execution speed 101 | 102 | 103 | Grouping Branches by Direction 104 | ------------ 105 | 106 | By grouping branches based on their direction, 107 | developers can identify which types of branches are 108 | executed most frequently and which ones take the 109 | longest time to execute. 

Grouping Branches by Direction
------------------------------

By grouping branches based on their direction, developers can identify which types of branches are executed most frequently and which ones take the longest time to execute. This information can help identify performance bottlenecks in the code and guide optimization efforts.

By analyzing the branch mix based on the sign of the offset, developers can identify areas of the code where optimizations can be made. For example, if a large number of taken branches have a negative offset, it may be possible to optimize the code by reducing the number of backward branches or by using more efficient algorithms.

In RISC-V, profiling hardware events can provide insight into the code execution behavior on various micro-architectural units. By grouping branches based on their sign, developers can gain insights into how different types of branches affect hardware performance.

By providing a clear separation between branches based on their sign, developers can identify and fix issues more quickly and easily. This can help in debugging and diagnosing problems with the operating system and applications.
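
Because the sign of the offset encodes the direction (a negative offset targets an earlier address), the grouping is a simple sign test. A minimal sketch, again assuming records with a signed ``imm`` field:

.. code-block:: python

   def branch_direction(imm):
       '''A negative offset jumps backward; anything else counts as forward.'''
       return 'backward' if imm < 0 else 'forward'

   print([branch_direction(o) for o in (-12, 8, 2048, -500)])
   # ['backward', 'forward', 'forward', 'backward']
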

Nested Loops
------------

The "Nested Loop Computation" metric provides insights into the performance characteristics of nested loops within a program. Nested loops are a common programming construct where one loop is contained within another. These loops can significantly impact program performance, and analyzing this metric helps developers understand and optimize these loop structures.

Nested loops can lead to repeated execution of the inner loop code, potentially causing a significant computational load. By measuring the "Nested Loop Computation" metric, developers can identify which loops are nested and gain insights into how many times the inner loop is executed. This information highlights potential performance bottlenecks arising from inefficient loop structures.

Resource Utilization: Nested loops can strain the resources of the processor, memory hierarchy, and caches due to frequent memory accesses and computational demands. Profiling the "Nested Loop Computation" metric can help in assessing how effectively these resources are utilized and whether improvements in memory access patterns or cache usage are needed.

Optimization Opportunities: Analyzing the "Nested Loop Computation" metric can reveal optimization opportunities. Developers can explore strategies like loop fusion (combining loops with similar iteration counts), loop unrolling (reducing loop overhead by processing multiple loop iterations at once), and optimizing data access patterns within the nested loops. These optimizations can lead to reduced execution time and improved program efficiency.

Parallelism Potential: Depending on the independence of computations within nested loops, developers might identify opportunities for parallel execution using techniques like multithreading or SIMD (Single Instruction, Multiple Data) vectorization. Profiling the nested loop metric helps in determining whether such parallelism can be effectively exploited.

Algorithmic Analysis: Sometimes, the presence of deeply nested loops can indicate inefficient algorithmic choices. By analyzing the "Nested Loop Computation" metric, developers can assess whether alternative algorithms or algorithmic improvements could lead to better overall performance.
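
In an execution log, a loop shows up as a backward branch that is executed repeatedly with the same target, which is exactly the observation the loop-computation code shown earlier builds on. A stripped-down sketch of that detection, using a simplified ``(pc, offset)`` record format rather than the profiler's real entries:

.. code-block:: python

   def find_loops(branch_records):
       '''Count iterations per (pc, target) back-edge.

       branch_records: iterable of (pc, signed_offset) tuples, a
       simplified stand-in for the profiler's decoded entries.
       '''
       loops = {}
       for pc, imm in branch_records:
           if imm < 0:   # backward branch: a loop back-edge candidate
               key = (hex(pc), hex(pc + imm))
               loops[key] = loops.get(key, 0) + 1
       return loops

   trace = [(0x80000010, -8)] * 5 + [(0x80000020, 12)]
   print(find_loops(trace))
   # {('0x80000010', '0x80000008'): 5}
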

Grouping Jumps by Direction
---------------------------

The "Jumps Direction" metric provides valuable insights into the distribution and behavior of jump instructions within a program based on their direction, i.e., whether the jumps are forward or backward in terms of memory addresses. This metric focuses specifically on understanding the control flow patterns and the potential performance implications associated with jump instructions.

Control Flow Analysis: By categorizing jump instructions into forward and backward jumps, developers can understand the structure and complexity of a program's control flow. Forward jumps typically indicate regular program execution, while backward jumps might indicate loop structures or other instances where the program is revisiting previous instructions.

Loop Identification: Backward jumps often correspond to loop constructs in the code. Analyzing the distribution and frequency of these backward jumps can help developers identify loops and understand their characteristics. This is crucial for optimizing loops, as they often represent hotspots where performance improvements can have a significant impact on overall execution time.

Code Layout Optimization: Understanding the distribution of forward and backward jumps can provide insights into the placement of code in memory. Minimizing the number of backward jumps or strategically arranging instructions can help reduce branch mispredictions and improve the efficiency of instruction fetching and execution.

Optimization Opportunities: By studying the jump directions, developers can identify opportunities to optimize code. For instance, loops with high-frequency backward jumps might be candidates for loop unrolling or other loop optimization techniques to reduce branch overhead and improve instruction-level parallelism.


Grouping Jumps by Jump Size
---------------------------

The "Jumps Size" metric provides insights into the distances that the program's jump instructions cover when transitioning from one part of the code to another. It focuses specifically on the size of the jumps, which is the numerical difference between the source and target addresses of a jump instruction, often measured in instructions or bytes.

Branching Behavior: Different jump sizes can indicate various types of branching behavior. Small jump sizes may suggest tight loops or frequently executed code segments, while large jump sizes might indicate less frequent transitions between more distant parts of the program. This information is crucial for optimizing branch prediction mechanisms and mitigating the effects of mispredicted branches.

Performance Bottlenecks: Unusually large jump sizes may highlight potential performance bottlenecks. These could be caused by jumps to distant code regions that might result in cache misses, pipeline stalls, or other inefficiencies. Identifying such bottlenecks can guide developers in reorganizing code or applying optimization techniques to minimize the impact of these large jumps.

Function Call Patterns: The "Jumps Size" metric can provide insights into function call patterns. Frequent small jumps could indicate the presence of short and frequently called functions, while occasional large jumps may point to functions with longer code bodies. Optimizing the layout of frequently used functions can lead to better cache utilization and reduced instruction fetch latencies.

Profiling for Optimization: Analyzing the "Jumps Size" metric can help developers identify opportunities for code optimization. For instance, if a certain range of jump sizes is observed frequently, it might be worth investigating whether those transitions can be made more efficient by reordering code, introducing inline functions, or applying loop transformations.
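
Because jump distances span several orders of magnitude, a power-of-two histogram is a natural presentation for this metric. The bucketing below is an illustrative choice, not the plugin's actual scheme:

.. code-block:: python

   from collections import Counter

   def size_bucket(distance):
       '''Bucket an absolute jump distance into a power-of-two range.'''
       span = 1
       while span < max(distance, 1):
           span *= 2
       return f'<= {span} B'

   distances = [4, 30, 30, 900, 70000]   # |target - source| in bytes
   print(Counter(size_bucket(d) for d in distances))
   # <= 32 B: 2, <= 4 B: 1, <= 1024 B: 1, <= 131072 B: 1
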

Register Usage
--------------

The "Analysis of Registers" metric pertains to the examination of register usage within a program. In the RISC-V architecture, registers are small storage units within the CPU used to hold temporary data and operands during program execution. Analyzing register usage can provide valuable insights into how a program utilizes registers and can help developers identify potential areas for optimization and performance improvement.

Identifying Hotspots: Registers that are frequently read from or written to can indicate hotspots in the code. Hotspots are sections of code that are executed frequently and have a significant impact on overall performance. By focusing optimization efforts on these hotspots, developers can achieve substantial performance gains.

Resource Balancing: Profiling register reads and writes can aid in resource balancing within the processor. Modern processors have limited resources, and understanding how registers are utilized can help balance other resources like execution units, cache utilization, and memory bandwidth.

Compiler Optimization: Profiling register usage provides valuable information to compilers for making optimization decisions. Compilers can use this information to perform register allocation, instruction scheduling, and other transformations to improve code efficiency.


Read After Write
----------------

The RAW metric helps in profiling by identifying situations where a read operation follows a write operation to the same location. This indicates a potential data dependency, where the result of a write operation is needed for a subsequent read operation.

Dependency Analysis: By tracking RAW dependencies, developers can identify instructions that are interdependent due to their order of execution. These dependencies can restrict the order in which instructions can be executed in parallel, potentially leading to stalls and inefficiencies in the pipeline.

Pipeline Stalls: When a read operation follows a write operation to the same location, the processor needs to ensure that the write operation is completed before the read operation can proceed. This can introduce pipeline stalls, where the processor has to wait for the write data to be available before it can continue executing subsequent instructions. Identifying and minimizing such stalls can significantly improve pipeline efficiency.

Out-of-Order Execution: Modern processors often employ techniques like out-of-order execution to mitigate the impact of data dependencies. However, excessive RAW dependencies can still limit the effectiveness of these techniques. Profiling RAW dependencies can help developers understand the limitations of out-of-order execution and find opportunities to reorder instructions for better performance.

Instruction Scheduling: By analyzing the RAW metric, developers can make informed decisions about instruction scheduling. This involves reordering instructions to maximize parallel execution while minimizing the impact of data dependencies. Strategic scheduling can lead to better resource utilization and improved overall program performance.

Register Allocation: In architectures with limited registers, managing RAW dependencies becomes crucial for efficient register allocation. By identifying where registers are being overwritten and immediately read afterward, developers can make decisions about register usage and potentially optimize the register allocation strategy.
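
A RAW dependency can be spotted by remembering which instruction last wrote each register and flagging reads that follow shortly after. The record format below is a simplified stand-in for the profiler's decoded entries, and the ``window`` length is an arbitrary placeholder for a pipeline depth:

.. code-block:: python

   def find_raw_hazards(instrs, window=3):
       '''Report (writer_index, reader_index, register) triples.

       instrs: list of (rd, [source registers]) tuples; rd may be None
       for instructions that write no register.
       '''
       last_write = {}   # register -> index of its last writer
       hazards = []
       for i, (rd, srcs) in enumerate(instrs):
           for rs in srcs:
               j = last_write.get(rs)
               if j is not None and i - j <= window:
                   hazards.append((j, i, rs))
           if rd is not None:
               last_write[rd] = i
       return hazards

   trace = [('x5', ['x1']), ('x6', ['x5']), (None, ['x6', 'x5'])]
   print(find_raw_hazards(trace))
   # [(0, 1, 'x5'), (1, 2, 'x6'), (0, 2, 'x5')]

Note that on a real pipeline with result forwarding, a hit here marks a potential forwarding event rather than a guaranteed stall.
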

Store Load Bypass
-----------------

The "Store Load Bypass" metric plays a crucial role in profiling and optimizing programs by providing insights into memory access patterns and potential performance bottlenecks. This metric refers to the behavior of the processor's memory subsystem when it encounters a sequence of instructions that involves both storing data into memory and subsequently loading that data back from memory.

In a RISC-V processor, a store-load bypass occurs when a load instruction depends on a preceding store instruction that has not yet completed. The bypass mechanism allows the load instruction to fetch the stored data directly from the internal data path, bypassing the memory hierarchy. This can prevent unnecessary delays that would occur if the load instruction had to wait for the store instruction to fully commit to memory before retrieving the data.

A high frequency of store-load bypasses can indicate potential performance bottlenecks. If loads are frequently stalled due to pending stores, the processor's execution pipeline could experience significant delays. This might highlight areas in the code where the frequency of stores and loads could be optimized to reduce such stalls.

Dependency Analysis: By studying the occurrence of store-load bypasses, developers can identify dependencies between store and load instructions. This understanding can guide them in rearranging code or using memory access optimizations like prefetching to reduce the impact of these dependencies on overall execution speed.

Memory Access Patterns: The metric can reveal patterns in memory access behavior. For example, frequent store-load bypasses might suggest that the program is modifying data and then quickly accessing it again, which could point to opportunities for caching or buffering mechanisms.

Cache Utilization: The presence of frequent store-load bypasses could also point to potential inefficiencies in cache utilization. Addressing these inefficiencies might involve adjusting cache parameters or reconsidering the order of memory accesses to minimize conflicts and improve cache hit rates.

Compiler Optimizations: Profiling store-load bypasses can inform compiler optimizations. The compiler might be able to reorder instructions to minimize the impact of dependencies, or even employ advanced techniques like software pipelining to overlap memory accesses and computations more effectively.
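
In a trace, bypass candidates are loads that read an address a store wrote only a few instructions earlier. A minimal sketch under the same simplifying assumptions about the record format:

.. code-block:: python

   def store_load_pairs(mem_ops, window=4):
       '''Yield (store_index, load_index, address) bypass candidates.

       mem_ops: list of ('store' | 'load', address) tuples, a
       simplified stand-in for the profiler's decoded memory accesses.
       '''
       last_store = {}   # address -> index of the most recent store
       for i, (op, addr) in enumerate(mem_ops):
           if op == 'store':
               last_store[addr] = i
           elif addr in last_store and i - last_store[addr] <= window:
               yield last_store[addr], i, hex(addr)

   ops = [('store', 0x8000), ('load', 0x8000), ('load', 0x9000)]
   print(list(store_load_pairs(ops)))
   # [(0, 1, '0x8000')]
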

Data Cache Utilization
----------------------

The "Data Cache" metric pertains to the behavior and efficiency of the data cache, a crucial component of the memory hierarchy in modern processors. It provides insights into how effectively the processor's data cache is being utilized by a program and can play a significant role in profiling and optimizing the program's performance.

Here's how the "Data Cache" metric in RISC-V helps in profiling:

Cache Hit Rate Analysis: The metric helps in tracking the cache hit rate, which indicates how often the processor successfully retrieves data from the cache without needing to access main memory. A high cache hit rate suggests that the data cache is effectively storing frequently accessed data, leading to improved execution speed. Conversely, a low hit rate may point to cache inefficiencies or poor memory access patterns.

Cache Misses: By monitoring cache misses, developers can identify instances where data requested by the program is not present in the cache and must be fetched from main memory. Frequent cache misses can lead to performance bottlenecks, as accessing main memory is much slower than accessing the cache.

Cache Line Utilization: This metric can help in understanding how effectively cache lines are utilized. Cache lines are the smallest units of data that the cache stores. If a program frequently uses only a small portion of each cache line, it might lead to inefficient cache usage, and optimization strategies such as data padding or rearrangement might be necessary.


Instruction Cache Utilization
-----------------------------

The "Instruction Cache Utilization" metric is a valuable tool for understanding how efficiently the instruction cache of a processor is being utilized by a program. The instruction cache is a small, fast memory component that stores frequently used instructions, allowing the processor to fetch and execute them quickly without having to access the slower main memory.

The utilization of the instruction cache is crucial for achieving high performance, as cache hits (when the required instruction is found in the cache) result in faster execution, while cache misses (when the instruction is not in the cache and needs to be fetched from main memory) lead to performance slowdowns due to longer memory access times.

The "Instruction Cache Utilization" metric provides insights into how effectively the cache is being used by a program, and it can help in profiling in the following ways:

Cache Hit Rate Analysis: By monitoring the instruction cache utilization, developers can determine the percentage of instructions that are found in the cache when needed. A high cache hit rate indicates that the program is using the cache efficiently, resulting in faster execution. Conversely, a low hit rate suggests that the cache might not be adequately sized for the program's working set or that the program's memory access patterns are not cache-friendly.

Cache Miss Analysis: Alongside the hit rate, analyzing the cache miss rate is equally important. A high cache miss rate suggests that the cache is frequently being bypassed, leading to more memory accesses and longer execution times. Profiling cache misses can help identify specific code sections or memory access patterns that are causing cache inefficiencies.

Optimization Targets: Understanding instruction cache utilization guides developers in optimizing their code to enhance cache efficiency. Techniques such as code reordering, loop unrolling, and optimizing memory access patterns can help reduce cache misses and improve overall performance.

Cache Size Evaluation: The "Instruction Cache Utilization" metric can also aid in evaluating whether the current size of the instruction cache is sufficient for the program's needs. If the cache is frequently being thrashed (a high miss rate), it might indicate that the cache is too small to accommodate the program's working set of instructions, necessitating a larger cache size.

Profiling for Different Architectures: Different RISC-V processors might have varying cache sizes and configurations. Profiling instruction cache utilization helps tailor code optimization strategies to the specific cache characteristics of the target architecture.
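
The profiler's requirements pin ``pycachesim``, which can estimate these hit and miss counts by replaying a trace's memory accesses through a configurable software cache model. A minimal sketch with a made-up single-level cache geometry and made-up addresses (see the pycachesim documentation for the full API):

.. code-block:: python

   from cachesim import CacheSimulator, Cache, MainMemory

   mem = MainMemory()
   l1 = Cache("L1", 64, 4, 64, "LRU")   # 64 sets x 4 ways x 64 B lines = 16 KiB
   mem.load_to(l1)
   mem.store_from(l1)
   cs = CacheSimulator(l1, mem)

   # Replay a few (address, is_store) accesses from a hypothetical trace.
   for addr, is_store in [(0x1000, False), (0x1008, True), (0x1000, False)]:
       if is_store:
           cs.store(addr, length=8)
       else:
           cs.load(addr, length=8)

   cs.force_write_back()
   cs.print_stats()   # per-level hit/miss/load/store counts
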

CSR Histogram
-------------

A histogram that provides information about the usage of control and status registers (CSRs).

Identifying CSR usage: A CSR histogram can help identify which control and status registers are being accessed most frequently during program execution. This information can be valuable in understanding the behavior of the program and identifying potential bottlenecks or areas for optimization.

Analyzing performance impact: By analyzing the CSR histogram, developers can gain insights into how the usage of control and status registers affects the performance of the program. This can help in identifying areas where the program may be spending excessive time accessing CSRs and in optimizing those sections of code to improve overall performance.

Comparing CSR usage: By comparing CSR histograms from different runs of the same code or from different versions of the program, developers can track changes in CSR usage over time. This can help identify any unexpected changes in behavior and guide optimization efforts.


Repeating Sequences
-------------------

Identifying code patterns: By finding repeating sequences of instructions, developers can identify common patterns in the code. This can provide insights into the structure and behavior of the program, helping to understand its overall design and logic.

Optimizing code: Analyzing repeating instruction sequences can help identify areas where code optimizations can be applied. By optimizing frequently executed sequences, developers can improve the overall performance of the program. This may involve reducing the number of instructions, optimizing memory access patterns, or applying algorithmic improvements.

Identifying hotspots: Repeating instruction sequences often indicate hotspots in the code, where a significant amount of time is spent during program execution. By identifying these hotspots, developers can focus their optimization efforts on the most critical parts of the code, leading to more effective performance improvements.

Profiling hardware events: Identifying repeating instruction sequences can provide insights into the behavior of the program on the underlying hardware. This information can be used to profile hardware events and understand how different instructions impact the performance of the processor.
--------------------------------------------------------------------------------
/riscv_application_profiler/consts.py:
--------------------------------------------------------------------------------
1 | ops_dict = {
2 | "RV32": {
3 | "I": {
4 | "loads": [
5 | "lb",
6 | "lbu",
7 | "lh",
8 | "lhu",
9 | "lw",
10 | ],
11 | "stores": ["sb", "sh", "sw"],
12 | "imm computes": [
13 | "addi",
14 | "andi",
15 | "ori",
16 | "xori",
17 | "slti",
18 | "sltiu",
19 | "auipc",
20 | "lui",
21 | ],
22 | "imm shifts": ["slli", "srli", "srai"],
23 | "reg computes": ["add", "sub", "slt", "sltu", "xor", "or", "and"],
24 | "reg shifts": ["sll", "srl", "sra"],
25 | "jumps": ["jal", "jalr"],
26 | "branches": ["bge", "bgeu", "blt", "bltu", "beq", "bne"],
27 | "compares":[],
28 | "conversions":[],
29 | "moves":[],
30 | "classifies":[],
31 | "csrs":[],
32 | "fence":["fence","fence.i"],
33 | },
34 | "M": {
35 | "loads": [],
36 | "stores": [],
37 | "imm computes": [],
38 | "imm shifts": [],
39 | "reg computes": [
40 | "div",
41 | "divu",
42 | "mul",
43 | "mulh",
44 | "mulhsu",
45 | "mulhu",
46 | "rem",
47 | "remu",
48 | ],
49 | "reg shifts": [],
50 | "jumps": [],
51 | "branches": [],
52 | "compares":[],
53 | "conversions":[],
54 | "moves":[],
55 | "classifies":[],
56 | "csrs":[],
57 | "fence":[],
58 | },
59 | "F": {
60 | "loads": ["flw","flwsp","fld","fldsp"],
61 | "stores": ["fsw","fswsp","fsd","fsdsp"],
62 | "imm computes": [],
63 | "imm shifts": [],
64 | "reg computes": [
65 | "fmadd.s",
66 | "fmsub.s",
67 | "fadd.s",
68 | "fsub.s",
69 | "fmul.s",
70 | "fdiv.s",
71 | "fmin.s",
72 | "fmax.s",
73 | "fsqrt.s",
74 | "fmadd.s",
75 | "fmsub.s",
76 | "fnmsub.s",
77 | "fnmadd.s",
78 | ],
79 | "reg shifts": [],
80 | "jumps": [],
81 | "compares": ["flt.s","feq.s","fle.s"],
82 | "conversions":[
83 | "fcvt.w.s", 84 | "fcvt.wu.s", 85 | "fcvt.s.w", 86 | "fcvt.s.wu", 87 | "fsgnj.s", 88 | "fsgnjn.s", 89 | "fsgnjx.s", 90 | ], 91 | "moves":["fmv.s","fmv.x.w","fmv.w.x"], 92 | "classifies":["fclass.s"], 93 | "branches": [], 94 | "csrs":["frcsr.s","fscsr.s","frrm","fsrm","fsrmi",], 95 | "fence":[], 96 | }, 97 | "D": { 98 | "loads": ["fld","fldsp"], 99 | "stores": ["fsd","fsdsp"], 100 | "imm computes": [], 101 | "imm shifts": [], 102 | "reg computes": [ 103 | "fmadd.d", 104 | "fmsub.d", 105 | "fadd.d", 106 | "fsub.d", 107 | "fmul.d", 108 | "fdiv.d", 109 | "fmin.d", 110 | "fmax.d", 111 | "fsqrt.d", 112 | "fmadd.d", 113 | "fmsub.d", 114 | "fnmsub.d" 115 | "fnmadd.d" 116 | ], 117 | "reg shifts": [], 118 | "jumps": [], 119 | "compares": ["flt.d","feq.d","fle.d"], 120 | "conversions":[ 121 | "fcvt.w.d", 122 | "fcvt.wu.d", 123 | "fcvt.d.w", 124 | "fcvt.d.wu", 125 | "fsgnj.d", 126 | "fsgnjn.d", 127 | "fsgnjx.d", 128 | ], 129 | "moves":["fmv.x.d","fmv.d.x"], 130 | "classifies":["fclass.d"], 131 | "branches": [], 132 | "csrs":["frcsr","fscsr","frrm","fsrm","fsrmi",], 133 | "fence":[], 134 | 135 | }, 136 | "C": { 137 | "loads": [ 138 | "c.lwsp", 139 | "c.lw", 140 | ], 141 | "stores": [ 142 | "c.swsp", 143 | "c.sw", 144 | ], 145 | "imm computes": [ 146 | "c.li", 147 | "c.lui", 148 | "c.addi", 149 | "c.addi16sp", 150 | "c.addi4spn", 151 | "c.andi", 152 | ], 153 | "imm shifts": [ 154 | "c.slli", 155 | "c.srli", 156 | "c.srai", 157 | ], 158 | "reg computes": [ 159 | "c.add", 160 | "c.addw", 161 | "c.sub", 162 | "c.subw", 163 | "c.and", 164 | "c.or", 165 | "c.xor", 166 | "c.mv", 167 | ], 168 | "reg shifts": ["c.sll", "c.srl", "c.sra"], 169 | "jumps": ["c.j", "c.jal", "c.jr", "c.jalr"], 170 | "branches": [ 171 | "c.beqz", 172 | "c.bnez", 173 | "c.bltz", 174 | "c.bgez", 175 | "c.bltz", 176 | "c.bgez", 177 | "c.bltzal", 178 | "c.bgezal", 179 | ], 180 | "compares":[], 181 | "conversions":[], 182 | "moves":[], 183 | "classifies":[], 184 | "csrs":[], 185 | "fence":[], 186 | 187 | }, 188 | "B": { 189 | "loads": [], 190 | "stores": [], 191 | "imm computes": ["bclri","bexti","binvi","bseti",'slli.uw', 192 | "mergei","sbseti","sbinvi",], 193 | "imm shifts": ['rori','roli','roriw','roliw'], 194 | "reg computes": ["add.uw", 195 | "andn", 196 | "bclr", 197 | "bext", 198 | "binv", 199 | "bset", 200 | "clmul", 201 | "clmulh", 202 | "clmulr", 203 | "clz", 204 | "clzw", 205 | "cpop",'cpopw',"sbset","sbclr","sbseti","sbclri", 206 | "ctz",'ctzw',"pcnt", 207 | 'max','maxu','min','minu', 208 | 'orc.b','orn', 209 | "pack","packh","packu","packw", 210 | 'rev8','rev.b', 211 | 'sext.b','sexr.h','sh1add','sh1add.uw','sh2add','sh2add.uw','sh3add','sh3add.uw', 212 | 'unzip','xnor','xprem.b','xprem.n','zip','zext.h', 213 | "funnel","unfunnel","merge","gather","gatheru","gatherx","scatter","scatteru","scatterx","sbext","sbextu","sbset","sbinv", 214 | "crc32.b","crc32.h","crc32.w","crc32c.b","crc32c.h","crc32c.w"], 215 | "reg shifts": ["sll","srl","sra","slo","sro","rol","ror",'rorw','rolw'], 216 | "jumps": [], 217 | "compares": [], 218 | "conversions":[], 219 | "moves":[], 220 | "classifies":[], 221 | "branches": [], 222 | "csrs":[], 223 | "fence":[], 224 | }, 225 | "P": { 226 | "loads": [ 227 | "vld", 228 | ], 229 | "stores": [ 230 | "vst", 231 | ], 232 | "imm computes": [ 233 | "vaddi", 234 | "vsubi", 235 | "vslli", 236 | "vsrli", 237 | "vsrai", 238 | "vandi", 239 | "vori", 240 | "vxori", 241 | "vslti", 242 | "vsltiu", 243 | ], 244 | "imm shifts": [ 245 | "vsll", 246 | "vsrl", 247 | "vsra", 248 | ], 249 | "reg computes": [ 250 | 
"vadd", 251 | "vsub", 252 | "vand", 253 | "vor", 254 | "vxor", 255 | "vslt", 256 | "vsltu", 257 | "vmin", 258 | "vmax", 259 | "vseq", 260 | "vsne", 261 | "vzext", 262 | "vsext", 263 | ], 264 | "reg shifts": [ 265 | "vssrl", 266 | "vssra", 267 | "vsll", 268 | "vsrl", 269 | "vsra", 270 | ], 271 | "jumps": [], 272 | "branches": [], 273 | "compares":[], 274 | "conversions":[], 275 | "moves":[], 276 | "classifies":[], 277 | "csrs":[], 278 | "fence":[], 279 | }, 280 | "Zicsr": { 281 | "loads": [], 282 | "stores": [], 283 | "imm computes": [], 284 | "imm shifts": [], 285 | "reg computes": [], 286 | "reg shifts": [], 287 | "jumps": [], 288 | "compares": [], 289 | "conversions": [], 290 | "moves": [], 291 | "classifies": [], 292 | "branches": [], 293 | "csrs": ["csrrw","csrrs","csrrc","csrrwi","csrrsi","csrrci","rdtimeh","rdtime"], 294 | "fence":[], 295 | }, 296 | 297 | }, 298 | "RV64": { 299 | "I": { 300 | "loads": ["ld", "lh", "lhu", "lb", "lbu", "lw", "lwu"], 301 | "stores": ["sb", "sh", "sw", "sd"], 302 | "imm computes": [ 303 | "addi", 304 | "addiw", 305 | "andi", 306 | "ori", 307 | "xori", 308 | "slti", 309 | "sltiu", 310 | "auipc", 311 | "lui", 312 | ], 313 | "imm shifts": ["slli", "srli", "srai", "slliw", "srliw", "sraiw"], 314 | "reg computes": [ 315 | "add", 316 | "sub", 317 | "slt", 318 | "sltu", 319 | "xor", 320 | "or", 321 | "and", 322 | "addw", 323 | "subw", 324 | ], 325 | "reg shifts": ["sll", "srl", "sra", "sllw", "srlw", "sraw"], 326 | "jumps": ["jal", "jalr"], 327 | "branches": ["bge", "bgeu", "blt", "bltu", "beq", "bne"], 328 | "compares":[], 329 | "conversions":[], 330 | "moves":[], 331 | "classifies":[], 332 | "csrs":[], 333 | "fence":["fence","fence.i"], 334 | }, 335 | "M": { 336 | "loads": [], 337 | "stores": [], 338 | "imm computes": [], 339 | "imm shifts": [], 340 | "reg computes": [ 341 | "div", 342 | "divu", 343 | "mul", 344 | "mulh", 345 | "mulhsu", 346 | "mulhu", 347 | "rem", 348 | "remu", 349 | ], 350 | "reg shifts": [], 351 | "jumps": [], 352 | "branches": [], 353 | "compares":[], 354 | "conversions":[], 355 | "moves":[], 356 | "classifies":[], 357 | "csrs":[], 358 | "fence":[], 359 | }, 360 | "F": { 361 | "loads": ["flw","flwsp","fld","fldsp"], 362 | "stores": ["fsw","fswsp","fsd","fsdsp"], 363 | "imm computes": [], 364 | "imm shifts": [], 365 | "reg computes": [ 366 | "fmadd.s", 367 | "fmsub.s", 368 | "fadd.s", 369 | "fsub.s", 370 | "fmul.s", 371 | "fdiv.s", 372 | "fmin.s", 373 | "fmax.s", 374 | "fsqrt.s", 375 | "fmadd.s", 376 | "fmsub.s", 377 | "fnmsub.s" 378 | "fnmadd.s" 379 | ], 380 | "reg shifts": [], 381 | "jumps": [], 382 | "compares": ["flt.s","feq.s","fle.s"], 383 | "conversions":[ 384 | "fcvt.w.s", 385 | "fcvt.wu.s", 386 | "fcvt.s.w", 387 | "fcvt.s.wu", 388 | "fcvt.l.s", 389 | "fcvt.lu.s", 390 | "fcvt.s.l", 391 | "fcvt.s.lu", 392 | "fsgnj.s", 393 | "fsgnjn.s", 394 | "fsgnjx.s", 395 | ], 396 | "moves":["fmv.s","fmv.x.w","fmv.w.x"], 397 | "classifies":["fclass.s"], 398 | "branches": [], 399 | "csrs":["frcsr","fscsr","frrm","fsrm","fsrmi",], 400 | "fence":[], 401 | }, 402 | "D": { 403 | "loads": ["fld","fldsp"], 404 | "stores": ["fsd","fsdsp"], 405 | "imm computes": [], 406 | "imm shifts": [], 407 | "reg computes": [ 408 | "fmadd.d", 409 | "fmsub.d", 410 | "fadd.d", 411 | "fsub.d", 412 | "fmul.d", 413 | "fdiv.d", 414 | "fmin.d", 415 | "fmax.d", 416 | "fsqrt.d", 417 | "fmadd.d", 418 | "fmsub.d", 419 | "fnmsub.d" 420 | "fnmadd.d" 421 | ], 422 | "reg shifts": [], 423 | "jumps": [], 424 | "compares": ["flt.d","feq.d","fle.d"], 425 | "conversions":[ 426 | 
"fcvt.w.d", 427 | "fcvt.wu.d", 428 | "fcvt.d.w", 429 | "fcvt.d.wu", 430 | "fcvt.l.d", 431 | "fcvt.lu.d", 432 | "fcvt.d.l", 433 | "fcvt.d.lu", 434 | "fsgnj.d", 435 | "fsgnjn.d", 436 | "fsgnjx.d", 437 | ], 438 | "moves":["fmv.x.d","fmv.d.x"], 439 | "classifies":["fclass.d"], 440 | "branches": [], 441 | "csrs":["frcsr","fscsr","frrm","fsrm","fsrmi",], 442 | "fence":[], 443 | 444 | }, 445 | "C": { 446 | "loads": [ 447 | "c.lwsp", 448 | "c.ldsp", 449 | "c.lw", 450 | "c.ld", 451 | ], 452 | "stores": [ 453 | "c.swsp", 454 | "c.sdsp", 455 | "c.sw", 456 | "c.sd", 457 | ], 458 | "imm computes": [ 459 | "c.addi4spn", 460 | "c.addi", 461 | "c.addiw", 462 | "c.li", 463 | "c.lui", 464 | "c.addi16sp", 465 | "c.addi4spn", 466 | "c.addi", 467 | "c.addiw", 468 | "c.li", 469 | "c.lui", 470 | "c.addi16sp", 471 | ], 472 | "imm shifts": ["c.slli", "c.srli", "c.srai"], 473 | "reg computes": [ 474 | "c.add", 475 | "c.sub", 476 | "c.xor", 477 | "c.or", 478 | "c.and", 479 | "c.subw", 480 | "c.addw", 481 | "c.mv", 482 | ], 483 | "reg shifts": ["c.sll", "c.srl", "c.sra"], 484 | "jumps": ["c.j", "c.jal", "c.jr", "c.jalr"], 485 | "branches": [ 486 | "c.beqz", 487 | "c.bnez", 488 | "c.bltz", 489 | "c.bgez", 490 | "c.bltz", 491 | "c.bgez", 492 | "c.bltzal", 493 | "c.bgezal", 494 | ], 495 | "compares":[], 496 | "conversions":[], 497 | "moves":[], 498 | "classifies":[], 499 | "csrs":[], 500 | "fence":[], 501 | }, 502 | 503 | "B": { 504 | "loads": [], 505 | "stores": [], 506 | "imm computes": ["bclri","bexti","binvi","bseti",'slli.uw', 507 | "mergei","sbseti","sbinvi",], 508 | "imm shifts": ['rori','roli','roriw','roliw'], 509 | "reg computes": ["add.uw", 510 | "andn", 511 | "bclr", 512 | "bext", 513 | "binv", 514 | "bset", 515 | "clmul", 516 | "clmulh", 517 | "clmulr", 518 | "clz", 519 | "clzw", 520 | "cpop",'cpopw',"sbset","sbclr","sbseti","sbclri", 521 | "ctz",'ctzw',"pcnt", 522 | 'max','maxu','min','minu', 523 | 'orc.b','orn', 524 | "pack","packh","packu","packw", 525 | 'rev8','rev.b', 526 | 'sext.b','sexr.h','sh1add','sh1add.uw','sh2add','sh2add.uw','sh3add','sh3add.uw', 527 | 'unzip','xnor','xprem.b','xprem.n','zip','zext.h', 528 | "funnel","unfunnel","merge","gather","gatheru","gatherx","scatter","scatteru","scatterx","sbext","sbextu","sbset","sbinv", 529 | "crc32.b","crc32.h","crc32.w","crc32c.b","crc32c.h","crc32c.w","crc32c.d","crc32.d"], 530 | "reg shifts": ["sll","srl","sra","slo","sro","rol","ror",'rorw','rolw'], 531 | "jumps": [], 532 | "compares": [], 533 | "conversions":[], 534 | "moves":[], 535 | "classifies":[], 536 | "branches": [], 537 | "csrs":[], 538 | "fence":[], 539 | }, 540 | "P": { 541 | "loads": [ 542 | "vld", 543 | ], 544 | "stores": [ 545 | "vst", 546 | ], 547 | "imm computes": [ 548 | "vaddi", 549 | "vsubi", 550 | "vslli", 551 | "vsrli", 552 | "vsrai", 553 | "vandi", 554 | "vori", 555 | "vxori", 556 | "vslti", 557 | "vsltiu", 558 | ], 559 | "imm shifts": [ 560 | "vsll", 561 | "vsrl", 562 | "vsra", 563 | ], 564 | "reg computes": [ 565 | "vadd", 566 | "vsub", 567 | "vand", 568 | "vor", 569 | "vxor", 570 | "vslt", 571 | "vsltu", 572 | "vmin", 573 | "vmax", 574 | "vseq", 575 | "vsne", 576 | "vzext", 577 | "vsext", 578 | ], 579 | "reg shifts": [ 580 | "vssrl", 581 | "vssra", 582 | "vsll", 583 | "vsrl", 584 | "vsra", 585 | ], 586 | "jumps": [], 587 | "branches": [], 588 | "compares":[], 589 | "conversions":[], 590 | "moves":[], 591 | "classifies":[], 592 | "csrs":[], 593 | "fence":[], 594 | }, 595 | "Zicsr": { 596 | "loads": [], 597 | "stores": [], 598 | "imm computes": [], 599 | "imm shifts": [], 
600 | "reg computes": [], 601 | "reg shifts": [], 602 | "jumps": [], 603 | "compares": [], 604 | "conversions": [], 605 | "moves": [], 606 | "classifies": [], 607 | "branches": [], 608 | "csrs": ["csrrw","csrrs","csrrc","csrrwi","csrrsi","csrrci","rdtimeh","rdtime"], 609 | "fence":[], 610 | }, 611 | }, 612 | } 613 | 614 | reg_file = {f'x{i}':'0x00000000' for i in range(32)} 615 | freg_file = {f'f{i}':'0' for i in range(32)} 616 | 617 | csr_file = {'0x000': 'ustatus', 618 | #Unprivileged Floating-Point CSRs 619 | '0x001': 'fflags', 620 | '0x002': 'frm', 621 | '0x003': 'fcsr', 622 | #Unprivileged Counter/Timers 623 | '0xc00': 'cycle', 624 | '0xc01': 'time', 625 | '0xc02': 'instret', 626 | '0xc03': 'hpmcounter3', 627 | '0xc04': 'hpmcounter4', 628 | '0xc05': 'hpmcounter5', 629 | '0xc06': 'hpmcounter6', 630 | '0xc07': 'hpmcounter7', 631 | '0xc08': 'hpmcounter8', 632 | '0xc09': 'hpmcounter9', 633 | '0xc0a': 'hpmcounter10', 634 | '0xc0b': 'hpmcounter11', 635 | '0xc0c': 'hpmcounter12', 636 | '0xc0d': 'hpmcounter13', 637 | '0xc0e': 'hpmcounter14', 638 | '0xc0f': 'hpmcounter15', 639 | '0xc10': 'hpmcounter16', 640 | '0xc11': 'hpmcounter17', 641 | '0xc12': 'hpmcounter18', 642 | '0xc13': 'hpmcounter19', 643 | '0xc14': 'hpmcounter20', 644 | '0xc15': 'hpmcounter21', 645 | '0xc16': 'hpmcounter22', 646 | '0xc17': 'hpmcounter23', 647 | '0xc18': 'hpmcounter24', 648 | '0xc19': 'hpmcounter25', 649 | '0xc1a': 'hpmcounter26', 650 | '0xc1b': 'hpmcounter27', 651 | '0xc1c': 'hpmcounter28', 652 | '0xc1d': 'hpmcounter29', 653 | '0xc1e': 'hpmcounter30', 654 | '0xc1f': 'hpmcounter31', 655 | '0xc80': 'cycleh', 656 | '0xc81': 'timeh', 657 | '0xc82': 'instreth', 658 | '0xc83': 'hpmcounter3h', 659 | '0xc84': 'hpmcounter4h', 660 | '0xc85': 'hpmcounter5h', 661 | '0xc86': 'hpmcounter6h', 662 | '0xc87': 'hpmcounter7h', 663 | '0xc88': 'hpmcounter8h', 664 | '0xc89': 'hpmcounter9h', 665 | '0xc8a': 'hpmcounter10h', 666 | '0xc8b': 'hpmcounter11h', 667 | '0xc8c': 'hpmcounter12h', 668 | '0xc8d': 'hpmcounter13h', 669 | '0xc8e': 'hpmcounter14h', 670 | '0xc8f': 'hpmcounter15h', 671 | '0xc90': 'hpmcounter16h', 672 | '0xc91': 'hpmcounter17h', 673 | '0xc92': 'hpmcounter18h', 674 | '0xc93': 'hpmcounter19h', 675 | '0xc94': 'hpmcounter20h', 676 | '0xc95': 'hpmcounter21h', 677 | '0xc96': 'hpmcounter22h', 678 | '0xc97': 'hpmcounter23h', 679 | '0xc98': 'hpmcounter24h', 680 | '0xc99': 'hpmcounter25h', 681 | '0xc9a': 'hpmcounter26h', 682 | '0xc9b': 'hpmcounter27h', 683 | '0xc9c': 'hpmcounter28h', 684 | '0xc9d': 'hpmcounter29h', 685 | '0xc9e': 'hpmcounter30h', 686 | '0xc9f': 'hpmcounter31h', 687 | #Supervisor Trap Setup 688 | '0x100': 'sstatus', 689 | '0x102': 'sedeleg', 690 | '0x103': 'sideleg', 691 | '0x104': 'sie', 692 | '0x105': 'stvec', 693 | '0x106': 'scounteren', 694 | #Supervisor Configuration 695 | '0x10a': 'senvcfg', 696 | #Supervisor Trap Handling 697 | '0x140': 'sscratch', 698 | '0x141': 'sepc', 699 | '0x142': 'scause', 700 | '0x143': 'stval', 701 | '0x144': 'sip', 702 | #Supervisor Protection and Translation 703 | '0x180': 'satp', 704 | #Debug/Trace Registers 705 | '0x5a8': 'scontext', 706 | #Hypervisor Trap Setup 707 | '0x600': 'hstatus', 708 | '0x602': 'hedeleg', 709 | '0x603': 'hideleg', 710 | '0x604': 'hie', 711 | '0x605': 'htvec', 712 | '0x606': 'hcounteren', 713 | '0x607': 'hgeie', 714 | #Hypervisor Trap Handling 715 | '0x643': 'htval', 716 | '0x644': 'hip', 717 | '0x645': 'hvip', 718 | '0x64a': 'htinst', 719 | '0xe12': 'hgeip', 720 | #Hypervisor Configuration 721 | '0x60a': 'henvcfg', 722 | '0x61a': 'henvcfgh', 723 | #Hypervisor 
Protection and Translation 724 | '0x680': 'hgatp', 725 | #Debug/Trace Registers 726 | '0x6a8': 'hcontext', 727 | #Hypervisor Counter/Timer Virtualization Registers 728 | '0x605': 'htimedelta', 729 | '0x615': 'htimedeltah', 730 | #Virtual Supervisor Registers 731 | '0x200': 'vsstatus', 732 | '0x204': 'vsie', 733 | '0x205': 'vstvec', 734 | '0x240': 'vsscratch', 735 | '0x241': 'vsepc', 736 | '0x242': 'vscause', 737 | '0x243': 'vstval', 738 | '0x244': 'vsip', 739 | '0x280': 'vsatp', 740 | #Machine Information Registers 741 | '0xf11': 'mvendorid', 742 | '0xf12': 'marchid', 743 | '0xf13': 'mimpid', 744 | '0xf14': 'mhartid', 745 | '0xf15': 'mconfigptr', 746 | #Machine Trap Setup 747 | '0x300': 'mstatus', 748 | '0x301': 'misa', 749 | '0x302': 'medeleg', 750 | '0x303': 'mideleg', 751 | '0x304': 'mie', 752 | '0x305': 'mtvec', 753 | '0x306': 'mcounteren', 754 | '0x307': 'mtvt', 755 | '0x310': 'mscratch', 756 | #Machine Trap Handling 757 | '0x340': 'mscratch', 758 | '0x341': 'mepc', 759 | '0x342': 'mcause', 760 | '0x343': 'mtval', 761 | '0x344': 'mip', 762 | '0x34a': 'mtinst', 763 | '0x34b': 'mtval2', 764 | #Machine Configuration 765 | '0x30a': 'menvcfg', 766 | '0x31a': 'menvcfgh', 767 | '0x747': 'mseccfg', 768 | '0x757': 'mseccfgh', 769 | #Machine Memory Protection 770 | '0x3a0': 'pmpcfg0', 771 | '0x3a1': 'pmpcfg1', 772 | '0x3a2': 'pmpcfg2', 773 | '0x3a3': 'pmpcfg3', 774 | '0x3a4': 'pmpcfg4', 775 | '0x3a5': 'pmpcfg5', 776 | '0x3a6': 'pmpcfg6', 777 | '0x3a7': 'pmpcfg7', 778 | '0x3a8': 'pmpcfg8', 779 | '0x3a9': 'pmpcfg9', 780 | '0x3aa': 'pmpcfg10', 781 | '0x3ab': 'pmpcfg11', 782 | '0x3ac': 'pmpcfg12', 783 | '0x3ad': 'pmpcfg13', 784 | '0x3ae': 'pmpcfg14', 785 | '0x3af': 'pmpcfg15', 786 | '0x3b0': 'pmpaddr0', 787 | '0x3b1': 'pmpaddr1', 788 | '0x3b2': 'pmpaddr2', 789 | '0x3b3': 'pmpaddr3', 790 | '0x3b4': 'pmpaddr4', 791 | '0x3b5': 'pmpaddr5', 792 | '0x3b6': 'pmpaddr6', 793 | '0x3b7': 'pmpaddr7', 794 | '0x3b8': 'pmpaddr8', 795 | '0x3b9': 'pmpaddr9', 796 | '0x3ba': 'pmpaddr10', 797 | '0x3bb': 'pmpaddr11', 798 | '0x3bc': 'pmpaddr12', 799 | '0x3bd': 'pmpaddr13', 800 | '0x3be': 'pmpaddr14', 801 | '0x3bf': 'pmpaddr15', 802 | '0x3c0': 'pmpaddr16', 803 | '0x3c1': 'pmpaddr17', 804 | '0x3c2': 'pmpaddr18', 805 | '0x3c3': 'pmpaddr19', 806 | '0x3c4': 'pmpaddr20', 807 | '0x3c5': 'pmpaddr21', 808 | '0x3c6': 'pmpaddr22', 809 | '0x3c7': 'pmpaddr23', 810 | '0x3c8': 'pmpaddr24', 811 | '0x3c9': 'pmpaddr25', 812 | '0x3ca': 'pmpaddr26', 813 | '0x3cb': 'pmpaddr27', 814 | '0x3cc': 'pmpaddr28', 815 | '0x3cd': 'pmpaddr29', 816 | '0x3ce': 'pmpaddr30', 817 | '0x3cf': 'pmpaddr31', 818 | '0x3d0': 'pmpaddr32', 819 | '0x3d1': 'pmpaddr33', 820 | '0x3d2': 'pmpaddr34', 821 | '0x3d3': 'pmpaddr35', 822 | '0x3d4': 'pmpaddr36', 823 | '0x3d5': 'pmpaddr37', 824 | '0x3d6': 'pmpaddr38', 825 | '0x3d7': 'pmpaddr39', 826 | '0x3d8': 'pmpaddr40', 827 | '0x3d9': 'pmpaddr41', 828 | '0x3da': 'pmpaddr42', 829 | '0x3db': 'pmpaddr43', 830 | '0x3dc': 'pmpaddr44', 831 | '0x3dd': 'pmpaddr45', 832 | '0x3de': 'pmpaddr46', 833 | '0x3df': 'pmpaddr47', 834 | '0x3e0': 'pmpaddr48', 835 | '0x3e1': 'pmpaddr49', 836 | '0x3e2': 'pmpaddr50', 837 | '0x3e3': 'pmpaddr51', 838 | '0x3e4': 'pmpaddr52', 839 | '0x3e5': 'pmpaddr53', 840 | '0x3e6': 'pmpaddr54', 841 | '0x3e7': 'pmpaddr55', 842 | '0x3e8': 'pmpaddr56', 843 | '0x3e9': 'pmpaddr57', 844 | '0x3ea': 'pmpaddr58', 845 | '0x3eb': 'pmpaddr59', 846 | '0x3ec': 'pmpaddr60', 847 | '0x3ed': 'pmpaddr61', 848 | '0x3ee': 'pmpaddr62', 849 | '0x3ef': 'pmpaddr63', 850 | #Machine Non-Maskable Interrupt Handling 851 | '0x740': 'mnscratch', 
852 | '0x741': 'mnepc', 853 | '0x742': 'mncause', 854 | '0x743': 'mntval', 855 | '0x744': 'mnstatus', 856 | #Machine Counter/Timers 857 | '0xb00': 'mcycle', 858 | '0xb02': 'minstret', 859 | '0xb03': 'mhpmcounter3', 860 | '0xb04': 'mhpmcounter4', 861 | '0xb05': 'mhpmcounter5', 862 | '0xb06': 'mhpmcounter6', 863 | '0xb07': 'mhpmcounter7', 864 | '0xb08': 'mhpmcounter8', 865 | '0xb09': 'mhpmcounter9', 866 | '0xb0a': 'mhpmcounter10', 867 | '0xb0b': 'mhpmcounter11', 868 | '0xb0c': 'mhpmcounter12', 869 | '0xb0d': 'mhpmcounter13', 870 | '0xb0e': 'mhpmcounter14', 871 | '0xb0f': 'mhpmcounter15', 872 | '0xb10': 'mhpmcounter16', 873 | '0xb11': 'mhpmcounter17', 874 | '0xb12': 'mhpmcounter18', 875 | '0xb13': 'mhpmcounter19', 876 | '0xb14': 'mhpmcounter20', 877 | '0xb15': 'mhpmcounter21', 878 | '0xb16': 'mhpmcounter22', 879 | '0xb17': 'mhpmcounter23', 880 | '0xb18': 'mhpmcounter24', 881 | '0xb19': 'mhpmcounter25', 882 | '0xb1a': 'mhpmcounter26', 883 | '0xb1b': 'mhpmcounter27', 884 | '0xb1c': 'mhpmcounter28', 885 | '0xb1d': 'mhpmcounter29', 886 | '0xb1e': 'mhpmcounter30', 887 | '0xb1f': 'mhpmcounter31', 888 | '0xb80': 'mcycleh', 889 | '0xb82': 'minstreth', 890 | '0xb83': 'mhpmcounter3h', 891 | '0xb84': 'mhpmcounter4h', 892 | '0xb85': 'mhpmcounter5h', 893 | '0xb86': 'mhpmcounter6h', 894 | '0xb87': 'mhpmcounter7h', 895 | '0xb88': 'mhpmcounter8h', 896 | '0xb89': 'mhpmcounter9h', 897 | '0xb8a': 'mhpmcounter10h', 898 | '0xb8b': 'mhpmcounter11h', 899 | '0xb8c': 'mhpmcounter12h', 900 | '0xb8d': 'mhpmcounter13h', 901 | '0xb8e': 'mhpmcounter14h', 902 | '0xb8f': 'mhpmcounter15h', 903 | '0xb90': 'mhpmcounter16h', 904 | '0xb91': 'mhpmcounter17h', 905 | '0xb92': 'mhpmcounter18h', 906 | '0xb93': 'mhpmcounter19h', 907 | '0xb94': 'mhpmcounter20h', 908 | '0xb95': 'mhpmcounter21h', 909 | '0xb96': 'mhpmcounter22h', 910 | '0xb97': 'mhpmcounter23h', 911 | '0xb98': 'mhpmcounter24h', 912 | '0xb99': 'mhpmcounter25h', 913 | '0xb9a': 'mhpmcounter26h', 914 | '0xb9b': 'mhpmcounter27h', 915 | '0xb9c': 'mhpmcounter28h', 916 | '0xb9d': 'mhpmcounter29h', 917 | '0xb9e': 'mhpmcounter30h', 918 | '0xb9f': 'mhpmcounter31h', 919 | #Machine Counter Setup 920 | '0x320': 'mcountinhibit', 921 | '0x323': 'mhpmevent3', 922 | '0x324': 'mhpmevent4', 923 | '0x325': 'mhpmevent5', 924 | '0x326': 'mhpmevent6', 925 | '0x327': 'mhpmevent7', 926 | '0x328': 'mhpmevent8', 927 | '0x329': 'mhpmevent9', 928 | '0x32a': 'mhpmevent10', 929 | '0x32b': 'mhpmevent11', 930 | '0x32c': 'mhpmevent12', 931 | '0x32d': 'mhpmevent13', 932 | '0x32e': 'mhpmevent14', 933 | '0x32f': 'mhpmevent15', 934 | '0x330': 'mhpmevent16', 935 | '0x331': 'mhpmevent17', 936 | '0x332': 'mhpmevent18', 937 | '0x333': 'mhpmevent19', 938 | '0x334': 'mhpmevent20', 939 | '0x335': 'mhpmevent21', 940 | '0x336': 'mhpmevent22', 941 | '0x337': 'mhpmevent23', 942 | '0x338': 'mhpmevent24', 943 | '0x339': 'mhpmevent25', 944 | '0x33a': 'mhpmevent26', 945 | '0x33b': 'mhpmevent27', 946 | '0x33c': 'mhpmevent28', 947 | '0x33d': 'mhpmevent29', 948 | '0x33e': 'mhpmevent30', 949 | '0x33f': 'mhpmevent31', 950 | #Debug/Trace Registers (shared with Debug Mode) 951 | '0x7a0': 'tselect', 952 | '0x7a1': 'tdata1', 953 | '0x7a2': 'tdata2', 954 | '0x7a3': 'tdata3', 955 | '0x7a8': 'mcontext', 956 | #Debug Mode Registers 957 | '0x7b0': 'dcsr', 958 | '0x7b1': 'dpc', 959 | '0x7b2': 'dscratch', 960 | '0x7b3': 'dscratch1', 961 | 962 | '0x345': 'mnxti', 963 | '0x347': 'mintthresh', 964 | '0x346': 'mintstatus', 965 | '0x348': 'mscratchcsw', 966 | '0x349': 'mscratchcswl', 967 | } 968 | 969 | 
--------------------------------------------------------------------------------