├── docs
│   ├── preview.adoc
│   ├── index.rst
│   ├── docs
│   │   └── plugins
│   │       └── instr_groups.rst
│   ├── conf.py
│   ├── adding_plugins.rst
│   ├── intro.rst
│   └── plugins.rst
├── riscv_application_profiler
│   ├── __init__.py
│   ├── requirements.txt
│   ├── isac_port.py
│   ├── dasm.sh
│   ├── verif.py
│   ├── main.py
│   ├── utils.py
│   ├── plugins
│   │   ├── csr_compute.py
│   │   ├── pattern.py
│   │   ├── dependency.py
│   │   ├── store_load_bypass.py
│   │   ├── register_compute.py
│   │   ├── jumps_ops.py
│   │   ├── instr_groups.py
│   │   └── branch_ops.py
│   ├── profiler.py
│   └── consts.py
├── .gitignore
├── setup.cfg
├── Makefile
├── CHANGELOG.md
├── make.bat
├── .readthedocs.yaml
├── sample_configs
│   ├── cycle_accurate
│   │   ├── config.yaml
│   │   └── L2_configs
│   │       └── config.yaml
│   └── profiler_config
│       ├── config.yaml
│       └── L2_configs
│           └── config.yaml
├── LICENSE
├── setup.py
├── CONTRIBUTING.md
├── README.md
└── CODE_OF_CONDUCT.md
/docs/preview.adoc:
--------------------------------------------------------------------------------
1 | RISC-V Application Profiler
2 | ===========================
3 | 
4 | This is the preface for the RISC-V Application Profiler (RAP) documentation.
--------------------------------------------------------------------------------
/riscv_application_profiler/__init__.py:
--------------------------------------------------------------------------------
1 | '''Top Level Package for riscv_application_profiler'''
2 | 
3 | __version__ = '1.0.0'
4 | 
--------------------------------------------------------------------------------
/riscv_application_profiler/requirements.txt:
--------------------------------------------------------------------------------
1 | click>=7.0.0
2 | riscv_isac==0.18.1
3 | pathlib
4 | riscv_config
5 | pycachesim==0.3.1
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Pycache files
 2 | *.pyc
 3 | 
 4 | # package information
 5 | *.egg-info/
 6 | dist/
 7 | .python-version
 8 | 
 9 | # build files
10 | build/
11 | 
12 | # riscv opcodes
13 | rvop_decoder
14 | 
15 | # files from isac build
16 | constants.py
17 | rvopcodesdecoder.py
18 | 
19 | # cubic, larger than 100MB
20 | tests/cubic
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [bumpversion]
 2 | current_version = 1.0.0
 3 | commit = True
 4 | tag = True
 5 | 
 6 | [bumpversion:file:setup.py]
 7 | search = version='{current_version}'
 8 | replace = version='{new_version}'
 9 | 
10 | [bumpversion:file:riscv_application_profiler/__init__.py]
11 | search = __version__ = '{current_version}'
12 | replace = __version__ = '{new_version}'
13 | 
14 | [bdist_wheel]
15 | universal = 1
16 | 
17 | [flake8]
18 | exclude = docs
19 | 
20 | [aliases]
--------------------------------------------------------------------------------
/riscv_application_profiler/isac_port.py:
--------------------------------------------------------------------------------
 1 | # See LICENSE for licensing information.
 2 | 
 3 | # This file is a porting mechanism for using riscv-isac in riscv-application-profiler.
 4 | 
 5 | from git import Repo
 6 | import os
 7 | import shutil
 8 | import sys
 9 | 
10 | def isac_setup_routine():
11 |     '''
12 |     Sets up the riscv-isac environment.
13 |     '''
14 |     if not os.path.exists('rvop_decoder'):
15 |         os.system('riscv_isac setup')
16 |     sys.path.append(os.path.join(os.getcwd(), 'rvop_decoder'))
17 | 
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. RISC-V Application Profiler documentation master file, created by
 2 |    sphinx-quickstart on Wed Sep 13 16:33:50 2023.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to RISC-V Application Profiler's documentation!
 7 | =======================================================
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Contents:
12 | 
13 |    intro
14 |    preview
15 |    adding_plugins
16 |    plugins
17 | 
18 | 
--------------------------------------------------------------------------------
/riscv_application_profiler/dasm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | cat > .original
 4 | cat .original | awk '{match($0, /0x[0-9a-fA-F]+/); print substr($0, RSTART, RLENGTH)}' > .original.tmp.swp ;
 5 | cat .original | awk -F'[()]' '{print "DASM(" $2 ")"}' | spike-dasm > .dasm.tmp.swp ;
 6 | exec 3<.original.tmp.swp
 7 | exec 4<.dasm.tmp.swp
 8 | echo "" > .merged.tmp.swp
 9 | while read -r line1 <&3 && read -r line2 <&4; do
10 |     echo "$line1 :: $line2" >> .merged.tmp.swp
11 | done
12 | exec 3<&-
13 | exec 4<&-
14 | cat .merged.tmp.swp > "$1"
15 | rm -f .original
16 | rm -f .original.tmp.swp
17 | rm -f .dasm.tmp.swp
18 | rm -f .merged.tmp.swp
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/docs/plugins/instr_groups.rst:
--------------------------------------------------------------------------------
 1 | Grouping Instructions
 2 | =====================
 3 | 
 4 | We iterate through all entries in the provided execution log and classify the instructions into groups. The groups are defined by the following rules:
 5 | 
 6 | * If the instruction is a branch, it is placed in a group of its own.
 7 | * If the instruction is a memory instruction, it is placed in a group of its own.
 8 | 
 9 | The remaining instructions are placed in groups based on the following rules:
10 | 
11 | * If the instruction is a load instruction, it is placed in a group of its own.
12 | * If the instruction is a store instruction, it is placed in a group of its own.
13 | * If the instruction is a call instruction, it is placed in a group of its own.
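14 | 
15 | A minimal sketch of this classification scheme is shown below. It is illustrative only: the mnemonic sets are assumptions covering the base RV32I/RV64I instructions, and ``instr_name`` mirrors the attribute carried by the profiler's log entries, not its exact grouping code.
16 | 
17 | .. code-block:: python
18 | 
19 |    BRANCHES = {'beq', 'bne', 'blt', 'bge', 'bltu', 'bgeu'}
20 |    LOADS = {'lb', 'lbu', 'lh', 'lhu', 'lw', 'lwu', 'ld'}
21 |    STORES = {'sb', 'sh', 'sw', 'sd'}
22 |    CALLS = {'jal', 'jalr'}
23 | 
24 |    def group_instructions(entries):
25 |        '''Place each executed instruction into exactly one group.'''
26 |        groups = {'branch': [], 'load': [], 'store': [], 'call': [], 'other': []}
27 |        for entry in entries:
28 |            name = entry.instr_name
29 |            if name in BRANCHES:
30 |                groups['branch'].append(entry)
31 |            elif name in LOADS:
32 |                groups['load'].append(entry)
33 |            elif name in STORES:
34 |                groups['store'].append(entry)
35 |            elif name in CALLS:
36 |                groups['call'].append(entry)
37 |            else:
38 |                groups['other'].append(entry)
39 |        return groups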
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # CHANGELOG
 2 | 
 3 | This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 4 | 
 5 | ## [1.0.1] - 2025-04-25
 6 | - minor fix
 7 | 
 8 | ## [1.0.0] - 2023-09-13
 9 | - Support including a config file
10 | - Changed store_load_bypass to track bytes
11 | - Improved build times
12 | - Added docs
13 | - Added the nsichneu benchmark from embench-iot to tests
14 | - Added macro op plugin
15 | 
16 | ## [0.2.0] - 2023-07-25
17 | - Added cache plugins
18 | - Added register_compute plugins
19 | - Added jumps_ops plugins
20 | - Added store_load_bypass plugins
21 | - Added csr_compute plugins
22 | - Added data dependency plugins
23 | 
24 | ## [0.1.0] - 2023-05-05
25 | - Added branch plugins
26 | - Added instruction groups plugin
27 | - Used riscv_isac for decoding
28 | - Used riscv_config for ISA validation
29 | 
30 | ## [0.0.1] - 2023-03-08
31 | - Added sample log file.
32 | 
--------------------------------------------------------------------------------
/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'RISC-V Application Profiler' 10 | copyright = '2023, Mahendra Vamshi A' 11 | author = 'Mahendra Vamshi A' 12 | release = '1.0.0' 13 | 14 | # -- General configuration --------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 16 | 17 | extensions = [] 18 | 19 | templates_path = ['_templates'] 20 | exclude_patterns = [] 21 | 22 | 23 | 24 | # -- Options for HTML output ------------------------------------------------- 25 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 26 | 27 | html_theme = 'alabaster' 28 | html_static_path = ['_static'] 29 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.7" 12 | # You can also specify other tool versions: 13 | # nodejs: "20" 14 | # rust: "1.70" 15 | # golang: "1.20" 16 | 17 | # Build documentation in the "docs/" directory with Sphinx 18 | sphinx: 19 | configuration: docs/conf.py 20 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 21 | # builder: "dirhtml" 22 | # Fail on all warnings to avoid broken references 23 | # fail_on_warning: true 24 | 25 | # Optionally build your docs in additional formats such as PDF and ePub 26 | # formats: 27 | # - pdf 28 | # - epub 29 | 30 | # Optional but recommended, declare the Python requirements required 31 | # to build your documentation 32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 33 | # python: 34 | # install: 35 | # - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /sample_configs/cycle_accurate/config.yaml: -------------------------------------------------------------------------------- 1 | # YAML without l2 cache congifuration 2 | cycles: 3 | # start_cycles: 100 4 | instructions_cycles: 5 | {add: 6 | {latency : 1, 7 | throughput : 1}, 8 | sub: 9 | {latency : 1, 10 | throughput : 1}, 11 | mul: 12 | {latency : 1, 13 | throughput : 1}, 14 | div: 15 | {latency : 1, 16 | throughput : 1}, 17 | rem: 18 | {latency : 1, 19 | throughput : 1}, 20 | csr: 21 | {latency : 1, 22 | throughput : 1}, 23 | s(a-z)l*: 24 | {latency : 1, 25 | throughput : 1}, 26 | s(a-z)r*: 27 | {latency : 1, 28 | throughput : 1}, 29 | j: 30 | {latency : 1, 31 | throughput : 1}, 32 | c.: 33 | {latency : 1, 34 | throughput : 1} 35 | } 36 | flush_cycles: 37 | csr: 1 38 | branch: 0 39 | fence.i: 1 40 | reset_cycles: 69 41 | pipeline_depth: 2 42 | bus_latency: # this is inaccurate 43 | data: 8 44 | instruction: 9 45 | bypass_latency: 1 46 | structural_hazards: 47 | data_cache: 1 48 | bus: 8 # bus capacity in terms of words 49 | # replacemnt_latency: 257 50 | mem_latency: 51 | cacheable: 52 | data: 53 | 
hit: 1 54 | miss: 8 55 | instruction: 56 | hit: 1 57 | miss: 8 58 | non_cacheable: 59 | data: 60 | miss: 8 #bus_latency 61 | instruction: 62 | miss: 9 #bus_latency 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) <2023> . 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 7 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 8 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /sample_configs/cycle_accurate/L2_configs/config.yaml: -------------------------------------------------------------------------------- 1 | # YAML with l2 cache congifuration 2 | cycles: 3 | # start_cycles: 100 4 | instructions_cycles: 5 | {add: 6 | {latency : 1, 7 | throughput : 1}, 8 | sub: 9 | {latency : 1, 10 | throughput : 1}, 11 | mul: 12 | {latency : 1, 13 | throughput : 1}, 14 | div: 15 | {latency : 1, 16 | throughput : 1}, 17 | rem: 18 | {latency : 1, 19 | throughput : 1}, 20 | csr: 21 | {latency : 1, 22 | throughput : 1}, 23 | s(a-z)l*: 24 | {latency : 1, 25 | throughput : 1}, 26 | s(a-z)r*: 27 | {latency : 1, 28 | throughput : 1}, 29 | j: 30 | {latency : 1, 31 | throughput : 1}, 32 | c.: 33 | {latency : 1, 34 | throughput : 1} 35 | } 36 | flush_cycles: 37 | csr: 1 38 | branch: 0 39 | fence.i: 1 40 | reset_cycles: 69 41 | pipeline_depth: 2 42 | bus_latency: # this is inaccurate 43 | data: 8 44 | instruction: 9 45 | bypass_latency: 1 46 | structural_hazards: 47 | data_cache: 1 48 | bus: 8 # bus capacity in terms of words 49 | # replacemnt_latency: 257 50 | mem_latency: 51 | cacheable: 52 | data: 53 | hit: 1 54 | miss: 1 55 | instruction: 56 | hit: 1 57 | miss: 1 58 | L2: 59 | hit: 1 60 | miss: 16 #bus_latency + (size of cache line in bytes /4) eg: 8 + (32/4) = 16 61 | non_cacheable: 62 | data: 63 | miss: 8 #bus_latency 64 | instruction: 65 | miss: 9 #bus_latency 66 | 67 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """The setup script.""" 2 | 3 | import os 4 | from setuptools import setup, find_packages 5 | import codecs 6 | 7 | # Base directory of package 8 | here = os.path.abspath(os.path.dirname(__file__)) 9 | 10 | 11 | def read(*parts): 12 | with codecs.open(os.path.join(here, *parts), 'r') as fp: 13 | return fp.read() 14 | def read_requires(): 15 | with open(os.path.join(here, "riscv_application_profiler/requirements.txt"), 16 | "r") as reqfile: 17 | return reqfile.read().splitlines() 18 | 19 | 20 | # Long Description 21 | with open("README.md", "r") as fh: 22 | readme = fh.read() 23 | 24 | setup_requirements = [] 25 | 26 | test_requirements = [] 27 | 28 | setup( 29 | name='riscv-application-profiler', 30 | version='1.0.0', 31 | description="RISC-V Application Profiler", 32 | long_description=readme + '\n\n', 33 | classifiers=[ 34 | "Programming Language :: Python :: 3.7", 35 | "License :: OSI Approved :: BSD License", 36 | "Development Status :: 4 - Beta" 37 | ], 38 | url='https://github.com/mahendraVamshi/riscv-application-profiler', 39 | author="PES University + InCore Semiconductors", 40 | author_email='', 41 | license="BSD-3-Clause", 42 | packages=find_packages(), 43 | package_dir={'riscv_application_profiler': 'riscv_application_profiler'}, 44 | package_data={'riscv_application_profiler': ['requirements.txt']}, 45 | install_requires=read_requires(), 46 | python_requires='>=3.7.0', 47 | entry_points={ 48 | 'console_scripts': ['riscv_application_profiler=riscv_application_profiler.main:cli'], 49 | }, 50 | include_package_data=True, 51 | keywords='riscv_application_profiler', 52 | tests_require=test_requirements, 53 | zip_safe=False, 54 | ) 55 | -------------------------------------------------------------------------------- /riscv_application_profiler/verif.py: -------------------------------------------------------------------------------- 1 | import importlib 
 2 | from riscv_application_profiler.consts import *
 3 | import riscv_application_profiler.consts as consts
 4 | from riscv_application_profiler.utils import Utilities
 5 | import re
 6 | import os
 7 | 
 8 | # Ad-hoc verification helpers: they compare the profiler's cycle estimates
 9 | # against a reference dump that carries '[ <cycles> ]' annotations per line.
10 | 
11 | def verify(check):
12 |     count = 0
13 |     # utils = Utilities(check)
14 |     # utils.metadata()
15 |     with open(check, 'r') as check_file:
16 | 
17 |         # Iterate through each line in the log file.
18 |         for line in check_file:
19 |             match = re.match(r'\[\s+(\d+)\]', line)
20 |             if match is not None:
21 | 
22 |                 x = int(match.group(1))
23 |                 if x is not None:
24 |                     count += x
25 | 
26 |     print('Actual number of cycles: ')
27 |     print(count//10)
28 | 
29 | def modi(check, mast_dict):
30 |     # Write an annotated copy of the dump to mine.txt and record any
31 |     # cycle-count mismatches (expected vs. actual) in error.txt.
32 |     with open(check, 'r') as check_file, open("mine.txt", 'w') as mine, open("error.txt", 'w') as error:
33 |         l = list(mast_dict.values())
34 |         l1 = list(mast_dict.keys())
35 |         for idx,line in enumerate(check_file):
36 |             line = line.strip()
37 |             entry = l1[idx]
38 |             n_line = line+ '\t'+ '-------- ' + entry.instr_name + '\t'+ '['+str(l[idx])+']'
39 |             mine.writelines(n_line+'\n')
40 |             match = re.match(r'\[\s+(\d+)\]', line)
41 |             if match is not None:
42 |                 x = (int(match.group(1)))//10
43 |                 if x is not None:
44 |                     if x != l[idx]:
45 |                         error.writelines(n_line+'\n')
46 |                         error.writelines('Expected: '+str(l[idx])+'\n')
47 |                         error.writelines('Actual: '+str(x)+'\n')
48 |                         error.writelines('Difference: '+str(l[idx]-x)+'\n')
49 |                         error.writelines('Line number: '+str(idx + 1)+'\n')
50 |                         error.writelines('-----------------------------\n')
51 | 
--------------------------------------------------------------------------------
/docs/adding_plugins.rst:
--------------------------------------------------------------------------------
 1 | How to Add Plugins to the Profiler
 2 | ==================================
 3 | 
 4 | Plugins can be added to the profiler to extend its functionality and analyze program behavior. This guide explains the steps to create and integrate a new plugin into the profiler.
 5 | 
 6 | Creating a New Plugin
 7 | ---------------------
 8 | 
 9 | To create a new plugin, follow these steps:
10 | 
11 | 1. Create a New File:
12 | 
13 |    Create a new Python file in the ``plugins`` folder of the project.
14 | 
15 | 2. Define Inputs:
16 | 
17 |    Your plugin should accept the following inputs:
18 | 
19 |    - ``master_inst_dict``: A dictionary with the ``InstructionEntry`` objects of all executed instructions as keys.
20 |    - ``ops_dict``: A dictionary containing the operations as keys and the instructions belonging to each operation as values.
21 |    - ``extension_used``: A list of the ISA extensions used by the application. This is used to determine whether to include extension-specific operations in the analysis.
22 |    - ``config``: The parsed profiler configuration YAML.
23 |    - ``cycle_accurate_config``: The parsed cycle accurate configuration YAML, or ``None`` when cycle accurate profiling is not requested.
24 | 
25 |    Any custom inputs must be defined in the configuration YAML file. These will be passed to the function as keyword arguments.
26 | 
27 | 3. Define Outputs:
28 | 
29 |    Your plugin should return a dictionary containing the results of your analysis. The keys of this dictionary should be operation names, and the values are the metrics computed in the plugin itself. All returned values will be tabulated. Eventually, this will become a custom class.
30 | 
31 | Adding a Plugin to the YAML File
32 | --------------------------------
33 | 
34 | To execute your plugin, you must add it to the configuration YAML file. To do this, follow these steps:
35 | 
36 | 1. Add ``plugin_name``:
37 | 
38 |    Add the plugin file name to your YAML file under ``profiles:cfg:metrics``. Under that, add your plugin's function name along with a header name for the report, as shown in the example below.
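39 | 
40 |    For example, a hypothetical plugin file ``my_plugin.py`` exposing a function ``my_metric`` (both names are illustrative, not part of the profiler) would be registered like this, mirroring the entries in ``sample_configs/profiler_config/config.yaml``:
41 | 
42 |    .. code-block:: yaml
43 | 
44 |       profiles:
45 |         cfg:
46 |           metrics:
47 |             my_plugin:
48 |               my_metric:
49 |                 - My metric description.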
50 | 
51 | 2. Execute the Plugin:
52 | 
53 |    Sit back and run the profiler as usual. Your plugin will be executed along with the rest of the analysis.
54 | 
--------------------------------------------------------------------------------
/riscv_application_profiler/main.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | import shutil
  3 | import click
  4 | from riscv_application_profiler import __version__
  5 | from riscv_application_profiler.profiler import run
  6 | from riscv_application_profiler.isac_port import isac_setup_routine
  7 | from riscv_isac.log import logger
  8 | import riscv_application_profiler.consts
  9 | import riscv_isac.plugins.spike as isac_spike_plugin
 10 | import os
 11 | from git import Repo
 12 | import yaml
 13 | # remove later
 14 | from riscv_application_profiler.verif import verify
 15 | 
 16 | @click.group()
 17 | @click.version_option(version=__version__)
 18 | def cli():
 19 |     '''Command Line Interface for riscv_application_profiler'''
 20 | 
 21 | @cli.command()
 22 | # CLI option 'log'.
 23 | # Expects a path to an execution log.
 24 | @click.option(
 25 |     '-l',
 26 |     '--log',
 27 |     help=
 28 |     'This option expects the path to an execution log.',
 29 |     required=True)
 30 | 
 31 | # CLI option 'output'.
 32 | # Expects a directory.
 33 | @click.option(
 34 |     '-o',
 35 |     '--output',
 36 |     help="Path to the output directory.",
 37 |     default='./build',
 38 |     show_default=True,
 39 |     required=False,
 40 | )
 41 | 
 42 | # CLI option 'config'.
 43 | # Expects a YAML file.
 44 | @click.option('-c', '--config', help="Path to the YAML configuration file.", required=True)
 45 | 
 46 | # CLI option 'cycle accurate config'.
 47 | # Expects a YAML file.
 48 | @click.option('-ca', '--cycle_accurate_config', help="Path to the YAML cycle accurate configuration file.", required=False)
 49 | 
 50 | # CLI option 'verbose'.
 51 | # Expects a string.
 52 | @click.option('-v', '--verbose', default='info', help='Set verbose level', type=click.Choice(['info','error','debug'],case_sensitive=False))
 53 | 
 54 | # If one has a log file annotated with cycle latencies, the 'check' option below
 55 | # can be enabled; the changes required for it are left commented out.
 56 | # @click.option('-ch', '--check', help="Path to the dump file which has cycle latency.", required=False)
 57 | 
 58 | def profile(config, log, output, verbose, cycle_accurate_config): #, check):
 59 |     '''
 60 |     Profiles an application using its execution log and generates reports.
 61 |     '''
 62 |     with open(config, 'r') as config_file:
 63 |         ia_config = yaml.safe_load(config_file)
 64 |     if cycle_accurate_config:
 65 |         with open(cycle_accurate_config, 'r') as cycle_accurate_config_file:
 66 |             ca_config = yaml.safe_load(cycle_accurate_config_file)
 67 |         # if check:
 68 |         #     check_file = str(Path(check).absolute())
 69 |         #     verify(check_file)
 70 |         # else:
 71 |         #     check_file = None
 72 |     else:
 73 |         ca_config = None
 74 |     default_commitlog_regex = ia_config['profiles']['cfg']['commitlog_regex']
 75 |     default_privilege_mode_regex = ia_config['profiles']['cfg']['privilege_mode_regex']
 76 |     isa = ia_config['profiles']['cfg']['isa']
 77 |     log_file = str(Path(log).absolute())
 78 |     output_dir = str(Path(output).absolute())
 79 | 
 80 |     # clone riscv_opcodes and copy decoder plugin
 81 | 
 82 |     isac_setup_routine()
 83 | 
 84 |     logger.level(verbose)
 85 |     logger.info("**********************************")
 86 |     logger.info(f"RISC-V Application Profiler v{__version__}")
 87 |     logger.info("**********************************")
 88 |     logger.info("ISA Extension used: " + isa)
 89 | 
 90 |     logger.info(f"\nLog file: {log_file}")
 91 |     logger.info(f"Output directory: {output_dir}")
 92 | 
 93 |     # Invoke the actual profiler
 94 |     if ca_config is not None:
 95 |         run(log_file, isa, output_dir, verbose, ia_config, ca_config)  # ,check_file)
 96 |     else:
 97 |         run(log_file, isa, output_dir, verbose, ia_config, None)  # ,None)
 98 |     logger.info("Done profiling.")
 99 |     logger.info(f"Reports in {output_dir}/reports.")
100 | 
101 | def main():
102 |     cli()
103 | 
104 | if __name__ == '__main__':
105 |     main()
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | .. See LICENSE for details
 2 | 
 3 | .. highlight:: shell
 4 | 
 5 | ============
 6 | Contributing
 7 | ============
 8 | 
 9 | Your inputs are welcome and greatly appreciated! We want to make contributing to this project as easy and transparent as possible, whether it's:
10 | 
11 | - Reporting a bug
12 | - Discussing the current state of the code
13 | - Submitting a bug fix
14 | - Proposing new features
15 | - Becoming a maintainer
16 | - Adding a new plugin
17 | 
18 | We develop with GitHub
19 | ----------------------
20 | 
21 | We use GitHub to host code, to track issues and feature requests, as well as to accept pull requests.
22 | 
23 | All changes happen through Pull Requests
24 | ----------------------------------------
25 | 
26 | Pull requests are the best way to propose changes to the codebase. We actively welcome your pull requests:
27 | 
28 | 1. Fork the repo and create your branch from `master` (see the example after this list).
29 | 2. If you have updated the docs, ensure that they render correctly in the respective format.
30 | 3. Make sure to create an entry in the CHANGELOG.md. Please refer to the section on versioning below
31 |    to choose an appropriate version number.
32 | 4. Ensure the existing framework is not broken and still passes the basic checks.
33 | 5. Please include a comment with the SPDX license identifier in all source files, for example:
34 |    ```
35 |    // SPDX-License-Identifier: BSD-3-Clause
36 |    ```
37 | 6. Bump the version of the tool to patch/minor/major as per the entry made in the CHANGELOG.md.
38 | 7. Issue that pull request!
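39 | 
40 | For example, the fork-and-branch flow from step 1 looks like this (replace `<your-username>` with your GitHub handle; the branch name is illustrative):
41 | 
42 | ```
43 | git clone https://github.com/<your-username>/riscv-application-profiler.git
44 | cd riscv-application-profiler
45 | git checkout -b my-feature master
46 | ```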
 47 | 
 48 | Checks for a PR
 49 | ---------------
 50 | 
 51 | Make sure your PR meets all the following requirements:
 52 | 
 53 | 1. You have made an entry in the CHANGELOG.md.
 54 | 2. You have bumped the version of the tool using the bumpversion utility described below.
 55 | 3. The commit messages are verbose.
 56 | 4. Your PR doesn't break the existing framework.
 57 | 
 58 | Versioning
 59 | ----------
 60 | 
 61 | When issuing pull requests, an entry in the CHANGELOG.md is mandatory. This repository adheres to
 62 | the [`Semantic Versioning`](https://semver.org/spec/v2.0.0.html) scheme. The following guidelines must
 63 | be followed while assigning a new version number:
 64 | 
 65 | - Patch-updates: all doc updates (like typos, more clarification, etc.) will be patches. Beautification enhancements will also be treated as patch updates. Certain bug fixes to existing code may be treated as patches as well.
 66 | - Minor-updates: Updates to code with new extensions, features, or run time optimizations can be
 67 |   treated as minor updates.
 68 | - Major-updates: Changes to the framework flow (backward compatible or incompatible) will be treated
 69 |   as major updates.
 70 | 
 71 | Note: You can have either a patch or minor or major update.
 72 | Note: In case of a conflict, the maintainers will decide the final version to be assigned.
 73 | 
 74 | All contributions will be under the permissive open-source License
 75 | ------------------------------------------------------------------
 76 | 
 77 | In short, when you submit code changes, your submissions are understood to be under a permissive open source license like BSD-3, Apache-2.0, CC, etc. that covers the project. Feel free to contact the maintainers if that's a concern.
 78 | 
 79 | Report bugs using GitHub's `issues `_
 80 | ------------------------------------------------------------------------------------
 81 | 
 82 | We use GitHub issues to track public bugs. Report a bug by `opening a new issue `_; it's that easy!
 83 | 
 84 | Write bug reports with detail, background, and sample code
 85 | ----------------------------------------------------------
 86 | 
 87 | **Great Bug Reports** tend to have:
 88 | 
 89 | - A quick summary and/or background
 90 | - Steps to reproduce
 91 |   - Be specific!
 92 |   - Give sample code if you can.
 93 | - What you expected would happen
 94 | - What actually happens
 95 | - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work)
 96 | 
 97 | 
 98 | Version Bumping made simple
 99 | ---------------------------
100 | 
101 | Each PR will require the tool's version to be bumped. This can be achieved using the following
102 | commands::
103 | 
104 |     $ bumpversion --allow-dirty --no-tag --config-file setup.cfg patch  # options: major / minor / patch
105 | 
106 | 
--------------------------------------------------------------------------------
/riscv_application_profiler/utils.py:
--------------------------------------------------------------------------------
 1 | # See LICENSE for license details.
 2 | 
 3 | from riscv_isac.log import *
 4 | import pytablewriter as ptw
 5 | import os
 6 | import riscv_application_profiler.consts as consts
 7 | import pprint as pp
 8 | 
 9 | class Utilities:
10 |     def __init__(self, log, output) -> None:
11 |         os.makedirs(f'{output}/reports', exist_ok=True)
12 |         self.log = log
13 |         self.tables_file = open(f'{output}/reports/tables.adoc', 'w')
14 |         self.tables_file.write(f'# Reports from the RISC-V Application Profiler\n\n')
15 | 
16 |     def metadata(self):
17 |         '''
18 |         Prints the metadata of the application being profiled.
19 |         '''
20 |         logger.debug("Printing metadata.")
21 |         self.tables_file.write('## Application metadata\n\n')
22 |         self.tables_file.write(f'Execution log file: {self.log}\n\n')
23 |         self.tables_file.write('\n\n')
24 |     def print_stats(self, op_dict, counts):
25 |         '''
26 |         Prints the statistics of the grouped instructions.
27 | 
28 |         Args:
29 |             - op_dict: A dictionary with the operations as keys and a list of InstructionEntry
30 |               objects as values.
31 |             - counts: A dictionary with the operations as keys and the number of instructions
32 |               in each group as values.
33 |         '''
34 |         logger.debug("Printing statistics.")
35 |         for op in op_dict.keys():
36 |             logger.info(f'{op}: {counts[op]}')
37 |         logger.debug("Done.")
38 | 
39 |     def tabulate_stats(self, in_dict, header_name):
40 | 
41 |         logger.debug("Tabulating statistics.")
42 |         table = []
43 |         if in_dict is None:
44 |             logger.error("Dictionary is empty.")
45 |             exit(1)
46 |         key_list = list(in_dict.keys())
47 |         length = len(key_list)
48 |         for i in range(len(in_dict[key_list[0]])):
49 |             l1 = []
50 |             for j in range(length):
51 |                 l1.append(in_dict[key_list[j]][i])
52 |             table.append(l1)
53 | 
54 |         # Default the header before writing it, so the report never shows 'None'.
55 |         if header_name is None:
56 |             header_name = "Name"
57 |         self.tables_file.write(f'## {header_name}\n')
58 |         writer = ptw.AsciiDocTableWriter()
59 |         writer.table_name = ""
60 |         writer.headers = key_list
61 |         writer.value_matrix = table
62 |         self.tables_file.write(writer.dumps())
63 | 
64 |         self.tables_file.write('\n\n')
65 |         logger.debug("Done.")
66 | 
67 |     def remove_dups(self, target: list) -> list:
68 |         '''
69 |         Removes duplicates from a list.
70 | 
71 |         Args:
72 |             - target: The list to remove duplicates from.
73 | 
74 |         Returns:
75 |             - A list with no duplicates.
76 |         '''
77 |         temp_list = []
78 |         for entry in target:
79 |             if entry not in temp_list:
80 |                 temp_list.append(entry)
81 |         return temp_list
82 | 
83 |     def compute_ops_dict(self, args_list: list, ext_list: list, isa_arg: str) -> dict:
84 |         '''
85 |         Computes the current ops dict out of the master ops db.
86 | 
87 |         Args:
88 |             - ext_list: A list of extensions to be supported.
89 |             - isa_arg: The ISA to be supported.
90 | 
91 |         Returns:
92 |             - A dictionary containing the supported operations.
93 | ''' 94 | 95 | temp_ops_dict = {entry:[] for entry in args_list} 96 | if isa_arg == 'RV32': 97 | master_ops_dict = consts.ops_dict['RV32'] 98 | elif isa_arg == 'RV64': 99 | master_ops_dict = consts.ops_dict['RV32'] 100 | for ext in ext_list: 101 | for op_type in args_list: 102 | master_ops_dict[ext][op_type].extend(consts.ops_dict['RV64'][ext][op_type]) 103 | else: 104 | logger.error(f'XLEN {isa_arg} not supported.') 105 | exit(1) 106 | for ext in ext_list: 107 | for op_type in args_list: 108 | temp_ops_dict[op_type] += master_ops_dict[ext][op_type] 109 | result_dict = {entry:self.remove_dups(temp_ops_dict[entry]) for entry in temp_ops_dict} 110 | return result_dict 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /sample_configs/profiler_config/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | # '''Create one cache level out of given configuration. 3 | 4 | # :param sets: total number of sets, if 1 cache will be full-associative 5 | # :param ways: total number of ways, if 1 cache will be direct mapped 6 | # :param cl_size: number of bytes that can be addressed individually 7 | # :param replacement_policy: FIFO, LRU (default), MRU or RR 8 | # :param write_back: if true (default), write back will be done on evict. 9 | # Otherwise write-through is used. If false, write through 10 | # :param write_allocate: if true (default), a load will be issued on a 11 | # write miss. If false, write no allocate 12 | # :param write_combining: if true, this cache will combine writes and 13 | # issue them on evicts(default is false) 14 | # :param subblock_size: the minimum blocksize that write-combining can 15 | # handle 16 | # :param load_from: the cache level to forward a load in case of a load 17 | # miss or write-allocate, if None, assumed to be main 18 | # memory 19 | # :param store_to: the cache level to forward a store to in case of 20 | # eviction of dirty lines, if None, assumed to be main 21 | # memory 22 | # :param victims_to: the cache level to forward any evicted lines to 23 | # (dirty or not) 24 | 25 | # The total cache size is the product of sets*ways*cl_size. 26 | # Internally all addresses are converted to cacheline indices. 27 | 28 | # Instantization has to happen from last level cache to first level 29 | # cache, since each subsequent level requires a reference of the other 30 | # level. 31 | # ''' 32 | 33 | # YAML without l2 cache congifuration 34 | 35 | profiles: 36 | cfg: 37 | isa: RV64IMFDCZicsr 38 | metrics: # Metrics such as grouping instructs by operation and privledge mode are hard coded in the profiler.py file as the input to these functions is the commit log. 39 | branch_ops: 40 | group_by_branch_offset: 41 | - Grouping Branchs by Offset Size. 42 | group_by_branch_sign: 43 | - Grouping Branchs by Direction. 44 | loop_compute: 45 | - Nested loop computation. 46 | register_compute: 47 | register_compute: 48 | - Register computation. 49 | fregister_compute: 50 | - Floating point register computation. 51 | jumps_ops: 52 | jumps_compute: 53 | - Grouping jumps by jump direction. 54 | jump_size: 55 | - Grouping jumps by jump size. 56 | cache: 57 | data_cache_simulator: 58 | - Data cache utilization. 59 | instruction_cache_simulator: 60 | - Instruction cache utilization. 61 | dependency: 62 | raw_compute: 63 | - RAW dependency Computation. 64 | csr_compute: 65 | csr_compute: 66 | - CSR computation. 
67 | store_load_bypass: 68 | store_load_bypass: 69 | - Store load bypass Computation. 70 | pattern: 71 | group_by_pattern: 72 | - Pattern computation. 73 | commitlog_regex : '^core\s+\d+:\s+(\d*)\s+(0x[0-9a-fA-F]+)\s+\((0x[0-9a-fA-F]+)\)\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?' 74 | privilege_mode_regex : '^core\s+\d+:\s+(\d+)' 75 | uarch: 76 | bypass_depth: 2 77 | data_cache: 78 | range: 79 | start: 0x80000000 80 | end: 0x8fffffff 81 | no_of_sets : 8 82 | no_of_ways : 4 83 | line_size : 32 84 | replacement_policy: "FIFO" #round robin 85 | write_back : True # false for write through 86 | write_allocate : True # false for write no allocate 87 | structural_latency: 7 #number of words - 1 88 | instr_cache: 89 | range: 90 | start: 0x80000000 91 | end: 0x8fffffff 92 | no_of_sets : 16 93 | no_of_ways : 2 94 | line_size : 32 95 | replacement_policy: "LFSR" 96 | write_back : True # false for write through 97 | write_allocate : True # false for write no allocate 98 | structural_latency: 7 #number of words - 1 99 | 100 | # interface: 101 | # L1: cache_line 102 | # L2: 32 bits -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/csr_compute.py: -------------------------------------------------------------------------------- 1 | from riscv_isac.log import * 2 | from riscv_application_profiler.consts import * 3 | import riscv_application_profiler.consts as consts 4 | 5 | def csr_compute(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 6 | ''' 7 | Computes the number of reads and writes to each CSR. 8 | 9 | Args: 10 | - master_inst_dict: A dictionary of InstructionEntry objects. 11 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 12 | - extension_used: A list of extensions used in the application. 13 | - config: A yaml with the configuration information. 14 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 15 | 16 | Returns: 17 | - A dictionary with the CSR names as keys and a list of reads and writes as values. 
18 | ''' 19 | 20 | # Initialize dictionaries and lists 21 | csr = {} 22 | csr_reg_list = [] 23 | ret_dict = {'CSR': [], 'Reads': [], 'Writes': []} 24 | prev_inst_csr = None 25 | 26 | # Logging the CSR computation process 27 | logger.info("Computing CSRs.") 28 | for entry in master_inst_dict: 29 | # Loop through CSR instructions 30 | if entry in ops_dict['csrs']: 31 | # If no CSR value is specified 32 | if entry.csr is None: 33 | if 'f' in entry.instr_name: 34 | csr_reg = entry.instr_name[0] + entry.instr_name[2:] 35 | 36 | if csr_reg not in csr_reg_list: 37 | # Create a new entry for the CSR 38 | csr_reg_list.append(csr_reg) 39 | csr[csr_reg] = {'read': 0, 'write': 0} 40 | 41 | # Update read/write counts for the corresponding CSR 42 | if 'fr' in entry.instr_name: 43 | csr[csr_reg]['read'] += 1 44 | elif 'fs' in entry.instr_name: 45 | csr[csr_reg]['write'] += 1 46 | #for flushing pipe checking if write is happening to CSR register 47 | prev_inst_csr = csr[csr_reg] 48 | # If a CSR value is specified 49 | else: 50 | csr_hex = hex(entry.csr) 51 | csr_reg = consts.csr_file.get(csr_hex) 52 | 53 | if csr_reg is not None and csr_reg not in csr_reg_list: 54 | # Create a new entry for the CSR 55 | csr_reg_list.append(csr_reg) 56 | csr[csr_reg] = {'read': 0, 'write': 0} 57 | 58 | if csr_reg is not None: 59 | # Update read/write counts for the corresponding CSR 60 | if 'rw' in entry.instr_name: 61 | rd = str(entry.rd[1]) + str(entry.rd[0]) 62 | if rd == 'x0': 63 | csr[csr_reg]['write'] += 1 64 | prev_inst_csr = csr[csr_reg] 65 | else: 66 | csr[csr_reg]['read'] += 1 67 | csr[csr_reg]['write'] += 1 68 | prev_inst_csr = csr[csr_reg] 69 | elif 'rs' in entry.instr_name or 'rc' in entry.instr_name: 70 | if entry.rs1 is not None: 71 | rs1 = str(entry.rs1[1]) + str(entry.rs1[0]) 72 | else: 73 | rs1 = None 74 | if entry.imm == 0 or rs1 == 'x0': 75 | csr[csr_reg]['read'] += 1 76 | else: 77 | csr[csr_reg]['read'] += 1 78 | csr[csr_reg]['write'] += 1 79 | prev_inst_csr = csr[csr_reg] 80 | elif cycle_accurate_config != None: 81 | # if there's a writing to a csr instr, then we have to flush the pipe 82 | # so we have to add those flush instr to the next instruction 83 | if prev_inst_csr != None: 84 | for op in ops_dict.keys(): 85 | if entry in ops_dict[op]: 86 | ops_dict[op][entry] = ops_dict[op][entry] + cycle_accurate_config['cycles']['flush_cycles']['csr'] 87 | master_inst_dict[entry] = ops_dict[op][entry] 88 | prev_inst_csr = None 89 | # Populate the ret_dict with CSR information 90 | for entry in csr_reg_list: 91 | ret_dict['CSR'].append(entry) 92 | ret_dict['Reads'].append(csr[entry]['read']) 93 | ret_dict['Writes'].append(csr[entry]['write']) 94 | 95 | logger.info("Done.") 96 | 97 | # Return the final results 98 | return ret_dict 99 | 100 | -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/pattern.py: -------------------------------------------------------------------------------- 1 | from riscv_isac.log import * 2 | from riscv_application_profiler.consts import * 3 | import re 4 | from riscv_application_profiler import consts 5 | 6 | def group_by_pattern(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 7 | ''' 8 | Groups instructions based on the operation. 9 | 10 | Args: 11 | - master_inst_dict: A dictionary of InstructionEntry objects. 12 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 
13 | - extension_used: A list of extensions used in the application. 14 | - config: A yaml with the configuration information. 15 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 16 | 17 | 18 | Returns: 19 | - A dictionary with pattern counts, instructions, PC, cycles, and cycles reduced as keys and lists of values. 20 | ''' 21 | # Log the start of the process for getting the pattern. 22 | logger.info("Getting Pattern.") 23 | 24 | 25 | # Initialize dictionaries to hold address counts, patterns, names, and cycle information. 26 | count_dict = {} 27 | pattern_dict = {} 28 | address_name_dict = {} 29 | address_pc_dict = {} 30 | address_cycle_dict = {} 31 | prev = None 32 | # Loop through each entry in the master_inst_dict. 33 | for entry in master_inst_dict: 34 | name = entry.instr_name+ ' ' 35 | if entry.rs1 is not None: 36 | name=name+'rs1: '+str(entry.rs1[1])+str(entry.rs1[0])+' ' 37 | if entry.rs2 is not None: 38 | name=name+'rs2: '+str(entry.rs2[1])+str(entry.rs2[0])+' ' 39 | if entry.rd is not None: 40 | name=name+'rd: '+str(entry.rd[1])+str(entry.rd[0])+' ' 41 | # Check if the instruction address is not in the count_dict. 42 | if hex(entry.instr_addr) not in count_dict: 43 | count_dict[hex(entry.instr_addr)] = 0 44 | address_name_dict[hex(entry.instr_addr)] = name 45 | address_pc_dict[hex(entry.instr_addr)] = hex(entry.instr_addr) 46 | address_cycle_dict[hex(entry.instr_addr)] = master_inst_dict[entry] 47 | count_dict[hex(entry.instr_addr)] += 1 48 | 49 | # Group instructions based on their occurrence count. 50 | for entry in count_dict: 51 | if count_dict[entry] not in pattern_dict: 52 | pattern_dict[count_dict[entry]] = list() 53 | pattern_dict[count_dict[entry]].append(entry) 54 | if 1 in pattern_dict: 55 | del pattern_dict[1] 56 | 57 | # Sort the patterns by occurrence count in descending order. 58 | sort_count_list = sorted(pattern_dict.items(), key=lambda x: x[0], reverse=True) 59 | 60 | # Remove single instructions or patterns with count 1. 61 | sort_count_list = [entry for entry in sort_count_list if len(entry[1]) > 1] 62 | 63 | # Initialize a dictionary to store sorted pattern information. 64 | s_dict = {'count': [], 'instr': [], 'PC': [], 'cycles': [], 'cycles_reduced': []} 65 | 66 | # Process sorted patterns. 67 | for entry in sort_count_list: 68 | adj_inst = [address_name_dict[entry[1][0]]] 69 | adj_pc = [address_pc_dict[entry[1][0]]] 70 | adj_cycles = [address_cycle_dict[entry[1][0]]] 71 | prev = entry[1][0] 72 | for i in entry[1][1:]: 73 | # Check if the difference between addresses is 4 or 2. 74 | if (int(i, 16) - int(prev, 16)) == 4 or (int(i, 16) - int(prev, 16)) == 2: 75 | adj_inst.append(address_name_dict[i]) 76 | adj_pc.append(address_pc_dict[i]) 77 | adj_cycles.append(address_cycle_dict[i]) 78 | elif len(adj_inst) > 1: 79 | # Store the current pattern information. 80 | # if adj_cycles in s_dict['cycles']: 81 | # continue 82 | s_dict['instr'].append(adj_inst) 83 | s_dict['PC'].append(adj_pc) 84 | s_dict['cycles'].append(adj_cycles) 85 | s_dict['count'].append(entry[0]) 86 | adj_inst = [address_name_dict[i]] 87 | adj_pc = [i] 88 | adj_cycles = [address_cycle_dict[i]] 89 | prev = i 90 | if len(adj_inst) > 1: 91 | s_dict['count'].append(entry[0]) 92 | s_dict['cycles'].append(adj_cycles) 93 | s_dict['instr'].append(adj_inst) 94 | s_dict['PC'].append(adj_pc) 95 | 96 | # Calculate improved performance for each pattern. 
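    # Assumption (illustrative reading of the formula below): each of the `count`
    # occurrences of a pattern costs sum(cycles); fusing the sequence into a single
    # one-cycle custom instruction would therefore save count * (sum(cycles) - 1) cycles.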
97 | for i in range(len(s_dict['count'])): 98 | imp_performance = s_dict['count'][i] * (sum(s_dict['cycles'][i]) - 1) 99 | s_dict['cycles_reduced'].append(imp_performance) 100 | 101 | # Log the completion of pattern computation. 102 | logger.info("Pattern computed.") 103 | # Return the computed pattern information. 104 | return s_dict -------------------------------------------------------------------------------- /sample_configs/profiler_config/L2_configs/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | # '''Create one cache level out of given configuration. 3 | 4 | # :param sets: total number of sets, if 1 cache will be full-associative 5 | # :param ways: total number of ways, if 1 cache will be direct mapped 6 | # :param cl_size: number of bytes that can be addressed individually 7 | # :param replacement_policy: FIFO, LRU (default), MRU or RR 8 | # :param write_back: if true (default), write back will be done on evict. 9 | # Otherwise write-through is used.If false, write through 10 | # :param write_allocate: if true (default), a load will be issued on a 11 | # write miss. If false, write no allocate 12 | # :param write_combining: if true, this cache will combine writes and 13 | # issue them on evicts(default is false) 14 | # :param subblock_size: the minimum blocksize that write-combining can 15 | # handle 16 | # :param load_from: the cache level to forward a load in case of a load 17 | # miss or write-allocate, if None, assumed to be main 18 | # memory 19 | # :param store_to: the cache level to forward a store to in case of 20 | # eviction of dirty lines, if None, assumed to be main 21 | # memory 22 | # :param victims_to: the cache level to forward any evicted lines to 23 | # (dirty or not) 24 | 25 | # The total cache size is the product of sets*ways*cl_size. 26 | # Internally all addresses are converted to cacheline indices. 27 | 28 | # Instantization has to happen from last level cache to first level 29 | # cache, since each subsequent level requires a reference of the other 30 | # level. 31 | # ''' 32 | 33 | # YAML with l2 cache congifuration 34 | 35 | profiles: 36 | cfg: 37 | isa: RV64IMFDCZicsr 38 | metrics: # Metrics such as grouping instructs by operation and privledge mode are hard coded in the profiler.py file as the input to these functions is the commit log. However, the metrics such as grouping instructs by CSRs and cache computation is mandatory for a cycle accurate profiling. 39 | branch_ops: 40 | group_by_branch_offset: 41 | - Grouping Branchs by Offset Size. 42 | group_by_branch_sign: 43 | - Grouping Branchs by Direction. 44 | loop_compute: 45 | - Nested loop computation. 46 | register_compute: 47 | register_compute: 48 | - Register computation. 49 | fregister_compute: 50 | - Floating point register computation. 51 | jumps_ops: 52 | jumps_compute: 53 | - Grouping jumps by jump direction. 54 | jump_size: 55 | - Grouping jumps by jump size. 56 | dependency: 57 | raw_compute: 58 | - RAW dependency Computation. 59 | csr_compute: 60 | csr_compute: 61 | - CSR computation. 62 | cache: 63 | data_cache_simulator: 64 | - Data cache utilization. 65 | instruction_cache_simulator: 66 | - Instruction cache utilization. 67 | # cache: 68 | # unified_L2_cache_simulator: 69 | # - Cache utilization. 70 | store_load_bypass: 71 | store_load_bypass: 72 | - Store load bypass Computation. 73 | pattern: 74 | group_by_pattern: 75 | - Pattern computation. 
76 | commitlog_regex : '^core\s+\d+:\s+(\d*)\s+(0x[0-9a-fA-F]+)\s+\((0x[0-9a-fA-F]+)\)\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?\s*(x[0-9]*)?(c[0-9]+[_a-z]*)?(mem)?\s*(0x[0-9a-fA-F]*)?' 77 | privilege_mode_regex : '^core\s+\d+:\s+(\d+)' 78 | uarch: 79 | bypass_depth: 2 80 | data_cache: 81 | range: 82 | start: 0x80000000 83 | end: 0x8fffffff 84 | no_of_sets : 64 85 | no_of_ways : 8 86 | line_size : 32 87 | replacement_policy: "FIFO" #round robin 88 | write_back : True # false for write through 89 | write_allocate : True # false for write no allocate 90 | structural_latency: 7 #number of words - 1 91 | instr_cache: 92 | range: 93 | start: 0x80000000 94 | end: 0x8fffffff 95 | no_of_sets : 32 96 | no_of_ways : 2 97 | line_size : 32 98 | replacement_policy: "LFSR" 99 | write_back : True # false for write through 100 | write_allocate : True # false for write no allocate 101 | structural_latency: 7 #number of words - 1 102 | l2_cache: 103 | range: 104 | start: 0x80000000 105 | end: 0x8fffffff 106 | no_of_sets : 32 107 | no_of_ways : 2 108 | line_size : 32 109 | replacement_policy: "LFSR" 110 | write_back : True # false for write through 111 | write_allocate : True # false for write no allocate 112 | structural_latency: 7 #number of words - 1 113 | 114 | # interface: 115 | # L1: cache_line 116 | # L2: 32 bits -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/dependency.py: -------------------------------------------------------------------------------- 1 | from riscv_isac.log import * 2 | from riscv_application_profiler.consts import * 3 | import riscv_application_profiler.consts as consts 4 | import statistics 5 | 6 | def raw_compute(master_inst_dict: list, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 7 | ''' 8 | Groups instructions based on the branch offset. 9 | 10 | Args: 11 | - master_inst_dict: A dictonary of InstructionEntry objects. 12 | - ops_dict: A dictionary with the operations as keys and a list of InstructionEntry objects as values. 13 | - extension_used: A list of extensions used in the application. 14 | - config: A yaml with the configuration information. 15 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 16 | 17 | Returns: 18 | - A dictionary with the operations as keys and a list of InstructionEntry objects as values. 19 | ''' 20 | # Initialize the process of computing register reads after writes. 21 | logger.info("Computing register reads after writes.") 22 | 23 | # Get a list of all registers in the register file. 24 | reg_list = list(consts.reg_file.keys()) + list(consts.freg_file.keys()) 25 | 26 | # Initialize a dictionary to hold register information, initially all with a depth of 1. 27 | regs = {i: {'depth': 1} for i in reg_list} 28 | 29 | # Initialize dictionaries to store results and raw data. 30 | ret_dict = {'Instructions': [], 'Depth': [], 'Count': []} 31 | raw = {} 32 | 33 | # Initialize a list to store combined instructions. 34 | instruction_list = [] 35 | 36 | # Initialize a list to store names of previously encountered registers. 37 | prev_names = [] 38 | 39 | # Iterate through the list of instructions in master_inst_dict. 40 | for entry in master_inst_dict: 41 | # Check if the instruction uses rs1 register. 
42 | if entry.rs1 is not None: 43 | name = str(entry.rs1[1]) + str(entry.rs1[0]) 44 | instr = str(entry.instr_name) 45 | 46 | # Check if this register name was encountered before. 47 | if name in prev_names: 48 | instruction = prev_instr + ' ' + instr 49 | 50 | # Check if the combined instruction is in raw data. 51 | if instruction in raw: 52 | # Check if the register depth matches raw depth. 53 | if regs[name]['depth'] == raw[instruction]['depth']: 54 | raw[instruction]['count'] += 1 55 | prev_names.remove(name) 56 | regs[name]['depth'] = 1 57 | else: 58 | raw[instruction] = {'depth': regs[name]['depth'], 'count': 1} 59 | instruction_list.append(instruction) 60 | prev_names.remove(name) 61 | regs[name]['depth'] = 1 62 | else: 63 | regs[name]['depth'] += 1 64 | 65 | # Check if the instruction uses rs2 register. 66 | if entry.rs2 is not None: 67 | name = str(entry.rs2[1]) + str(entry.rs2[0]) 68 | instr = str(entry.instr_name) 69 | 70 | # Check if this register name was encountered before. 71 | if name in prev_names: 72 | instruction = prev_instr + ' ' + instr 73 | 74 | # Check if the combined instruction is in raw data. 75 | if instruction in raw: 76 | # Check if the register depth matches raw depth. 77 | if regs[name]['depth'] == raw[instruction]['depth']: 78 | raw[instruction]['count'] += 1 79 | prev_names.remove(name) 80 | regs[name]['depth'] = 1 81 | else: 82 | raw[instruction] = {'depth': regs[name]['depth'], 'count': 1} 83 | instruction_list.append(instruction) 84 | prev_names.remove(name) 85 | regs[name]['depth'] = 1 86 | else: 87 | regs[name]['depth'] += 1 88 | 89 | # Check if the instruction defines a destination register (rd). 90 | if entry.rd is not None: 91 | name = str(entry.rd[1]) + str(entry.rd[0]) 92 | prev_instr = str(entry.instr_name) 93 | 94 | # Check if this register name was encountered before. 95 | if name not in prev_names: 96 | prev_names.append(name) 97 | else: 98 | regs[name]['depth'] = 1 99 | 100 | # Populate the result dictionary from raw data. 101 | 102 | if cycle_accurate_config != None: 103 | for entry in raw: 104 | if raw[entry]['depth'] < int(cycle_accurate_config['cycles']['pipeline_depth']): 105 | if cycle_accurate_config['cycles']['bypass_latency'] == None: 106 | ret_dict['Instructions'].append(entry) 107 | ret_dict['Count'].append(raw[entry]['count']) 108 | ret_dict['Depth'].append(raw[entry]['depth']) 109 | else: 110 | for entry in raw: 111 | ret_dict['Instructions'].append(entry) 112 | ret_dict['Count'].append(raw[entry]['count']) 113 | ret_dict['Depth'].append(raw[entry]['depth']) 114 | 115 | # Log the completion of the computation. 116 | logger.info("Done") 117 | 118 | # Return the result dictionary. 119 | return ret_dict -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RISC-V Application Profiler 2 | 3 | The RISC-V Application Profiler is a Python-based tool designed to help software developers optimize the performance of their applications on RISC-V hardware. It works by parsing execution logs and providing insights about the application's behavior. The tool has a modular design, where performance metrics are added as plugins, allowing developers to customize the profiler to their specific needs. The profiler is highly accessible, easy to use, and can be adapted to capture different types of performance metrics. 
Overall, the RISC-V Application Profiler is a flexible and customizable solution for software developers who want to ensure optimal performance of their applications on RISC-V platforms. 4 | 5 | Detailed documentation can be found [here](https://riscv-application-profiler.readthedocs.io/en/latest/). 6 | 7 | ## Installation 8 | 9 | Install `pycachesim`. This is a requirement to use the `caches` plugin in the profiler. 10 | 11 | ```shell 12 | git clone https://github.com/mahendraVamshi/pycachesim.git 13 | cd pycachesim 14 | pip install -e . 15 | cd .. 16 | ``` 17 | 18 | Install `riscv_isac`. This is a development version of isac. 19 | ```shell 20 | git clone https://github.com/mahendraVamshi/riscv-isac.git 21 | cd riscv-isac 22 | pip install -e . 23 | cd .. 24 | ``` 25 | 26 | Finally, install the profiler itself. 27 | ```shell 28 | git clone https://github.com/mahendraVamshi/riscv-application-profiler.git 29 | cd riscv-application-profiler 30 | pip install -e . 31 | ``` 32 | 33 | ## Usage 34 | 35 | To display the help message, run: 36 | ```shell 37 | riscv_application_profiler --help 38 | riscv_application_profiler profile --help 39 | ``` 40 | 41 | To generate a log file, run: 42 | ```shell 43 | spike --log-commits 44 | ``` 45 | 46 | **NOTE**: You need to use ``--enable-commitlog`` while configuring [spike](https://github.com/riscv-software-src/riscv-isa-sim#build-steps). 47 | 48 | To profile an application, run: 49 | ```shell 50 | riscv_application_profiler profile --log --output --config config.yaml 51 | ``` 52 | To profile an application with cycle accurate simulation, run: 53 | ```shell 54 | riscv_application_profiler profile --log --output --config config.yaml --cycle_accurate_config config.yaml 55 | ``` 56 | **Info**: 57 | 58 | Path to the log file is mandatory. Example log files can be found in the `sample_artifacts/logs` directory. 59 | 60 | Path to the output directory is optional. If not provided, the profiler will create a directory named `build` in the current working directory. 61 | 62 | Path to the config file is mandatory. Example `config.yaml` is located in `sample_configs/profiler_config` directory. L2 cache config files are located in `sample_configs/profiler_config/L2_configs` directory. 63 | 64 | Path to the cycle accurate config file is optional. Example `config.yaml` is located in `sample_configs/cycle_accurate` directory. Use this option only if you want to profile an application with cycle accurate simulation. L2 cache config files are located in `sample_configs/cycle_accurate/L2_configs` directory. 65 | 66 | Command line options to the `profile` command: 67 | 68 | ```text 69 | Options: 70 | -l, --log TEXT This option expects the path to an execution 71 | log. [required] 72 | -o, --output TEXT Path to the output file. [default: ./build] 73 | -c, --config TEXT Path to the YAML configuration file. 74 | [required] 75 | -ca, --cycle_accurate_config TEXT 76 | Path to the YAML cycle accurate 77 | configuration file. 78 | -v, --verbose [info|error|debug] 79 | Set verbose level 80 | --help Show this message and exit. 
81 | ```
82 | 
83 | Example:
84 | 
85 | To profile an application, run:
86 | 
87 | ```shell
88 | riscv_application_profiler profile --log ./sample_artifacts/logs/hello.log --output ./build --config ./sample_configs/profiler_config/config.yaml
89 | ```
90 | To profile an application with cycle accurate simulation, run:
91 | 
92 | ```shell
93 | riscv_application_profiler profile --log ./sample_artifacts/logs/hello.log --output ./build --config ./sample_configs/profiler_config/L2_configs/config.yaml --cycle_accurate_config ./sample_configs/cycle_accurate/L2_configs/config.yaml
94 | ```
95 | 
96 | **Note**: As of today, the log file must be an execution log generated using spike. Support for configuring log formats will be added in the future.
97 | 
98 | **Note**: Metrics such as grouping instructions by operation and privilege mode are hard-coded in the profiler.py file, as the input to these functions is the commit log. However, metrics such as grouping instructions by CSRs and the cache computation are mandatory for cycle-accurate profiling.
99 | 
100 | ## Features
101 | 
102 | The profiler supports the following list of features as plugins:
103 | 
104 | Grouping instructions by:
105 | - Type of operation performed.
106 | - Privilege mode used for execution.
107 | - Directions and Sizes (for jumps/branches).
108 | 
109 | Lists:
110 | - Presence of Nested Loops.
111 | - Store-Load bypass.
112 | - Presence of RAW dependencies.
113 | - Pattern detection for custom instructions.
114 | 
115 | Histogram for:
116 | - RegisterFile (XRF/FRF) usage.
117 | - CSR accesses.
118 | - D$/I$ Hits/Misses/Usage/Utilization.
119 | - Unified L2 Cache Hits/Misses/Usage/Utilization.
120 | 
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 | 
3 | ## Our Pledge
4 | 
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 | 
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | . 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. 
Violating these terms may lead to a temporary or
93 | permanent ban.
94 | 
95 | ### 3. Temporary Ban
96 | 
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 | 
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 | 
106 | ### 4. Permanent Ban
107 | 
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 | 
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 | 
115 | ## Attribution
116 | 
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 | 
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 | 
124 | [homepage]: https://www.contributor-covenant.org
125 | 
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 | 
--------------------------------------------------------------------------------
/docs/intro.rst:
--------------------------------------------------------------------------------
1 | RISC-V Application Profiler
2 | ===========================
3 | 
4 | The RISC-V Application Profiler is a Python-based tool designed to help software developers optimize the performance of their applications on RISC-V hardware. It works by parsing execution logs and providing insights about the application's behavior. The tool has a modular design, where performance metrics are added as plugins, allowing developers to customize the profiler to their specific needs. The profiler is highly accessible, easy to use, and can be adapted to capture different types of performance metrics. Overall, the RISC-V Application Profiler is a flexible and customizable solution for software developers who want to ensure optimal performance of their applications on RISC-V platforms.
5 | 
6 | Installation
7 | ------------
8 | 
9 | Install `pycachesim`. This is a requirement to use the `caches` plugin in the profiler.
10 | 
11 | .. code-block:: shell
12 | 
13 | git clone https://github.com/mahendraVamshi/pycachesim.git
14 | cd pycachesim
15 | pip install -e .
16 | cd ..
17 | 
18 | Install `riscv_isac`. This is a development version of isac.
19 | 
20 | git clone https://github.com/mahendraVamshi/riscv-isac.git
21 | cd riscv-isac
22 | pip install -e .
23 | cd ..
24 | 
25 | Finally, install the profiler itself.
26 | 
27 | git clone https://github.com/mahendraVamshi/riscv-application-profiler.git
28 | cd riscv-application-profiler
29 | pip install -e .
30 | 
31 | Usage
32 | -----
33 | 
34 | To display the help message, run:
35 | 
36 | .. code-block:: shell
37 | 
38 | riscv_application_profiler --help
39 | riscv_application_profiler profile --help
40 | 
41 | To generate a log file, run:
42 | 
43 | .. code-block:: shell
44 | 
45 | spike --log-commits
46 | 
47 | **NOTE**: You need to use `--enable-commitlog` while configuring `spike` (https://github.com/riscv-software-src/riscv-isa-sim#build-steps).
48 | 
49 | To profile an application, run:
50 | 
51 | .. code-block:: shell
52 | 
53 | riscv_application_profiler profile --log --output --config config.yaml
54 | 
55 | To profile an application with cycle accurate simulation, run:
56 | 
57 | .. code-block:: shell
58 | 
59 | riscv_application_profiler profile --log --output --config config.yaml --cycle_accurate_config config.yaml
60 | 
61 | **Info**:
62 | 
63 | Path to the log file is mandatory. Example log files can be found in the `sample_artifacts/logs` directory.
64 | 
65 | Path to the output directory is optional. If not provided, the profiler will create a directory named `build` in the current working directory.
66 | 
67 | Path to the config file is mandatory. Example `config.yaml` is located in `sample_configs/profiler_config` directory. L2 cache config files are located in `sample_configs/profiler_config/L2_configs` directory.
68 | 
69 | Path to the cycle accurate config file is optional. Example `config.yaml` is located in `sample_configs/cycle_accurate` directory. Use this option only if you want to profile an application with cycle accurate simulation. L2 cache config files are located in `sample_configs/cycle_accurate/L2_configs` directory.
70 | 
71 | **Command line options to the `profile` command:**
72 | 
73 | Options:
74 | -l, --log TEXT This option expects the path to an execution
75 | log. [required]
76 | -o, --output TEXT Path to the output file. [default: ./build]
77 | -c, --config TEXT Path to the YAML configuration file.
78 | [required]
79 | -ca, --cycle_accurate_config TEXT
80 | Path to the YAML cycle accurate
81 | configuration file.
82 | -v, --verbose [info|error|debug]
83 | Set verbose level
84 | --help Show this message and exit.
85 | 
86 | **Example:**
87 | 
88 | To profile an application, run:
89 | 
90 | .. code-block:: shell
91 | 
92 | riscv_application_profiler profile --log ./sample_artifacts/logs/hello.log --output ./build --config ./sample_configs/profiler_config/config.yaml
93 | 
94 | To profile an application with cycle accurate simulation, run:
95 | 
96 | .. code-block:: shell
97 | 
98 | riscv_application_profiler profile --log ./sample_artifacts/logs/hello.log --output ./build --config ./sample_configs/profiler_config/L2_configs/config.yaml --cycle_accurate_config ./sample_configs/cycle_accurate/L2_configs/config.yaml
99 | 
100 | **Note**: As of today, the log file must be an execution log generated using `spike`. Support for configuring log formats will be added in the future.
101 | 
102 | **Note**: Metrics such as grouping instructions by operation and privilege mode are hard-coded in the profiler.py file, as the input to these functions is the commit log. However, metrics such as grouping instructions by CSRs and the cache computation are mandatory for cycle-accurate profiling.
103 | 
104 | Features
105 | --------
106 | 
107 | The profiler supports the following list of features as plugins:
108 | 
109 | - Grouping instructions by:
110 | - Type of operation performed.
111 | - Privilege mode used for execution.
112 | - Directions and Sizes (for jumps/branches).
113 | 
114 | - Lists:
115 | - Presence of Nested Loops.
116 | - Store-Load bypass.
117 | - Presence of RAW dependencies.
118 | - Pattern of repeated instructions.
119 | 
120 | - Histogram for:
121 | - RegisterFile (XRF/FRF) usage.
122 | - CSR accesses.
123 | - D$/I$ Hits/Misses/Usage/Utilization.
124 | - Unified L2 Cache Hits/Misses/Usage/Utilization.
125 | 
126 | 
--------------------------------------------------------------------------------
/riscv_application_profiler/plugins/store_load_bypass.py:
--------------------------------------------------------------------------------
1 | from riscv_isac.log import *
2 | from riscv_application_profiler.consts import *
3 | import riscv_application_profiler.consts as consts
4 | from pprint import pprint
5 | 
6 | def store_load_bypass(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config):
7 | '''
8 | Computes the number of instances of store load bypass.
9 | 
10 | Args:
11 | - master_inst_dict: A dictionary of InstructionEntry objects.
12 | - ops_dict: A dictionary containing the operations as keys and a list of
13 | InstructionEntry objects as values.
14 | - extension_used: A list of extensions used in the application.
15 | - config: A yaml with the configuration information.
16 | - cycle_accurate_config: A yaml with the cycle accurate configuration information.
17 | 
18 | Returns:
19 | - A dictionary with 'Address', 'Counts', 'Depth' and 'Bypass Width' as keys and lists of their values as values.
20 | 
21 | '''
22 | 
23 | # Log the start of the process for computing store-load bypass.
24 | logger.info("Computing store load bypass.")
25 | 
26 | # make a bypass dict
27 | bypass_dict = {}
28 | tracking = {}
29 | eff_addr = []
30 | ret_dict = {'Address': [], 'Counts': [], 'Depth': [], 'Bypass Width': []}
31 | 
32 | # iterate through master inst list
33 | # if a store is encountered, make a set of bytes touched and look out for loads from these bytes else continue
34 | # upon encountering a load that touches these bytes, freeze the depth and reset counts/depths
35 | 
36 | for entry in master_inst_dict:
37 | if entry in ops_dict['stores']: # this is a store
38 | # Determine the base address for the memory access.
39 | reg_name = 'x2' if 'sp' in entry.instr_name else f'x{entry.rs1[0]}'
40 | base = int(consts.reg_file[reg_name], 16)
41 | address = hex(base + entry.imm) if entry.imm is not None else hex(base)
42 | access_sz = 8 if 'd' in entry.instr_name \
43 | else 4 if 'w' in entry.instr_name \
44 | else 2 if 'h' in entry.instr_name \
45 | else 1 if 'b' in entry.instr_name \
46 | else None
47 | 
48 | # sanity check
49 | if access_sz is None:
50 | raise Exception(f'Invalid access size encountered: {entry.instr_name}')
51 | # make a set of all bytes touched by this store
52 | bytes_touched = {hex(int(address, 16) + i) for i in range(0, access_sz, 1)}
53 | for _entry in bytes_touched:
54 | tracking[_entry] = {}
55 | tracking[_entry]['depth'] = 0
56 | tracking[_entry]['s_access_sz'] = access_sz
57 | 
58 | # look for loads
59 | if entry in ops_dict['loads']:
60 | # Determine the base address for the memory access.
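# As with stores, sp-relative compressed loads implicitly use x2 as the base register; the base value itself comes from the reg_file snapshot kept in consts.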
61 | reg_name = 'x2' if 'sp' in entry.instr_name else f'x{entry.rs1[0]}' 62 | base = int(consts.reg_file[reg_name], 16) 63 | address = hex(base + entry.imm) if entry.imm is not None else hex(base) 64 | eff_addr.append(address) 65 | access_sz = 8 if 'd' in entry.instr_name \ 66 | else 4 if 'w' in entry.instr_name \ 67 | else 2 if 'h' in entry.instr_name \ 68 | else 1 if 'b' in entry.instr_name \ 69 | else None 70 | if access_sz is None: 71 | raise Exception(f'Invalid access size encountered: {entry.instr_name}') 72 | count = 0 73 | bytes_touched = {hex(int(address, 16) + i) for i in range(0, access_sz, 1)} 74 | for byte_entry in bytes_touched: 75 | if byte_entry in tracking: 76 | count += 1 77 | for _entry in bytes_touched: 78 | if _entry in tracking: 79 | if _entry in bypass_dict: 80 | if bypass_dict[_entry]['depth'] == tracking[_entry]['depth']: 81 | bypass_dict[_entry]['counts'] += 1 82 | 83 | else: 84 | bypass_dict[_entry] = {'counts': 1, 'depth': tracking[_entry]['depth'], 'bypass_width': count} 85 | tracking.pop(_entry) 86 | 87 | if entry.instr_name not in ops_dict['loads']: # this is a regular instruction which causes a deeper bypass 88 | for _entry in tracking: 89 | tracking[_entry]['depth'] += 1 90 | 91 | # Update register values based on commit information. 92 | if (entry.reg_commit is not None): 93 | if (entry.reg_commit[1] != '0'): 94 | consts.reg_file[f'x{int(entry.reg_commit[1])}'] = entry.reg_commit[2] 95 | 96 | keys_to_remove = [] 97 | 98 | # Iterate over the dictionary and identify keys to remove. 99 | for entry in bypass_dict: 100 | if entry not in eff_addr: 101 | keys_to_remove.append(entry) 102 | 103 | # Remove the identified keys from the dictionary. 104 | for key in keys_to_remove: 105 | bypass_dict.pop(key) 106 | 107 | 108 | 109 | 110 | # Reset register values. 111 | consts.reg_file = {f'x{i}': '0x00000000' for i in range(32)} 112 | 113 | # Populate the result dictionary with store-load bypass information. 114 | for address in bypass_dict: 115 | ret_dict['Address'].append(address) 116 | ret_dict['Counts'].append(bypass_dict[address]['counts']) 117 | ret_dict['Depth'].append(bypass_dict[address]['depth']) 118 | ret_dict['Bypass Width'].append(bypass_dict[address]['bypass_width']) 119 | 120 | # Log the completion of the store-load bypass computation. 121 | logger.info('Done.') 122 | 123 | # Return the resulting dictionary containing store-load bypass data. 124 | return ret_dict 125 | 126 | 127 | -------------------------------------------------------------------------------- /riscv_application_profiler/profiler.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from riscv_application_profiler.consts import * 3 | import riscv_application_profiler.consts as consts 4 | from riscv_isac.log import * 5 | from riscv_isac.plugins.spike import * 6 | from riscv_application_profiler.plugins import instr_groups 7 | from riscv_application_profiler import verif 8 | from riscv_application_profiler import plugins 9 | import riscv_config.isa_validator as isaval 10 | from riscv_application_profiler.utils import Utilities 11 | import os 12 | import yaml 13 | 14 | # script_dir = os.path.dirname(os.path.abspath(__file__)) 15 | # config_path = os.path.join(script_dir, 'config.yaml') 16 | # with open(consts.config_path, 'r') as config_file: 17 | # config = yaml.safe_load(config_file) 18 | 19 | def print_stats(op_dict, counts): 20 | ''' 21 | Prints the statistics of the grouped instructions. 
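One INFO line is logged per operation group, in the order the groups appear in op_dict.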
22 | 23 | Args: 24 | - op_dict: A dictionary with the operations as keys and a list of InstructionEntry 25 | objects as values. 26 | - counts: A dictionary with the operations as keys and the number of instructions 27 | in each group as values. 28 | ''' 29 | logger.info("Printing statistics.") 30 | for op in op_dict.keys(): 31 | logger.info(f'{op}: {counts[op]}') 32 | logger.info("Done.") 33 | 34 | def run(log, isa, output, verbose, config, cycle_accurate_config): #, check): 35 | from rvop_decoder.rvopcodesdecoder import disassembler 36 | spike_parser = spike() 37 | spike_parser.setup(trace=str(log), arch='rv64') 38 | iter_commitlog = spike_parser.__iter__() 39 | with open(log, 'r') as logfile: 40 | # Read the log file 41 | lines = logfile.readlines() 42 | cl_matches_list = [iter_commitlog.__next__() for i in range(len(lines))] 43 | isac_decoder = disassembler() 44 | isac_decoder.setup(arch='rv64') 45 | # master_inst_list = [] 46 | master_inst_dict = {} 47 | for entry in cl_matches_list: 48 | if entry.instr is None: 49 | continue 50 | temp_entry = isac_decoder.decode(entry) 51 | # master_inst_list.append(temp_entry) 52 | master_inst_dict[temp_entry] = 1 53 | # master_inst_dict = {entry: 1 for entry in master_inst_list} 54 | logger.info(f'Parsed {len(master_inst_dict)} instructions.') 55 | logger.info("Decoding...") 56 | logger.info("Done decoding instructions.") 57 | logger.info("Starting to profile...") 58 | 59 | utils = Utilities(log, output) 60 | utils.metadata() 61 | 62 | # Grouping by operations 63 | groups = [ 64 | 'loads', 65 | 'stores', 66 | 'imm computes', 67 | 'imm shifts', 68 | 'reg computes', 69 | 'reg shifts', 70 | 'jumps', 71 | 'branches', 72 | "compares", 73 | "conversions", 74 | "moves", 75 | "classifies", 76 | "csrs", 77 | "fence", 78 | ] 79 | 80 | (extension_list, err, err_list) = isaval.get_extension_list(isa) 81 | 82 | for e in err_list: 83 | logger.error(e) 84 | if err: 85 | raise SystemExit(1) 86 | 87 | isa_arg = isa.split('I')[0] 88 | 89 | ret_dict, extension_instruction_list, op_dict = instr_groups.group_by_operation(groups, isa_arg, extension_list, master_inst_dict, config, cycle_accurate_config) 90 | if (len(extension_instruction_list)<=len(master_inst_dict)): 91 | # left_out=[] 92 | # for i in master_inst_list: 93 | # if i not in extension_instruction_list: 94 | # left_out.append(i) 95 | # print(i) 96 | logger.warning("Check the extension input.") 97 | 98 | curr_ops_dict = utils.compute_ops_dict(args_list=groups, isa_arg=isa_arg, ext_list=extension_list) 99 | 100 | if 'C' in extension_list: 101 | logger.warning("riscv-isac does not decode immediate fields for compressed instructions. \ 102 | Value based metrics on branch ops may be inaccurate.") 103 | 104 | 105 | utils.tabulate_stats(ret_dict, header_name='Grouping instructions by Operation') 106 | ret_dict = instr_groups.privilege_modes(log,config) 107 | utils.tabulate_stats(ret_dict, header_name='Privilege Mode') 108 | 109 | if cycle_accurate_config != None: 110 | 111 | if 'cache' not in config['profiles']['cfg']['metrics'] or 'csr_compute' not in config['profiles']['cfg']['metrics']: 112 | logger.error("Cache and CSR compute metrics are not enabled. 
Please enable them for cycle accurate profiling.") 113 | raise SystemExit(1) 114 | 115 | for metric in config['profiles']['cfg']['metrics']: 116 | # Finding the new plugin file mentioned in the yaml file 117 | spec = importlib.util.spec_from_file_location("plugins", f"riscv_application_profiler/plugins/{metric}.py") 118 | # Converting file to a module 119 | metric_module = importlib.util.module_from_spec(spec) 120 | # Importing the module 121 | spec.loader.exec_module(metric_module) 122 | 123 | for funct in config['profiles']['cfg']['metrics'][metric]: 124 | funct_to_call = getattr(metric_module, funct) 125 | ret_dict1 = funct_to_call(master_inst_dict, ops_dict=op_dict, extension_used=extension_list, config= config, cycle_accurate_config=cycle_accurate_config) 126 | utils.tabulate_stats(ret_dict1, header_name=funct) 127 | 128 | # total_cycles = op_dict['total_cycles'] 129 | total_cycles = sum([master_inst_dict[entry] for entry in master_inst_dict]) + cycle_accurate_config['cycles']['reset_cycles'] 130 | ret_dict = {"Total Cycles": [total_cycles]} 131 | utils.tabulate_stats(ret_dict, header_name='Total Cycles') 132 | 133 | else: 134 | for metric in config['profiles']['cfg']['metrics']: 135 | # Finding the new plugin file mentioned in the yaml file 136 | spec = importlib.util.spec_from_file_location("plugins", f"riscv_application_profiler/plugins/{metric}.py") 137 | # Converting file to a module 138 | metric_module = importlib.util.module_from_spec(spec) 139 | # Importing the module 140 | spec.loader.exec_module(metric_module) 141 | 142 | for funct in config['profiles']['cfg']['metrics'][metric]: 143 | funct_to_call = getattr(metric_module, funct) 144 | ret_dict1 = funct_to_call(master_inst_dict, ops_dict=op_dict, extension_used=extension_list, config= config, cycle_accurate_config=cycle_accurate_config) 145 | utils.tabulate_stats(ret_dict1, header_name=funct) 146 | -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/register_compute.py: -------------------------------------------------------------------------------- 1 | from riscv_isac.log import * 2 | from riscv_application_profiler.consts import * 3 | import riscv_application_profiler.consts as consts 4 | import statistics 5 | 6 | def register_compute(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 7 | ''' 8 | Computes the number of reads and writes to each register. 9 | Args: 10 | - master_inst_dict: A dictionary of InstructionEntry objects. 11 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 12 | - extension_used: A list of extensions used in the application. 13 | - config: A yaml with the configuration information. 14 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 15 | 16 | 17 | Returns: 18 | - A dictionary with the registers as keys and a list of reads and writes as values. 19 | ''' 20 | # Log the start of the process for computing register read and write counts. 21 | logger.info("Computing register read writes.") 22 | 23 | # Get a list of all registers in the register file. 24 | reg_list = list(consts.reg_file.keys()) 25 | 26 | # Initialize a dictionary to track read and write counts for each register. 27 | regs = {i: {'write_count': 0, 'read_count': 0} for i in reg_list} 28 | 29 | # Initialize dictionaries to hold the resulting data. 
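# ret_dict holds parallel column lists (one row per register) so the caller can tabulate it directly.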
30 | ret_dict = {'Register': [], 'Reads': [], 'Writes': []} 31 | 32 | # Iterate through the list of instructions in master_inst_dict. 33 | for entry in master_inst_dict: 34 | inst_name = str(entry.instr_name) 35 | if 'f' in inst_name: 36 | continue 37 | # Check if the instruction uses rs1 register. 38 | if entry.rs1 is not None: 39 | name = str(entry.rs1[1]) + str(entry.rs1[0]) 40 | regs[name]['read_count'] += 1 41 | # Check if the instruction uses rs2 register. 42 | if entry.rs2 is not None: 43 | name = str(entry.rs2[1]) + str(entry.rs2[0]) 44 | regs[name]['read_count'] += 1 45 | # Check if the instruction defines a destination register (rd). 46 | if entry.rd is not None: 47 | name = str(entry.rd[1]) + str(entry.rd[0]) 48 | regs[name]['write_count'] += 1 49 | # if (entry.reg_commit is None): 50 | # if 'fence' in entry.instr_name or 'j' in entry.instr_name: 51 | # continue 52 | # # print(entry) 53 | # else: 54 | # if 'l' in entry.instr_name or 's' in entry.instr_name: 55 | # continue 56 | # print(entry) 57 | 58 | # Populate the result dictionary with register read and write counts. 59 | for reg in reg_list: 60 | ret_dict['Register'].append(reg) 61 | ret_dict['Reads'].append(regs[reg]['read_count']) 62 | ret_dict['Writes'].append(regs[reg]['write_count']) 63 | 64 | logger.info('Done.') 65 | 66 | # Return the resulting dictionary containing register read and write counts. 67 | return ret_dict 68 | 69 | 70 | def fregister_compute(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 71 | ''' 72 | Computes the number of reads and writes to each floating point register. 73 | Args: 74 | - master_inst_dict: A dictionary of InstructionEntry objects. 75 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 76 | - extension_used: A list of extensions used in the application. 77 | - config: A yaml with the configuration information. 78 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 79 | 80 | 81 | Returns: 82 | - A dictionary with the registers as keys and a list of reads and writes as values. 83 | ''' 84 | # Log the start of the process for computing F_register read and write counts. 85 | logger.info("Computing F_register read writes.") 86 | 87 | # Initialize an empty list to store F_register names and a dictionary to track counts. 88 | reg_list = [] 89 | regs = {} 90 | 91 | # Initialize dictionaries to hold the resulting data. 92 | ret_dict = {'F_Register': [], 'Reads': [], 'Writes': []} 93 | 94 | # Check if 'F' and 'D' extensions are present, if not, return empty lists and dictionary. 95 | if 'F' not in extension_used or 'D' not in extension_used: 96 | return (ret_dict) 97 | 98 | # Log that the process of computing register read and write counts is starting. 99 | logger.info("Computing register read writes.") 100 | 101 | # Get a list of all F_registers in the F_register file. 102 | reg_list = list(consts.freg_file.keys()) 103 | 104 | # Initialize a dictionary to track read and write counts for each F_register. 105 | regs = {i: {'write_count': 0, 'read_count': 0} for i in reg_list} 106 | 107 | # Initialize dictionaries to hold the resulting data. 108 | ret_dict = {'F_Register': [], 'Reads': [], 'Writes': []} 109 | 110 | # Iterate through the list of instructions in master_inst_dict. 111 | for entry in master_inst_dict: 112 | inst_name = str(entry.instr_name) 113 | # Check if the instruction involves F_registers. 
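# Floating-point mnemonics start with 'f' (e.g. fadd.d, flw); the 'x' checks below then exclude any integer operands such instructions carry.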
114 | if 'f' in inst_name: 115 | # Check if the instruction uses rs1 F_register. 116 | if entry.rs1 is not None and 'x' not in entry.rs1[1]: 117 | name = str(entry.rs1[1]) + str(entry.rs1[0]) 118 | regs[name]['read_count'] += 1 119 | # Check if the instruction uses rs2 F_register. 120 | if entry.rs2 is not None and 'x' not in entry.rs2[1]: 121 | name = str(entry.rs2[1]) + str(entry.rs2[0]) 122 | regs[name]['read_count'] += 1 123 | # Check if the instruction defines a destination F_register (rd). 124 | if entry.rd is not None and 'x' not in entry.rd[1]: 125 | name = str(entry.rd[1]) + str(entry.rd[0]) 126 | regs[name]['write_count'] += 1 127 | 128 | # Populate the result dictionary with F_register read and write counts. 129 | for reg in reg_list: 130 | ret_dict['F_Register'].append(reg) 131 | ret_dict['Reads'].append(regs[reg]['read_count']) 132 | ret_dict['Writes'].append(regs[reg]['write_count']) 133 | 134 | # Log the completion of F_register read and write computation. 135 | logger.info('Done.') 136 | 137 | # Return the resulting dictionary containing F_register read and write counts. 138 | return ret_dict -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/jumps_ops.py: -------------------------------------------------------------------------------- 1 | from riscv_isac.log import * 2 | from riscv_application_profiler.consts import * 3 | import riscv_application_profiler.consts as consts 4 | 5 | def jumps_compute(master_inst_dict: dict ,ops_dict: dict, extension_used: list,config, cycle_accurate_config): 6 | ''' 7 | Computes the number of jumps in the program. 8 | 9 | Args: 10 | - master_inst_dict: A dictionary of InstructionEntry objects. 11 | - ops_dict: A dictionary containing the operations as keys and a list of 12 | InstructionEntry objects as values. 13 | - extension_used: A list of extensions used in the application. 14 | - config: A yaml with the configuration information. 15 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 16 | 17 | 18 | Returns: 19 | - A dictionary with the jumps as keys and the number of jumps which are forward and backward. 20 | ''' 21 | # Log the start of the process for computing jumps. 22 | logger.info("Computing jumps.") 23 | 24 | # Initialize dictionaries to hold jump data and direction information. 25 | op_dict = {'forward': [], 'backward': []} 26 | direc_list = ['forward', 'backward'] 27 | direc_dict = {'forward': {'count': 0}, 'backward': {'count': 0}} 28 | 29 | # Initialize a dictionary to hold the resulting direction and count data. 30 | ret_dict = {'Direction': direc_list, 'Count': []} 31 | 32 | # Iterate through each instruction in master_inst_dict. 33 | for entry in master_inst_dict: 34 | 35 | # Check if the instruction is a jump operation. 36 | if entry in ops_dict['jumps']: 37 | if str(entry.instr_name) == 'jalr': 38 | rs1 = str(entry.rs1[1]) + str(entry.rs1[0]) 39 | rd = str(entry.rd[1]) + str(entry.rd[0]) 40 | jump_value = entry.imm + int(consts.reg_file[rs1], 16) 41 | consts.reg_file[rd] = hex(int(entry.instr_addr) + 4) 42 | else: 43 | jump_value = entry.imm 44 | 45 | # Handle the case where jump_value is None or negative. 
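# c.jr/c.jalr carry no decoded immediate, so their target is reconstructed from rs1 using the tracked register-file snapshot.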
46 | if jump_value is None: 47 | if 'c.jr' in entry.instr_name or 'c.jalr' in entry.instr_name: 48 | rs1 = str(entry.rs1[1]) + str(entry.rs1[0]) 49 | jump_value = int(entry.instr_addr) + int(consts.reg_file[rs1], 16) 50 | if 'c.jalr' in entry.instr_name: 51 | consts.reg_file['x1'] = hex(int(entry.instr_addr) + 2) 52 | if jump_value < 0: 53 | op_dict['backward'].append(entry) 54 | direc_dict['backward']['count'] += 1 55 | else: 56 | op_dict['forward'].append(entry) 57 | direc_dict['forward']['count'] += 1 58 | 59 | 60 | # Update register values based on commit information. 61 | if (entry.reg_commit is not None): 62 | name = str(entry.reg_commit[0]) + str(entry.reg_commit[1]) 63 | if (name != 'x0'): 64 | consts.reg_file[name] = entry.reg_commit[2] 65 | 66 | # Reset register values. 67 | consts.reg_file = {f'x{i}': '0x00000000' for i in range(32)} 68 | 69 | # Log the completion of jump computation. 70 | logger.info('Done.') 71 | 72 | # Populate the result dictionary with direction and count information. 73 | ret_dict['Count'].append(direc_dict['forward']['count']) 74 | ret_dict['Count'].append(direc_dict['backward']['count']) 75 | 76 | # Return the resulting dictionary containing jump direction and count data. 77 | return ret_dict 78 | 79 | 80 | def jump_size(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 81 | ''' 82 | Computes the number of jumps in the program. 83 | 84 | Args: 85 | - master_inst_dict: A dict of InstructionEntry objects. 86 | - ops_dict: A dictionary containing the operations as keys and a list of 87 | InstructionEntry objects as values. 88 | - extension_used: A list of extensions used in the application. 89 | - config: A yaml with the configuration information. 90 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 91 | 92 | 93 | Returns: 94 | - A dictionary with the jumps as keys and the number of jumps and jump size. 95 | 96 | ''' 97 | # Log the start of the process for computing jump size. 98 | logger.info("Computing jump size.") 99 | 100 | # Initialize dictionaries and lists to hold jump instruction data. 101 | jump_instr = {} # Dictionary to store information about jump instructions. 102 | target_address = [] # List to store target addresses for jumps. 103 | ret_dict = {'Instruction name':[],'count':[],'size':[]} # Dictionary to store return data. 104 | 105 | # Iterate through each instruction in master_inst_dict. 106 | for entry in master_inst_dict: 107 | # Check if the instruction is a jump operation. 108 | if entry in ops_dict['jumps']: 109 | instr = '' # Initialize instruction string. 110 | size = 0 # Initialize size of the jump. 111 | 112 | # Calculate the target address for the jump. 
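# jalr targets are register-relative (rs1 + imm); jal/c.j/c.jal are PC-relative (instr_addr + imm); c.jr/c.jalr take the target straight from rs1.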
113 | if entry.imm is not None: 114 | if entry.instr_name == 'jalr': 115 | rs1 = f"{entry.rs1[1]}{entry.rs1[0]}" 116 | rd = f"{entry.rd[1]}{entry.rd[0]}" 117 | ta = int(consts.reg_file[rs1], 16) + int(entry.imm) 118 | instr = f"{entry.instr_name} {rd}, {entry.imm}({rs1})" 119 | consts.reg_file[rd] = hex(int(entry.instr_addr) + 4) 120 | else: 121 | jump_value = entry.imm 122 | ta = int(entry.instr_addr) + int(jump_value) 123 | if entry.instr_name == 'c.jal': 124 | instr = f"{entry.instr_name} {entry.imm}" 125 | consts.reg_file['x1'] = hex(int(entry.instr_addr) + 2) 126 | elif entry.instr_name == 'c.j': 127 | instr = f"{entry.instr_name} {entry.imm}" 128 | elif entry.instr_name == 'jal': 129 | rd = f"{entry.reg_commit[1]}{entry.reg_commit[0]}" 130 | instr = f"{entry.instr_name} {rd}, {entry.imm}" 131 | consts.reg_file['x1'] = hex(int(entry.instr_addr) + 4) 132 | elif entry.instr_name in {'c.jr', 'c.jalr'}: 133 | rs1 = f"{entry.rs1[1]}{entry.rs1[0]}" 134 | ta = int(consts.reg_file[rs1], 16) 135 | if 'c.jalr' in entry.instr_name: 136 | consts.reg_file['x1'] = hex(int(entry.instr_addr) + 2) 137 | instr = f"{entry.instr_name} {rs1}" 138 | else: 139 | logger.debug(f"Immediate value not found for: {entry}") 140 | 141 | # Calculate the size of the jump instruction. 142 | size = abs(int(entry.instr_addr) - ta) 143 | 144 | # Update jump_instr dictionary with jump information. 145 | if instr not in jump_instr or (hex(ta) not in target_address and str(size) not in jump_instr[instr]['size(bytes)']): 146 | jump_instr[instr] = {'count': 1, 'size(bytes)': str(size)} 147 | target_address.append(hex(ta)) 148 | else: 149 | jump_instr[instr]['count'] += 1 150 | 151 | # Update register values based on commit information. 152 | if entry.reg_commit is not None and entry.rd is not None: 153 | name = f"{entry.rd[1]}{entry.rd[0]}" 154 | if name != 'x0': 155 | consts.reg_file[name] = entry.reg_commit[2] 156 | 157 | # Reset register values. 158 | consts.reg_file = {f'x{i}': '0x00000000' for i in range(32)} 159 | # Populate the return dictionary with jump instruction data. 160 | ret_dict['Instruction name'] = list(jump_instr.keys()) 161 | ret_dict['count'] = [jump_instr[key]['count'] for key in jump_instr.keys()] 162 | ret_dict['size'] = [jump_instr[key]['size(bytes)'] for key in jump_instr.keys()] 163 | 164 | # Log the completion of jump size computation. 165 | logger.info('Done.') 166 | 167 | # Return the dictionary. 168 | return ret_dict 169 | 170 | 171 | 172 | -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/instr_groups.py: -------------------------------------------------------------------------------- 1 | # See LICENSE for licensing information. 2 | 3 | # this file is a plugin for riscv_application_profiler 4 | # this file classifies instructions into groups based on the conditions defined by the user. 5 | 6 | from riscv_isac.log import * 7 | from riscv_application_profiler.consts import * 8 | import re 9 | from riscv_application_profiler import consts 10 | 11 | def group_by_operation(operations: list, isa, extension_list, master_inst_dict: dict, config, cycle_accurate_config): 12 | 13 | 14 | ''' 15 | Groups instructions based on the operation. 16 | 17 | Args: 18 | - operations: A list of operations to group by. 19 | - master_inst_dict: A dictionary of InstructionEntry objects. 20 | - isa: The ISA used in the application. 21 | - extension_list: A list of extensions used in the application. 
22 | - config: A yaml with the configuration information. 23 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 24 | 25 | Returns: 26 | - dictionaries containing grouped instructions and counts. 27 | 28 | ''' 29 | # Log the start of the process for grouping instructions by operation. 30 | logger.info("Grouping instructions by operation.") 31 | 32 | # Create a dictionary to hold instructions grouped by operation. 33 | op_dict = {f'{op}': {} for op in operations} 34 | 35 | # Create a dictionary to keep track of instruction counts per operation. 36 | ops_count = {f'{op}': {'counts': 0} for op in operations} 37 | 38 | # Create a dictionary to hold the resulting counts and operation names. 39 | ret_dict = {'Operation': [f'{op}' for op in operations], 'Counts': []} 40 | 41 | # Initialize a list to store extension-related instructions. 42 | extension_instruction_list = [] 43 | 44 | prev_instr_name = None 45 | prev_instr_addr = None 46 | # Iterate through the list of instructions in master_inst_dict. 47 | for entry in master_inst_dict: 48 | for extension in extension_list: 49 | for op in operations: 50 | try: 51 | # Check if the current instruction belongs to the specified operation. 52 | if entry.instr_name in ops_dict[isa][extension][op]: 53 | # Append the instruction to the corresponding operation group. 54 | if cycle_accurate_config != None: 55 | matched = False 56 | for inst in cycle_accurate_config['cycles']['instructions_cycles']: 57 | if re.match(inst, entry.instr_name) != None: 58 | # assigning latency to instructions 59 | op_dict[op][entry] = cycle_accurate_config['cycles']['instructions_cycles'][inst]['latency'] 60 | master_inst_dict[entry] = cycle_accurate_config['cycles']['instructions_cycles'][inst]['latency'] 61 | 62 | if prev_instr_addr != entry.instr_addr and prev_instr_name == entry.instr_name: 63 | # checking if curent instr is equal to prev instr in case it can be parallelised 64 | if (op_dict[op][prev_instr] - cycle_accurate_config['cycles']['instructions_cycles'][inst]['throughput'] > 0): 65 | op_dict[op][entry] -= op_dict[op][prev_instr] - cycle_accurate_config['cycles']['instructions_cycles'][inst]['throughput'] 66 | master_inst_dict[entry] -= master_inst_dict[prev_instr] - cycle_accurate_config['cycles']['instructions_cycles'][inst]['throughput'] 67 | 68 | #DEBUG 69 | # if 'rem' in prev_instr_name or 'div' in prev_instr_name: 70 | # op_dict[op][entry] += 1 71 | # master_inst_dict[entry] += 1 72 | 73 | prev_instr = entry 74 | prev_instr_name = entry.instr_name 75 | prev_instr_addr = entry.instr_addr 76 | 77 | matched = True 78 | break 79 | if matched == False: 80 | op_dict[op][entry] = 1 81 | master_inst_dict[entry] = 1 82 | 83 | #DEBUG 84 | # if 'rem' in prev_instr_name or 'div' in prev_instr_name: 85 | # op_dict[op][entry] += 1 86 | # master_inst_dict[entry] += 1 87 | 88 | prev_instr = entry 89 | prev_instr_name = entry.instr_name 90 | prev_instr_addr = entry.instr_addr 91 | 92 | 93 | else: 94 | op_dict[op][entry]=1 95 | 96 | # Increment the instruction count for the operation. 97 | ops_count[op]['counts'] += 1 98 | 99 | # Append the instruction to the extension instruction list. 100 | extension_instruction_list.append(entry) 101 | except KeyError as e: 102 | # Handle the case where the extension is not supported. 103 | logger.error(f'Extension {e} not supported.') 104 | exit(1) 105 | 106 | # Populate the 'Counts' field in the ret_dict with the instruction counts per operation. 
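# Each count is the number of distinct instruction entries in the group, not a cycle-weighted total.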
107 | ret_dict['Counts'] = [len(op_dict[op]) for op in operations] 108 | # Log the completion of the computation. 109 | logger.info("Done") 110 | 111 | # Return the resulting dictionaries containing grouped instructions and counts. 112 | return (ret_dict,extension_instruction_list,op_dict) 113 | 114 | 115 | def privilege_modes(log,config): 116 | ''' 117 | Computes the privilege modes. 118 | 119 | Args: 120 | - log: The path to the log file. 121 | 122 | Returns: 123 | - A list of privilege modes. 124 | - A dictionary with the privilege modes as keys and the number of instructions in each group as values. 125 | ''' 126 | # Log the start of the process for computing privilege modes. 127 | logger.info("Computing privilege modes.") 128 | privilege_mode_regex = config['profiles']['cfg']['privilege_mode_regex'] 129 | 130 | # List of privilege modes to track: user, supervised, and machine. 131 | mode_list = ['user', 'supervised', 'machine'] 132 | 133 | # Initialize a dictionary to track the counts of privilege modes. 134 | mode_dict = {'user': {'count': 0}, 'supervised': {'count': 0}, 'machine': {'count': 0}} 135 | 136 | # Initialize a dictionary to hold the resulting counts and privilege mode names. 137 | ret_dict = {'Privilege Mode': mode_list, 'Counts': []} 138 | 139 | # Open the specified log file for reading. 140 | with open(log, 'r') as log_file: 141 | # Iterate through each line in the log file. 142 | for line in log_file: 143 | # Attempt to match the line against the privilege mode regex pattern. 144 | match = re.match(privilege_mode_regex, line) 145 | if match is not None: 146 | # Extract the privilege mode value from the regex match. 147 | x = int(match.group(1)) 148 | if x is not None: 149 | # Update the counts for each privilege mode based on the extracted value. 150 | if x == 0: 151 | mode_dict['user']['count'] += 1 152 | elif x == 1: 153 | mode_dict['supervised']['count'] += 1 154 | elif x == 3: 155 | mode_dict['machine']['count'] += 1 156 | 157 | # Populate the 'Counts' field in the ret_dict with the privilege mode counts. 158 | ret_dict['Counts'] = [mode_dict[mode]['count'] for mode in mode_list] 159 | 160 | # Log the completion of the privilege mode computation. 161 | logger.info('Done.') 162 | 163 | # Return the resulting dictionary containing privilege mode counts. 164 | return ret_dict -------------------------------------------------------------------------------- /riscv_application_profiler/plugins/branch_ops.py: -------------------------------------------------------------------------------- 1 | # See LICENSE for licensing information. 2 | 3 | # this file is a plugin for riscv_application_profiler 4 | # this file classifies instructions into groups based on +ve/-ve branch offsets. 5 | # this file classifies instructions into 'long' and 'short' branches based on branch offsets. 6 | 7 | from riscv_isac.log import * 8 | from riscv_application_profiler.consts import * 9 | import riscv_application_profiler.consts as consts 10 | import statistics 11 | import pprint as pp 12 | 13 | def compute_threshold(master_inst_dict: dict, ops_dict: dict) -> int: 14 | ''' 15 | compute the mean plus two standard deviations as the threshold 16 | 17 | Args: 18 | - master_inst_dict: A dictionary of InstructionEntry objects. 19 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 
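
Returns:
- The computed threshold as an integer; 0 when no branch offsets are available.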
20 | ''' 21 | 22 | # compute the list of branch offsets from the master_inst_dict where each entry has an imm field 23 | branch_offsets = [entry.imm for entry in ops_dict['branches'] if entry.imm is not None] 24 | 25 | # compute the mean and standard deviation of the branch offsets 26 | if len(branch_offsets) == 0: 27 | return 0 28 | mean = statistics.mean(branch_offsets) 29 | std_dev = statistics.stdev(branch_offsets) 30 | 31 | # compute the threshold as the mean plus two standard deviations 32 | threshold = mean + 2*std_dev 33 | 34 | return int(threshold) 35 | 36 | def group_by_branch_offset(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 37 | ''' 38 | Groups instructions based on the branch offset. 39 | 40 | Args: 41 | - master_inst_dict: A dictionary of InstructionEntry objects. 42 | - branch_threshold: The threshold for a branch to be considered 'long'. 43 | - ops_dict: A dictionary containing the operations as keys and a list of InstructionEntry objects as values. 44 | - extension_used: A list of extensions used in the application. 45 | - config: A yaml with the configuration information. 46 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 47 | 48 | Returns: 49 | - A dictionary with the branch offset sizes and count as keys and values respectively. 50 | ''' 51 | # Logging the grouping process 52 | logger.info("Grouping instructions by branch offset.") 53 | 54 | branch_threshold = compute_threshold(master_inst_dict, ops_dict) 55 | 56 | # Initializing dictionaries and lists 57 | size_list = ['long', 'short'] 58 | size_dict = {size: {'count': 0} for size in size_list} 59 | ret_dict = {'Offset Size': size_list, 'Count': []} 60 | 61 | # loop though the branch instructions 62 | for entry in ops_dict['branches']: 63 | if entry.imm is None: 64 | continue 65 | # Determine whether the branch is long or short based on the threshold 66 | size = 'short' if entry.imm < branch_threshold else 'long' 67 | size_dict[size]['count'] += 1 68 | 69 | # Logging completion of the grouping process 70 | logger.info('Done.') 71 | 72 | # Appending the counts to the result dictionary 73 | ret_dict['Count'].append(size_dict['long']['count']) 74 | ret_dict['Count'].append(size_dict['short']['count']) 75 | 76 | # Return the final results 77 | return ret_dict 78 | 79 | 80 | def group_by_branch_sign(master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 81 | ''' 82 | Groups instructions based on the sign bit of the branch offset. 83 | 84 | Args: 85 | - master_inst_dict: A dictionary of InstructionEntry objects. 86 | - ops_dict: A dictionary with the operations as keys and a list of InstructionEntry. 87 | - extension_used: A list of extensions used in the application. 88 | - config: A yaml with the configuration information. 89 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 90 | 91 | 92 | Returns: 93 | -A list of directions, which in this case are 'positive' and 'negative'. 94 | A dictionary direc_dict containing the counts of instructions in each direction. 95 | The keys are 'positive' and 'negative', and the values are dictionaries containing the 96 | 'count' of instructions with positive and negative branch offsets. 
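Entries whose immediate field is None (e.g. compressed branches whose immediates are not decoded) are skipped.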
97 | 98 | ''' 99 | # Logging the grouping process 100 | logger.info("Grouping instructions by branch offset sign.") 101 | 102 | # Initializing dictionaries and lists 103 | direc_list = ['positive', 'negative'] 104 | direc_dict = {direc: {'count': 0} for direc in direc_list} 105 | ret_dict = {'Direction': direc_list, 'Count': []} 106 | 107 | # Loop through branch instructions 108 | for entry in ops_dict['branches']: 109 | if entry.imm is None: 110 | continue 111 | # Determine whether the branch offset is positive or negative 112 | direction = 'positive' if entry.imm >= 0 else 'negative' 113 | direc_dict[direction]['count'] += 1 114 | 115 | # Logging completion of the grouping process 116 | logger.info('Done.') 117 | 118 | # Appending the counts to the result dictionary 119 | ret_dict['Count'].append(direc_dict['positive']['count']) 120 | ret_dict['Count'].append(direc_dict['negative']['count']) 121 | 122 | # Return the final results 123 | return ret_dict 124 | 125 | 126 | 127 | def loop_compute (master_inst_dict: dict, ops_dict: dict, extension_used: list, config, cycle_accurate_config): 128 | ''' 129 | Groups instructions based on the branch offset. 130 | 131 | Args: 132 | - master_inst_dict: A dictionary of InstructionEntry objects. 133 | - ops_dict: A dictionary with the operations as keys and a list of InstructionEntry. 134 | - extension_used: A list of extensions used in the application. 135 | - config: A yaml with the configuration information. 136 | - cycle_accurate_config: A dyaml with the cycle accurate configuration information. 137 | 138 | Returns: 139 | - A dictionary loop_instr containing the counts of instructions in each loop. 140 | The keys are the branch instructions, and the values are dictionaries containing the 141 | 'target address', 'depth', 'count' and 'size' of the loop. 
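Note: depth is estimated heuristically by comparing consecutive recorded branches, not by full control-flow analysis.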
142 | ''' 143 | # Logging the loop computation process 144 | logger.info("Computing loops.") 145 | 146 | # Initializing dictionaries, lists, and result dictionary 147 | loop_instr = {} 148 | target_address = {} 149 | loop_list = [] 150 | ret_dict = {'Branch Instruction': loop_list, 'Depth': [], 'Count': [], 'Size(bytes)': []} 151 | 152 | # Loop through branch instructions 153 | for entry in ops_dict['branches']: 154 | if entry.imm is None: 155 | continue 156 | # Determine the instruction and its target address 157 | if entry.rs2 is not None: 158 | instr = f"{entry.instr_name} {entry.rs1[1]}{entry.rs1[0]},{entry.rs2[1]}{entry.rs2[0]}" 159 | else: 160 | instr = f"{entry.instr_name} {entry.rs1[1]}{entry.rs1[0]}" 161 | ta = int(entry.instr_addr) + int(entry.imm) 162 | 163 | # Update loop information in the dictionaries 164 | if instr not in loop_instr or hex(ta) not in target_address.get(instr, []): 165 | loop_instr[instr] = {'depth': 1, 'count': 1, 'size(bytes)': abs(int(entry.instr_addr) - ta)} 166 | target_address.setdefault(instr, []).append(hex(ta)) 167 | else: 168 | loop_instr[instr]['count'] = loop_instr[instr]['count'] + 1 169 | 170 | # Calculate the number of loops 171 | number_of_loops = len(loop_instr) 172 | 173 | # Initialize loop_list based on conditions 174 | loop_list = list(loop_instr.keys()) 175 | for i in range(number_of_loops - 1): 176 | if loop_list[i + 1] < loop_list[i]: 177 | loop_instr[loop_list[i + 1]]['depth'] = loop_instr[loop_list[i]]['depth'] + 1 178 | 179 | # Populate the ret_dict with loop information 180 | for i in range(number_of_loops): 181 | ret_dict['Branch Instruction'].append(loop_list[i]) 182 | ret_dict['Depth'].append(loop_instr[loop_list[i]]['depth']) 183 | ret_dict['Count'].append(loop_instr[loop_list[i]]['count']) 184 | ret_dict['Size(bytes)'].append(loop_instr[loop_list[i]]['size(bytes)']) 185 | 186 | # Logging completion of the loop computation process 187 | logger.info('Done.') 188 | 189 | # Return the final results 190 | return ret_dict 191 | 192 | -------------------------------------------------------------------------------- /docs/plugins.rst: -------------------------------------------------------------------------------- 1 | Plugins Functions 2 | =========================== 3 | 4 | Instruction Groups 5 | ------------ 6 | 7 | By categorizing instructions based on their type, 8 | developers can identify which types of instructions are 9 | executed most frequently. This information can help 10 | identify performance bottlenecks in the code and guide 11 | optimization efforts 12 | 13 | By analyzing the instruction mix, developers can identify 14 | areas of the code where optimizations can be made. For 15 | example, if load instructions are executed more 16 | frequently than store instructions, it may be possible to 17 | optimize the code by reducing the number of load 18 | instructions or by using more efficient data structures 19 | 20 | By comparing instruction mixes from different runs of the 21 | same code, developers can track changes in performance 22 | over time. This can help identify performance regressions 23 | and ensure that optimizations are having the desired 24 | effect 25 | 26 | In some architectures, such as RISC-V, profiling hardware 27 | events can provide insight into the code execution 28 | behavior on various micro-architectural units. 
By 29 | categorizing instructions based on their type, developers 30 | can gain insights into how different types of instructions 31 | affect hardware performance 32 | 33 | 34 | Privilege Modes 35 | ------------ 36 | 37 | By categorizing instructions based on their privilege 38 | mode, developers can identify which privilege modes are 39 | executed most frequently and which ones take the 40 | longest time to execute. This information can help 41 | identify performance bottlenecks in the code and guide 42 | optimization efforts. 43 | 44 | By analyzing the instruction mix based on privilege 45 | modes, developers can identify areas of the code where 46 | optimizations can be made. For example, if a large 47 | number of instructions are executed in machine mode, it 48 | may be possible to optimize the code by reducing the 49 | number of machine mode instructions or by using more 50 | efficient algorithms 51 | 52 | In RISC-V, profiling hardware events can provide insight 53 | into the code execution behavior on various microarchitectural units. By categorizing instructions based on 54 | their privilege mode, developers can gain insights into 55 | how different privilege modes affect hardware 56 | performance 57 | 58 | By providing a clear separation between privileged and 59 | non-privileged instructions, developers can identify and 60 | fix issues more quickly and easily. This can help in 61 | debugging and diagnosing problems with the operating 62 | system and applications 63 | 64 | 65 | Grouping Branches by Offset Size 66 | ------------ 67 | 68 | The "Grouping Branches by Offset Size" serves as a 69 | valuable profiling tool for understanding the behavior of 70 | branch instructions within a program. This metric 71 | essentially categorizes branches into different groups 72 | based on the size of their offset, which is the numerical 73 | distance between the branch instruction and its target 74 | destination. 75 | 76 | For instance, a scenario where a program exhibits a high 77 | frequency of branches with small offset sizes could imply 78 | that the program frequently jumps to nearby instructions. 79 | This pattern might lead to increased pipeline stalls and 80 | reduced overall execution efficiency, as the processor has 81 | to frequently change its execution path. Conversely, when 82 | a program has a notable number of branches with large 83 | offset sizes, it suggests that the program is frequently 84 | making longer jumps to more distant instructions. This 85 | behavior can also negatively influence performance due 86 | to the potential disruption of the processor's instruction 87 | fetching and execution pipelines. 88 | 89 | Analyzing the "Grouping Branches by Offset Size" metric 90 | offers developers a window into areas of the code that 91 | might benefit from optimization. For example, if a 92 | substantial number of small offset branches are detected, 93 | it could indicate opportunities to consolidate code 94 | segments or use techniques like loop unrolling to reduce 95 | the frequency of branching. 96 | 97 | Similarly, addressing excessive large offset branches 98 | might prompt developers to reorganize the code to 99 | minimize the need for distant jumps, thus enhancing 100 | execution speed 101 | 102 | 103 | Grouping Branches by Direction 104 | ------------ 105 | 106 | By grouping branches based on their direction, 107 | developers can identify which types of branches are 108 | executed most frequently and which ones take the 109 | longest time to execute. 

Grouping Branches by Direction
------------------------------

By grouping branches based on their direction, developers can identify which types of branches are executed most frequently and which ones take the longest time to execute. This information can help identify performance bottlenecks in the code and guide optimization efforts.

By analyzing the branch mix based on the sign of the offset, developers can identify areas of the code where optimizations can be made. For example, if a large number of taken branches have a negative offset, it may be possible to optimize the code by reducing the number of backward branches or by using more efficient algorithms.

In RISC-V, profiling hardware events can provide insight into the code execution behavior on various micro-architectural units. By grouping branches based on their sign, developers can gain insights into how different types of branches affect hardware performance.

By providing a clear separation between branches based on their sign, developers can identify and fix issues more quickly and easily. This can help in debugging and diagnosing problems with the operating system and applications.
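
Because the sign of the offset encodes the direction (a negative offset targets an earlier address), the grouping is a simple sign test. A minimal sketch, again assuming records with a signed ``imm`` field:

.. code-block:: python

   def branch_direction(imm):
       '''A negative offset jumps backward; anything else counts as forward.'''
       return 'backward' if imm < 0 else 'forward'

   print([branch_direction(o) for o in (-12, 8, 2048, -500)])
   # ['backward', 'forward', 'forward', 'backward']
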

Nested Loops
------------

The "Nested Loop Computation" metric provides insights into the performance characteristics of nested loops within a program. Nested loops are a common programming construct where one loop is contained within another. These loops can significantly impact program performance, and analyzing this metric helps developers understand and optimize these loop structures.

Nested loops can lead to repeated execution of the inner loop code, potentially causing a significant computational load. By measuring the "Nested Loop Computation" metric, developers can identify which loops are nested and gain insights into how many times the inner loop is executed. This information highlights potential performance bottlenecks arising from inefficient loop structures.

Resource Utilization: Nested loops can strain the resources of the processor, memory hierarchy, and caches due to frequent memory accesses and computational demands. Profiling the "Nested Loop Computation" metric can help in assessing how effectively these resources are utilized and whether improvements in memory access patterns or cache usage are needed.

Optimization Opportunities: Analyzing the "Nested Loop Computation" metric can reveal optimization opportunities. Developers can explore strategies like loop fusion (combining loops with similar iteration counts), loop unrolling (reducing loop overhead by processing multiple loop iterations at once), and optimizing data access patterns within the nested loops. These optimizations can lead to reduced execution time and improved program efficiency.

Parallelism Potential: Depending on the independence of computations within nested loops, developers might identify opportunities for parallel execution using techniques like multithreading or SIMD (Single Instruction, Multiple Data) vectorization. Profiling the nested loop metric helps in determining whether such parallelism can be effectively exploited.

Algorithmic Analysis: Sometimes, the presence of deeply nested loops can indicate inefficient algorithmic choices. By analyzing the "Nested Loop Computation" metric, developers can assess whether alternative algorithms or algorithmic improvements could lead to better overall performance.
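
In an execution log, a loop shows up as a backward branch that is executed repeatedly with the same target, which is exactly the observation the loop-computation code shown earlier builds on. A stripped-down sketch of that detection, using a simplified ``(pc, offset)`` record format rather than the profiler's real entries:

.. code-block:: python

   def find_loops(branch_records):
       '''Count iterations per (pc, target) back-edge.

       branch_records: iterable of (pc, signed_offset) tuples, a
       simplified stand-in for the profiler's decoded entries.
       '''
       loops = {}
       for pc, imm in branch_records:
           if imm < 0:   # backward branch: a loop back-edge candidate
               key = (hex(pc), hex(pc + imm))
               loops[key] = loops.get(key, 0) + 1
       return loops

   trace = [(0x80000010, -8)] * 5 + [(0x80000020, 12)]
   print(find_loops(trace))
   # {('0x80000010', '0x80000008'): 5}
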

Grouping Jumps by Direction
---------------------------

The "Jumps Direction" metric provides valuable insights into the distribution and behavior of jump instructions within a program based on their direction, i.e., whether the jumps are forward or backward in terms of memory addresses. This metric focuses specifically on understanding the control flow patterns and the potential performance implications associated with jump instructions.

Control Flow Analysis: By categorizing jump instructions into forward and backward jumps, developers can understand the structure and complexity of a program's control flow. Forward jumps typically indicate regular program execution, while backward jumps might indicate loop structures or other instances where the program is revisiting previous instructions.

Loop Identification: Backward jumps often correspond to loop constructs in the code. Analyzing the distribution and frequency of these backward jumps can help developers identify loops and understand their characteristics. This is crucial for optimizing loops, as they often represent hotspots where performance improvements can have a significant impact on overall execution time.

Code Layout Optimization: Understanding the distribution of forward and backward jumps can provide insights into the placement of code in memory. Minimizing the number of backward jumps or strategically arranging instructions can help reduce branch mispredictions and improve the efficiency of instruction fetching and execution.

Optimization Opportunities: By studying the jump directions, developers can identify opportunities to optimize code. For instance, loops with high-frequency backward jumps might be candidates for loop unrolling or other loop optimization techniques to reduce branch overhead and improve instruction-level parallelism.


Grouping Jumps by Jump Size
---------------------------

The "Jumps Size" metric provides insights into the distances that the program's jump instructions cover when transitioning from one part of the code to another. It focuses specifically on the size of the jumps, which is the numerical difference between the source and target addresses of a jump instruction, often measured in instructions or bytes.

Branching Behavior: Different jump sizes can indicate various types of branching behavior. Small jump sizes may suggest tight loops or frequently executed code segments, while large jump sizes might indicate less frequent transitions between more distant parts of the program. This information is crucial for optimizing branch prediction mechanisms and mitigating the effects of mispredicted branches.

Performance Bottlenecks: Unusually large jump sizes may highlight potential performance bottlenecks. These could be caused by jumps to distant code regions that might result in cache misses, pipeline stalls, or other inefficiencies. Identifying such bottlenecks can guide developers in reorganizing code or applying optimization techniques to minimize the impact of these large jumps.

Function Call Patterns: The "Jumps Size" metric can provide insights into function call patterns. Frequent small jumps could indicate the presence of short and frequently called functions, while occasional large jumps may point to functions with longer code bodies. Optimizing the layout of frequently used functions can lead to better cache utilization and reduced instruction fetch latencies.

Profiling for Optimization: Analyzing the "Jumps Size" metric can help developers identify opportunities for code optimization. For instance, if a certain range of jump sizes is observed frequently, it might be worth investigating whether those transitions can be made more efficient by reordering code, introducing inline functions, or applying loop transformations.
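
Because jump distances span several orders of magnitude, a power-of-two histogram is a natural presentation for this metric. The bucketing below is an illustrative choice, not the plugin's actual scheme:

.. code-block:: python

   from collections import Counter

   def size_bucket(distance):
       '''Bucket an absolute jump distance into a power-of-two range.'''
       span = 1
       while span < max(distance, 1):
           span *= 2
       return f'<= {span} B'

   distances = [4, 30, 30, 900, 70000]   # |target - source| in bytes
   print(Counter(size_bucket(d) for d in distances))
   # <= 32 B: 2, <= 4 B: 1, <= 1024 B: 1, <= 131072 B: 1
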

Register Usage
--------------

The "Analysis of Registers" metric pertains to the examination of register usage within a program. In the RISC-V architecture, registers are small storage units within the CPU used to hold temporary data and operands during program execution. Analyzing register usage can provide valuable insights into how a program utilizes registers and can help developers identify potential areas for optimization and performance improvement.

Identifying Hotspots: Registers that are frequently read from or written to can indicate hotspots in the code. Hotspots are sections of code that are executed frequently and have a significant impact on overall performance. By focusing optimization efforts on these hotspots, developers can achieve substantial performance gains.

Resource Balancing: Profiling register reads and writes can aid in resource balancing within the processor. Modern processors have limited resources, and understanding how registers are utilized can help balance other resources like execution units, cache utilization, and memory bandwidth.

Compiler Optimization: Profiling register usage provides valuable information to compilers for making optimization decisions. Compilers can use this information to perform register allocation, instruction scheduling, and other transformations to improve code efficiency.


Read After Write
----------------

The RAW metric helps in profiling by identifying situations where a read operation follows a write operation to the same location. This indicates a potential data dependency, where the result of a write operation is needed for a subsequent read operation.

Dependency Analysis: By tracking RAW dependencies, developers can identify instructions that are interdependent due to their order of execution. These dependencies can restrict the order in which instructions can be executed in parallel, potentially leading to stalls and inefficiencies in the pipeline.

Pipeline Stalls: When a read operation follows a write operation to the same location, the processor needs to ensure that the write operation is completed before the read operation can proceed. This can introduce pipeline stalls, where the processor has to wait for the write data to be available before it can continue executing subsequent instructions. Identifying and minimizing such stalls can significantly improve pipeline efficiency.

Out-of-Order Execution: Modern processors often employ techniques like out-of-order execution to mitigate the impact of data dependencies. However, excessive RAW dependencies can still limit the effectiveness of these techniques. Profiling RAW dependencies can help developers understand the limitations of out-of-order execution and find opportunities to reorder instructions for better performance.

Instruction Scheduling: By analyzing the RAW metric, developers can make informed decisions about instruction scheduling. This involves reordering instructions to maximize parallel execution while minimizing the impact of data dependencies. Strategic scheduling can lead to better resource utilization and improved overall program performance.

Register Allocation: In architectures with limited registers, managing RAW dependencies becomes crucial for efficient register allocation. By identifying where registers are being overwritten and immediately read afterward, developers can make decisions about register usage and potentially optimize the register allocation strategy.
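
A RAW dependency can be spotted by remembering which instruction last wrote each register and flagging reads that follow shortly after. The record format below is a simplified stand-in for the profiler's decoded entries, and the ``window`` length is an arbitrary placeholder for a pipeline depth:

.. code-block:: python

   def find_raw_hazards(instrs, window=3):
       '''Report (writer_index, reader_index, register) triples.

       instrs: list of (rd, [source registers]) tuples; rd may be None
       for instructions that write no register.
       '''
       last_write = {}   # register -> index of its last writer
       hazards = []
       for i, (rd, srcs) in enumerate(instrs):
           for rs in srcs:
               j = last_write.get(rs)
               if j is not None and i - j <= window:
                   hazards.append((j, i, rs))
           if rd is not None:
               last_write[rd] = i
       return hazards

   trace = [('x5', ['x1']), ('x6', ['x5']), (None, ['x6', 'x5'])]
   print(find_raw_hazards(trace))
   # [(0, 1, 'x5'), (1, 2, 'x6'), (0, 2, 'x5')]

Note that on a real pipeline with result forwarding, a hit here marks a potential forwarding event rather than a guaranteed stall.
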

Store Load Bypass
-----------------

The "Store Load Bypass" metric plays a crucial role in profiling and optimizing programs by providing insights into memory access patterns and potential performance bottlenecks. This metric refers to the behavior of the processor's memory subsystem when it encounters a sequence of instructions that involves both storing data into memory and subsequently loading that data back from memory.

In a RISC-V processor, a store-load bypass occurs when a load instruction depends on a preceding store instruction that has not yet completed. The bypass mechanism allows the load instruction to fetch the stored data directly from the internal data path, bypassing the memory hierarchy. This can prevent unnecessary delays that would occur if the load instruction had to wait for the store instruction to fully commit to memory before retrieving the data.

A high frequency of store-load bypasses can indicate potential performance bottlenecks. If loads are frequently stalled due to pending stores, the processor's execution pipeline could experience significant delays. This might highlight areas in the code where the frequency of stores and loads could be optimized to reduce such stalls.

Dependency Analysis: By studying the occurrence of store-load bypasses, developers can identify dependencies between store and load instructions. This understanding can guide them in rearranging code or using memory access optimizations like prefetching to reduce the impact of these dependencies on overall execution speed.

Memory Access Patterns: The metric can reveal patterns in memory access behavior. For example, frequent store-load bypasses might suggest that the program is modifying data and then quickly accessing it again, which could point to opportunities for caching or buffering mechanisms.

Cache Utilization: The presence of frequent store-load bypasses could also point to potential inefficiencies in cache utilization. Addressing these inefficiencies might involve adjusting cache parameters or reconsidering the order of memory accesses to minimize conflicts and improve cache hit rates.

Compiler Optimizations: Profiling store-load bypasses can inform compiler optimizations. The compiler might be able to reorder instructions to minimize the impact of dependencies, or even employ advanced techniques like software pipelining to overlap memory accesses and computations more effectively.
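
In a trace, bypass candidates are loads that read an address a store wrote only a few instructions earlier. A minimal sketch under the same simplifying assumptions about the record format:

.. code-block:: python

   def store_load_pairs(mem_ops, window=4):
       '''Yield (store_index, load_index, address) bypass candidates.

       mem_ops: list of ('store' | 'load', address) tuples, a
       simplified stand-in for the profiler's decoded memory accesses.
       '''
       last_store = {}   # address -> index of the most recent store
       for i, (op, addr) in enumerate(mem_ops):
           if op == 'store':
               last_store[addr] = i
           elif addr in last_store and i - last_store[addr] <= window:
               yield last_store[addr], i, hex(addr)

   ops = [('store', 0x8000), ('load', 0x8000), ('load', 0x9000)]
   print(list(store_load_pairs(ops)))
   # [(0, 1, '0x8000')]
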

Data Cache Utilization
----------------------

The "Data Cache" metric pertains to the behavior and efficiency of the data cache, a crucial component of the memory hierarchy in modern processors. It provides insights into how effectively the processor's data cache is being utilized by a program and can play a significant role in profiling and optimizing the program's performance.

Here's how the "Data Cache" metric in RISC-V helps in profiling:

Cache Hit Rate Analysis: The metric helps in tracking the cache hit rate, which indicates how often the processor successfully retrieves data from the cache without needing to access main memory. A high cache hit rate suggests that the data cache is effectively storing frequently accessed data, leading to improved execution speed. Conversely, a low hit rate may point to cache inefficiencies or poor memory access patterns.

Cache Misses: By monitoring cache misses, developers can identify instances where data requested by the program is not present in the cache and must be fetched from main memory. Frequent cache misses can lead to performance bottlenecks, as accessing main memory is much slower than accessing the cache.

Cache Line Utilization: This metric can help in understanding how effectively cache lines are utilized. Cache lines are the smallest units of data that the cache stores. If a program frequently uses only a small portion of each cache line, it might lead to inefficient cache usage, and optimization strategies such as data padding or rearrangement might be necessary.


Instruction Cache Utilization
-----------------------------

The "Instruction Cache Utilization" metric is a valuable tool for understanding how efficiently the instruction cache of a processor is being utilized by a program. The instruction cache is a small, fast memory component that stores frequently used instructions, allowing the processor to fetch and execute them quickly without having to access the slower main memory.

The utilization of the instruction cache is crucial for achieving high performance, as cache hits (when the required instruction is found in the cache) result in faster execution, while cache misses (when the instruction is not in the cache and needs to be fetched from main memory) lead to performance slowdowns due to longer memory access times.

The "Instruction Cache Utilization" metric provides insights into how effectively the cache is being used by a program, and it can help in profiling in the following ways:

Cache Hit Rate Analysis: By monitoring the instruction cache utilization, developers can determine the percentage of instructions that are found in the cache when needed. A high cache hit rate indicates that the program is using the cache efficiently, resulting in faster execution. Conversely, a low hit rate suggests that the cache might not be adequately sized for the program's working set or that the program's memory access patterns are not cache-friendly.

Cache Miss Analysis: Alongside the hit rate, analyzing the cache miss rate is equally important. A high cache miss rate suggests that the cache is frequently being bypassed, leading to more memory accesses and longer execution times. Profiling cache misses can help identify specific code sections or memory access patterns that are causing cache inefficiencies.

Optimization Targets: Understanding instruction cache utilization guides developers in optimizing their code to enhance cache efficiency. Techniques such as code reordering, loop unrolling, and optimizing memory access patterns can help reduce cache misses and improve overall performance.

Cache Size Evaluation: The "Instruction Cache Utilization" metric can also aid in evaluating whether the current size of the instruction cache is sufficient for the program's needs. If the cache is frequently being thrashed (a high miss rate), it might indicate that the cache is too small to accommodate the program's working set of instructions, necessitating a larger cache size.

Profiling for Different Architectures: Different RISC-V processors might have varying cache sizes and configurations. Profiling instruction cache utilization helps tailor code optimization strategies to the specific cache characteristics of the target architecture.
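
The profiler's requirements pin ``pycachesim``, which can estimate these hit and miss counts by replaying a trace's memory accesses through a configurable software cache model. A minimal sketch with a made-up single-level cache geometry and made-up addresses (see the pycachesim documentation for the full API):

.. code-block:: python

   from cachesim import CacheSimulator, Cache, MainMemory

   mem = MainMemory()
   l1 = Cache("L1", 64, 4, 64, "LRU")   # 64 sets x 4 ways x 64 B lines = 16 KiB
   mem.load_to(l1)
   mem.store_from(l1)
   cs = CacheSimulator(l1, mem)

   # Replay a few (address, is_store) accesses from a hypothetical trace.
   for addr, is_store in [(0x1000, False), (0x1008, True), (0x1000, False)]:
       if is_store:
           cs.store(addr, length=8)
       else:
           cs.load(addr, length=8)

   cs.force_write_back()
   cs.print_stats()   # per-level hit/miss/load/store counts
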

CSR Histogram
-------------

A histogram that provides information about the usage of control and status registers (CSRs).

Identifying CSR usage: A CSR histogram can help identify which control and status registers are being accessed most frequently during program execution. This information can be valuable in understanding the behavior of the program and identifying potential bottlenecks or areas for optimization.

Analyzing performance impact: By analyzing the CSR histogram, developers can gain insights into how the usage of control and status registers affects the performance of the program. This can help in identifying areas where the program may be spending excessive time accessing CSRs and in optimizing those sections of code to improve overall performance.

Comparing CSR usage: By comparing CSR histograms from different runs of the same code or from different versions of the program, developers can track changes in CSR usage over time. This can help identify any unexpected changes in behavior and guide optimization efforts.


Repeating Sequences
-------------------

Identifying code patterns: By finding repeating sequences of instructions, developers can identify common patterns in the code. This can provide insights into the structure and behavior of the program, helping to understand its overall design and logic.

Optimizing code: Analyzing repeating instruction sequences can help identify areas where code optimizations can be applied. By optimizing frequently executed sequences, developers can improve the overall performance of the program. This may involve reducing the number of instructions, optimizing memory access patterns, or applying algorithmic improvements.

Identifying hotspots: Repeating instruction sequences often indicate hotspots in the code, where a significant amount of time is spent during program execution. By identifying these hotspots, developers can focus their optimization efforts on the most critical parts of the code, leading to more effective performance improvements.

Profiling hardware events: Identifying repeating instruction sequences can provide insights into the behavior of the program on the underlying hardware. This information can be used to profile hardware events and understand how different instructions impact the performance of the processor.
--------------------------------------------------------------------------------
/riscv_application_profiler/consts.py:
--------------------------------------------------------------------------------
1 | ops_dict = {
2 | "RV32": {
3 | "I": {
4 | "loads": [
5 | "lb",
6 | "lbu",
7 | "lh",
8 | "lhu",
9 | "lw",
10 | ],
11 | "stores": ["sb", "sh", "sw"],
12 | "imm computes": [
13 | "addi",
14 | "andi",
15 | "ori",
16 | "xori",
17 | "slti",
18 | "sltiu",
19 | "auipc",
20 | "lui",
21 | ],
22 | "imm shifts": ["slli", "srli", "srai"],
23 | "reg computes": ["add", "sub", "slt", "sltu", "xor", "or", "and"],
24 | "reg shifts": ["sll", "srl", "sra"],
25 | "jumps": ["jal", "jalr"],
26 | "branches": ["bge", "bgeu", "blt", "bltu", "beq", "bne"],
27 | "compares":[],
28 | "conversions":[],
29 | "moves":[],
30 | "classifies":[],
31 | "csrs":[],
32 | "fence":["fence","fence.i"],
33 | },
34 | "M": {
35 | "loads": [],
36 | "stores": [],
37 | "imm computes": [],
38 | "imm shifts": [],
39 | "reg computes": [
40 | "div",
41 | "divu",
42 | "mul",
43 | "mulh",
44 | "mulhsu",
45 | "mulhu",
46 | "rem",
47 | "remu",
48 | ],
49 | "reg shifts": [],
50 | "jumps": [],
51 | "branches": [],
52 | "compares":[],
53 | "conversions":[],
54 | "moves":[],
55 | "classifies":[],
56 | "csrs":[],
57 | "fence":[],
58 | },
59 | "F": {
60 | "loads": ["flw","flwsp","fld","fldsp"],
61 | "stores": ["fsw","fswsp","fsd","fsdsp"],
62 | "imm computes": [],
63 | "imm shifts": [],
64 | "reg computes": [
65 | "fmadd.s",
66 | "fmsub.s",
67 | "fadd.s",
68 | "fsub.s",
69 | "fmul.s",
70 | "fdiv.s",
71 | "fmin.s",
72 | "fmax.s",
73 | "fsqrt.s",
74 | "fmadd.s",
75 | "fmsub.s",
76 | "fnmsub.s",
77 | "fnmadd.s",
78 | ],
79 | "reg shifts": [],
80 | "jumps": [],
81 | "compares": ["flt.s","feq.s","fle.s"],
82 | "conversions":[
83 | "fcvt.w.s", 84 | "fcvt.wu.s", 85 | "fcvt.s.w", 86 | "fcvt.s.wu", 87 | "fsgnj.s", 88 | "fsgnjn.s", 89 | "fsgnjx.s", 90 | ], 91 | "moves":["fmv.s","fmv.x.w","fmv.w.x"], 92 | "classifies":["fclass.s"], 93 | "branches": [], 94 | "csrs":["frcsr.s","fscsr.s","frrm","fsrm","fsrmi",], 95 | "fence":[], 96 | }, 97 | "D": { 98 | "loads": ["fld","fldsp"], 99 | "stores": ["fsd","fsdsp"], 100 | "imm computes": [], 101 | "imm shifts": [], 102 | "reg computes": [ 103 | "fmadd.d", 104 | "fmsub.d", 105 | "fadd.d", 106 | "fsub.d", 107 | "fmul.d", 108 | "fdiv.d", 109 | "fmin.d", 110 | "fmax.d", 111 | "fsqrt.d", 112 | "fmadd.d", 113 | "fmsub.d", 114 | "fnmsub.d" 115 | "fnmadd.d" 116 | ], 117 | "reg shifts": [], 118 | "jumps": [], 119 | "compares": ["flt.d","feq.d","fle.d"], 120 | "conversions":[ 121 | "fcvt.w.d", 122 | "fcvt.wu.d", 123 | "fcvt.d.w", 124 | "fcvt.d.wu", 125 | "fsgnj.d", 126 | "fsgnjn.d", 127 | "fsgnjx.d", 128 | ], 129 | "moves":["fmv.x.d","fmv.d.x"], 130 | "classifies":["fclass.d"], 131 | "branches": [], 132 | "csrs":["frcsr","fscsr","frrm","fsrm","fsrmi",], 133 | "fence":[], 134 | 135 | }, 136 | "C": { 137 | "loads": [ 138 | "c.lwsp", 139 | "c.lw", 140 | ], 141 | "stores": [ 142 | "c.swsp", 143 | "c.sw", 144 | ], 145 | "imm computes": [ 146 | "c.li", 147 | "c.lui", 148 | "c.addi", 149 | "c.addi16sp", 150 | "c.addi4spn", 151 | "c.andi", 152 | ], 153 | "imm shifts": [ 154 | "c.slli", 155 | "c.srli", 156 | "c.srai", 157 | ], 158 | "reg computes": [ 159 | "c.add", 160 | "c.addw", 161 | "c.sub", 162 | "c.subw", 163 | "c.and", 164 | "c.or", 165 | "c.xor", 166 | "c.mv", 167 | ], 168 | "reg shifts": ["c.sll", "c.srl", "c.sra"], 169 | "jumps": ["c.j", "c.jal", "c.jr", "c.jalr"], 170 | "branches": [ 171 | "c.beqz", 172 | "c.bnez", 173 | "c.bltz", 174 | "c.bgez", 175 | "c.bltz", 176 | "c.bgez", 177 | "c.bltzal", 178 | "c.bgezal", 179 | ], 180 | "compares":[], 181 | "conversions":[], 182 | "moves":[], 183 | "classifies":[], 184 | "csrs":[], 185 | "fence":[], 186 | 187 | }, 188 | "B": { 189 | "loads": [], 190 | "stores": [], 191 | "imm computes": ["bclri","bexti","binvi","bseti",'slli.uw', 192 | "mergei","sbseti","sbinvi",], 193 | "imm shifts": ['rori','roli','roriw','roliw'], 194 | "reg computes": ["add.uw", 195 | "andn", 196 | "bclr", 197 | "bext", 198 | "binv", 199 | "bset", 200 | "clmul", 201 | "clmulh", 202 | "clmulr", 203 | "clz", 204 | "clzw", 205 | "cpop",'cpopw',"sbset","sbclr","sbseti","sbclri", 206 | "ctz",'ctzw',"pcnt", 207 | 'max','maxu','min','minu', 208 | 'orc.b','orn', 209 | "pack","packh","packu","packw", 210 | 'rev8','rev.b', 211 | 'sext.b','sexr.h','sh1add','sh1add.uw','sh2add','sh2add.uw','sh3add','sh3add.uw', 212 | 'unzip','xnor','xprem.b','xprem.n','zip','zext.h', 213 | "funnel","unfunnel","merge","gather","gatheru","gatherx","scatter","scatteru","scatterx","sbext","sbextu","sbset","sbinv", 214 | "crc32.b","crc32.h","crc32.w","crc32c.b","crc32c.h","crc32c.w"], 215 | "reg shifts": ["sll","srl","sra","slo","sro","rol","ror",'rorw','rolw'], 216 | "jumps": [], 217 | "compares": [], 218 | "conversions":[], 219 | "moves":[], 220 | "classifies":[], 221 | "branches": [], 222 | "csrs":[], 223 | "fence":[], 224 | }, 225 | "P": { 226 | "loads": [ 227 | "vld", 228 | ], 229 | "stores": [ 230 | "vst", 231 | ], 232 | "imm computes": [ 233 | "vaddi", 234 | "vsubi", 235 | "vslli", 236 | "vsrli", 237 | "vsrai", 238 | "vandi", 239 | "vori", 240 | "vxori", 241 | "vslti", 242 | "vsltiu", 243 | ], 244 | "imm shifts": [ 245 | "vsll", 246 | "vsrl", 247 | "vsra", 248 | ], 249 | "reg computes": [ 250 | 
"vadd", 251 | "vsub", 252 | "vand", 253 | "vor", 254 | "vxor", 255 | "vslt", 256 | "vsltu", 257 | "vmin", 258 | "vmax", 259 | "vseq", 260 | "vsne", 261 | "vzext", 262 | "vsext", 263 | ], 264 | "reg shifts": [ 265 | "vssrl", 266 | "vssra", 267 | "vsll", 268 | "vsrl", 269 | "vsra", 270 | ], 271 | "jumps": [], 272 | "branches": [], 273 | "compares":[], 274 | "conversions":[], 275 | "moves":[], 276 | "classifies":[], 277 | "csrs":[], 278 | "fence":[], 279 | }, 280 | "Zicsr": { 281 | "loads": [], 282 | "stores": [], 283 | "imm computes": [], 284 | "imm shifts": [], 285 | "reg computes": [], 286 | "reg shifts": [], 287 | "jumps": [], 288 | "compares": [], 289 | "conversions": [], 290 | "moves": [], 291 | "classifies": [], 292 | "branches": [], 293 | "csrs": ["csrrw","csrrs","csrrc","csrrwi","csrrsi","csrrci","rdtimeh","rdtime"], 294 | "fence":[], 295 | }, 296 | 297 | }, 298 | "RV64": { 299 | "I": { 300 | "loads": ["ld", "lh", "lhu", "lb", "lbu", "lw", "lwu"], 301 | "stores": ["sb", "sh", "sw", "sd"], 302 | "imm computes": [ 303 | "addi", 304 | "addiw", 305 | "andi", 306 | "ori", 307 | "xori", 308 | "slti", 309 | "sltiu", 310 | "auipc", 311 | "lui", 312 | ], 313 | "imm shifts": ["slli", "srli", "srai", "slliw", "srliw", "sraiw"], 314 | "reg computes": [ 315 | "add", 316 | "sub", 317 | "slt", 318 | "sltu", 319 | "xor", 320 | "or", 321 | "and", 322 | "addw", 323 | "subw", 324 | ], 325 | "reg shifts": ["sll", "srl", "sra", "sllw", "srlw", "sraw"], 326 | "jumps": ["jal", "jalr"], 327 | "branches": ["bge", "bgeu", "blt", "bltu", "beq", "bne"], 328 | "compares":[], 329 | "conversions":[], 330 | "moves":[], 331 | "classifies":[], 332 | "csrs":[], 333 | "fence":["fence","fence.i"], 334 | }, 335 | "M": { 336 | "loads": [], 337 | "stores": [], 338 | "imm computes": [], 339 | "imm shifts": [], 340 | "reg computes": [ 341 | "div", 342 | "divu", 343 | "mul", 344 | "mulh", 345 | "mulhsu", 346 | "mulhu", 347 | "rem", 348 | "remu", 349 | ], 350 | "reg shifts": [], 351 | "jumps": [], 352 | "branches": [], 353 | "compares":[], 354 | "conversions":[], 355 | "moves":[], 356 | "classifies":[], 357 | "csrs":[], 358 | "fence":[], 359 | }, 360 | "F": { 361 | "loads": ["flw","flwsp","fld","fldsp"], 362 | "stores": ["fsw","fswsp","fsd","fsdsp"], 363 | "imm computes": [], 364 | "imm shifts": [], 365 | "reg computes": [ 366 | "fmadd.s", 367 | "fmsub.s", 368 | "fadd.s", 369 | "fsub.s", 370 | "fmul.s", 371 | "fdiv.s", 372 | "fmin.s", 373 | "fmax.s", 374 | "fsqrt.s", 375 | "fmadd.s", 376 | "fmsub.s", 377 | "fnmsub.s" 378 | "fnmadd.s" 379 | ], 380 | "reg shifts": [], 381 | "jumps": [], 382 | "compares": ["flt.s","feq.s","fle.s"], 383 | "conversions":[ 384 | "fcvt.w.s", 385 | "fcvt.wu.s", 386 | "fcvt.s.w", 387 | "fcvt.s.wu", 388 | "fcvt.l.s", 389 | "fcvt.lu.s", 390 | "fcvt.s.l", 391 | "fcvt.s.lu", 392 | "fsgnj.s", 393 | "fsgnjn.s", 394 | "fsgnjx.s", 395 | ], 396 | "moves":["fmv.s","fmv.x.w","fmv.w.x"], 397 | "classifies":["fclass.s"], 398 | "branches": [], 399 | "csrs":["frcsr","fscsr","frrm","fsrm","fsrmi",], 400 | "fence":[], 401 | }, 402 | "D": { 403 | "loads": ["fld","fldsp"], 404 | "stores": ["fsd","fsdsp"], 405 | "imm computes": [], 406 | "imm shifts": [], 407 | "reg computes": [ 408 | "fmadd.d", 409 | "fmsub.d", 410 | "fadd.d", 411 | "fsub.d", 412 | "fmul.d", 413 | "fdiv.d", 414 | "fmin.d", 415 | "fmax.d", 416 | "fsqrt.d", 417 | "fmadd.d", 418 | "fmsub.d", 419 | "fnmsub.d" 420 | "fnmadd.d" 421 | ], 422 | "reg shifts": [], 423 | "jumps": [], 424 | "compares": ["flt.d","feq.d","fle.d"], 425 | "conversions":[ 426 | 
"fcvt.w.d", 427 | "fcvt.wu.d", 428 | "fcvt.d.w", 429 | "fcvt.d.wu", 430 | "fcvt.l.d", 431 | "fcvt.lu.d", 432 | "fcvt.d.l", 433 | "fcvt.d.lu", 434 | "fsgnj.d", 435 | "fsgnjn.d", 436 | "fsgnjx.d", 437 | ], 438 | "moves":["fmv.x.d","fmv.d.x"], 439 | "classifies":["fclass.d"], 440 | "branches": [], 441 | "csrs":["frcsr","fscsr","frrm","fsrm","fsrmi",], 442 | "fence":[], 443 | 444 | }, 445 | "C": { 446 | "loads": [ 447 | "c.lwsp", 448 | "c.ldsp", 449 | "c.lw", 450 | "c.ld", 451 | ], 452 | "stores": [ 453 | "c.swsp", 454 | "c.sdsp", 455 | "c.sw", 456 | "c.sd", 457 | ], 458 | "imm computes": [ 459 | "c.addi4spn", 460 | "c.addi", 461 | "c.addiw", 462 | "c.li", 463 | "c.lui", 464 | "c.addi16sp", 465 | "c.addi4spn", 466 | "c.addi", 467 | "c.addiw", 468 | "c.li", 469 | "c.lui", 470 | "c.addi16sp", 471 | ], 472 | "imm shifts": ["c.slli", "c.srli", "c.srai"], 473 | "reg computes": [ 474 | "c.add", 475 | "c.sub", 476 | "c.xor", 477 | "c.or", 478 | "c.and", 479 | "c.subw", 480 | "c.addw", 481 | "c.mv", 482 | ], 483 | "reg shifts": ["c.sll", "c.srl", "c.sra"], 484 | "jumps": ["c.j", "c.jal", "c.jr", "c.jalr"], 485 | "branches": [ 486 | "c.beqz", 487 | "c.bnez", 488 | "c.bltz", 489 | "c.bgez", 490 | "c.bltz", 491 | "c.bgez", 492 | "c.bltzal", 493 | "c.bgezal", 494 | ], 495 | "compares":[], 496 | "conversions":[], 497 | "moves":[], 498 | "classifies":[], 499 | "csrs":[], 500 | "fence":[], 501 | }, 502 | 503 | "B": { 504 | "loads": [], 505 | "stores": [], 506 | "imm computes": ["bclri","bexti","binvi","bseti",'slli.uw', 507 | "mergei","sbseti","sbinvi",], 508 | "imm shifts": ['rori','roli','roriw','roliw'], 509 | "reg computes": ["add.uw", 510 | "andn", 511 | "bclr", 512 | "bext", 513 | "binv", 514 | "bset", 515 | "clmul", 516 | "clmulh", 517 | "clmulr", 518 | "clz", 519 | "clzw", 520 | "cpop",'cpopw',"sbset","sbclr","sbseti","sbclri", 521 | "ctz",'ctzw',"pcnt", 522 | 'max','maxu','min','minu', 523 | 'orc.b','orn', 524 | "pack","packh","packu","packw", 525 | 'rev8','rev.b', 526 | 'sext.b','sexr.h','sh1add','sh1add.uw','sh2add','sh2add.uw','sh3add','sh3add.uw', 527 | 'unzip','xnor','xprem.b','xprem.n','zip','zext.h', 528 | "funnel","unfunnel","merge","gather","gatheru","gatherx","scatter","scatteru","scatterx","sbext","sbextu","sbset","sbinv", 529 | "crc32.b","crc32.h","crc32.w","crc32c.b","crc32c.h","crc32c.w","crc32c.d","crc32.d"], 530 | "reg shifts": ["sll","srl","sra","slo","sro","rol","ror",'rorw','rolw'], 531 | "jumps": [], 532 | "compares": [], 533 | "conversions":[], 534 | "moves":[], 535 | "classifies":[], 536 | "branches": [], 537 | "csrs":[], 538 | "fence":[], 539 | }, 540 | "P": { 541 | "loads": [ 542 | "vld", 543 | ], 544 | "stores": [ 545 | "vst", 546 | ], 547 | "imm computes": [ 548 | "vaddi", 549 | "vsubi", 550 | "vslli", 551 | "vsrli", 552 | "vsrai", 553 | "vandi", 554 | "vori", 555 | "vxori", 556 | "vslti", 557 | "vsltiu", 558 | ], 559 | "imm shifts": [ 560 | "vsll", 561 | "vsrl", 562 | "vsra", 563 | ], 564 | "reg computes": [ 565 | "vadd", 566 | "vsub", 567 | "vand", 568 | "vor", 569 | "vxor", 570 | "vslt", 571 | "vsltu", 572 | "vmin", 573 | "vmax", 574 | "vseq", 575 | "vsne", 576 | "vzext", 577 | "vsext", 578 | ], 579 | "reg shifts": [ 580 | "vssrl", 581 | "vssra", 582 | "vsll", 583 | "vsrl", 584 | "vsra", 585 | ], 586 | "jumps": [], 587 | "branches": [], 588 | "compares":[], 589 | "conversions":[], 590 | "moves":[], 591 | "classifies":[], 592 | "csrs":[], 593 | "fence":[], 594 | }, 595 | "Zicsr": { 596 | "loads": [], 597 | "stores": [], 598 | "imm computes": [], 599 | "imm shifts": [], 
600 | "reg computes": [], 601 | "reg shifts": [], 602 | "jumps": [], 603 | "compares": [], 604 | "conversions": [], 605 | "moves": [], 606 | "classifies": [], 607 | "branches": [], 608 | "csrs": ["csrrw","csrrs","csrrc","csrrwi","csrrsi","csrrci","rdtimeh","rdtime"], 609 | "fence":[], 610 | }, 611 | }, 612 | } 613 | 614 | reg_file = {f'x{i}':'0x00000000' for i in range(32)} 615 | freg_file = {f'f{i}':'0' for i in range(32)} 616 | 617 | csr_file = {'0x000': 'ustatus', 618 | #Unprivileged Floating-Point CSRs 619 | '0x001': 'fflags', 620 | '0x002': 'frm', 621 | '0x003': 'fcsr', 622 | #Unprivileged Counter/Timers 623 | '0xc00': 'cycle', 624 | '0xc01': 'time', 625 | '0xc02': 'instret', 626 | '0xc03': 'hpmcounter3', 627 | '0xc04': 'hpmcounter4', 628 | '0xc05': 'hpmcounter5', 629 | '0xc06': 'hpmcounter6', 630 | '0xc07': 'hpmcounter7', 631 | '0xc08': 'hpmcounter8', 632 | '0xc09': 'hpmcounter9', 633 | '0xc0a': 'hpmcounter10', 634 | '0xc0b': 'hpmcounter11', 635 | '0xc0c': 'hpmcounter12', 636 | '0xc0d': 'hpmcounter13', 637 | '0xc0e': 'hpmcounter14', 638 | '0xc0f': 'hpmcounter15', 639 | '0xc10': 'hpmcounter16', 640 | '0xc11': 'hpmcounter17', 641 | '0xc12': 'hpmcounter18', 642 | '0xc13': 'hpmcounter19', 643 | '0xc14': 'hpmcounter20', 644 | '0xc15': 'hpmcounter21', 645 | '0xc16': 'hpmcounter22', 646 | '0xc17': 'hpmcounter23', 647 | '0xc18': 'hpmcounter24', 648 | '0xc19': 'hpmcounter25', 649 | '0xc1a': 'hpmcounter26', 650 | '0xc1b': 'hpmcounter27', 651 | '0xc1c': 'hpmcounter28', 652 | '0xc1d': 'hpmcounter29', 653 | '0xc1e': 'hpmcounter30', 654 | '0xc1f': 'hpmcounter31', 655 | '0xc80': 'cycleh', 656 | '0xc81': 'timeh', 657 | '0xc82': 'instreth', 658 | '0xc83': 'hpmcounter3h', 659 | '0xc84': 'hpmcounter4h', 660 | '0xc85': 'hpmcounter5h', 661 | '0xc86': 'hpmcounter6h', 662 | '0xc87': 'hpmcounter7h', 663 | '0xc88': 'hpmcounter8h', 664 | '0xc89': 'hpmcounter9h', 665 | '0xc8a': 'hpmcounter10h', 666 | '0xc8b': 'hpmcounter11h', 667 | '0xc8c': 'hpmcounter12h', 668 | '0xc8d': 'hpmcounter13h', 669 | '0xc8e': 'hpmcounter14h', 670 | '0xc8f': 'hpmcounter15h', 671 | '0xc90': 'hpmcounter16h', 672 | '0xc91': 'hpmcounter17h', 673 | '0xc92': 'hpmcounter18h', 674 | '0xc93': 'hpmcounter19h', 675 | '0xc94': 'hpmcounter20h', 676 | '0xc95': 'hpmcounter21h', 677 | '0xc96': 'hpmcounter22h', 678 | '0xc97': 'hpmcounter23h', 679 | '0xc98': 'hpmcounter24h', 680 | '0xc99': 'hpmcounter25h', 681 | '0xc9a': 'hpmcounter26h', 682 | '0xc9b': 'hpmcounter27h', 683 | '0xc9c': 'hpmcounter28h', 684 | '0xc9d': 'hpmcounter29h', 685 | '0xc9e': 'hpmcounter30h', 686 | '0xc9f': 'hpmcounter31h', 687 | #Supervisor Trap Setup 688 | '0x100': 'sstatus', 689 | '0x102': 'sedeleg', 690 | '0x103': 'sideleg', 691 | '0x104': 'sie', 692 | '0x105': 'stvec', 693 | '0x106': 'scounteren', 694 | #Supervisor Configuration 695 | '0x10a': 'senvcfg', 696 | #Supervisor Trap Handling 697 | '0x140': 'sscratch', 698 | '0x141': 'sepc', 699 | '0x142': 'scause', 700 | '0x143': 'stval', 701 | '0x144': 'sip', 702 | #Supervisor Protection and Translation 703 | '0x180': 'satp', 704 | #Debug/Trace Registers 705 | '0x5a8': 'scontext', 706 | #Hypervisor Trap Setup 707 | '0x600': 'hstatus', 708 | '0x602': 'hedeleg', 709 | '0x603': 'hideleg', 710 | '0x604': 'hie', 711 | '0x605': 'htvec', 712 | '0x606': 'hcounteren', 713 | '0x607': 'hgeie', 714 | #Hypervisor Trap Handling 715 | '0x643': 'htval', 716 | '0x644': 'hip', 717 | '0x645': 'hvip', 718 | '0x64a': 'htinst', 719 | '0xe12': 'hgeip', 720 | #Hypervisor Configuration 721 | '0x60a': 'henvcfg', 722 | '0x61a': 'henvcfgh', 723 | #Hypervisor 
Protection and Translation 724 | '0x680': 'hgatp', 725 | #Debug/Trace Registers 726 | '0x6a8': 'hcontext', 727 | #Hypervisor Counter/Timer Virtualization Registers 728 | '0x605': 'htimedelta', 729 | '0x615': 'htimedeltah', 730 | #Virtual Supervisor Registers 731 | '0x200': 'vsstatus', 732 | '0x204': 'vsie', 733 | '0x205': 'vstvec', 734 | '0x240': 'vsscratch', 735 | '0x241': 'vsepc', 736 | '0x242': 'vscause', 737 | '0x243': 'vstval', 738 | '0x244': 'vsip', 739 | '0x280': 'vsatp', 740 | #Machine Information Registers 741 | '0xf11': 'mvendorid', 742 | '0xf12': 'marchid', 743 | '0xf13': 'mimpid', 744 | '0xf14': 'mhartid', 745 | '0xf15': 'mconfigptr', 746 | #Machine Trap Setup 747 | '0x300': 'mstatus', 748 | '0x301': 'misa', 749 | '0x302': 'medeleg', 750 | '0x303': 'mideleg', 751 | '0x304': 'mie', 752 | '0x305': 'mtvec', 753 | '0x306': 'mcounteren', 754 | '0x307': 'mtvt', 755 | '0x310': 'mscratch', 756 | #Machine Trap Handling 757 | '0x340': 'mscratch', 758 | '0x341': 'mepc', 759 | '0x342': 'mcause', 760 | '0x343': 'mtval', 761 | '0x344': 'mip', 762 | '0x34a': 'mtinst', 763 | '0x34b': 'mtval2', 764 | #Machine Configuration 765 | '0x30a': 'menvcfg', 766 | '0x31a': 'menvcfgh', 767 | '0x747': 'mseccfg', 768 | '0x757': 'mseccfgh', 769 | #Machine Memory Protection 770 | '0x3a0': 'pmpcfg0', 771 | '0x3a1': 'pmpcfg1', 772 | '0x3a2': 'pmpcfg2', 773 | '0x3a3': 'pmpcfg3', 774 | '0x3a4': 'pmpcfg4', 775 | '0x3a5': 'pmpcfg5', 776 | '0x3a6': 'pmpcfg6', 777 | '0x3a7': 'pmpcfg7', 778 | '0x3a8': 'pmpcfg8', 779 | '0x3a9': 'pmpcfg9', 780 | '0x3aa': 'pmpcfg10', 781 | '0x3ab': 'pmpcfg11', 782 | '0x3ac': 'pmpcfg12', 783 | '0x3ad': 'pmpcfg13', 784 | '0x3ae': 'pmpcfg14', 785 | '0x3af': 'pmpcfg15', 786 | '0x3b0': 'pmpaddr0', 787 | '0x3b1': 'pmpaddr1', 788 | '0x3b2': 'pmpaddr2', 789 | '0x3b3': 'pmpaddr3', 790 | '0x3b4': 'pmpaddr4', 791 | '0x3b5': 'pmpaddr5', 792 | '0x3b6': 'pmpaddr6', 793 | '0x3b7': 'pmpaddr7', 794 | '0x3b8': 'pmpaddr8', 795 | '0x3b9': 'pmpaddr9', 796 | '0x3ba': 'pmpaddr10', 797 | '0x3bb': 'pmpaddr11', 798 | '0x3bc': 'pmpaddr12', 799 | '0x3bd': 'pmpaddr13', 800 | '0x3be': 'pmpaddr14', 801 | '0x3bf': 'pmpaddr15', 802 | '0x3c0': 'pmpaddr16', 803 | '0x3c1': 'pmpaddr17', 804 | '0x3c2': 'pmpaddr18', 805 | '0x3c3': 'pmpaddr19', 806 | '0x3c4': 'pmpaddr20', 807 | '0x3c5': 'pmpaddr21', 808 | '0x3c6': 'pmpaddr22', 809 | '0x3c7': 'pmpaddr23', 810 | '0x3c8': 'pmpaddr24', 811 | '0x3c9': 'pmpaddr25', 812 | '0x3ca': 'pmpaddr26', 813 | '0x3cb': 'pmpaddr27', 814 | '0x3cc': 'pmpaddr28', 815 | '0x3cd': 'pmpaddr29', 816 | '0x3ce': 'pmpaddr30', 817 | '0x3cf': 'pmpaddr31', 818 | '0x3d0': 'pmpaddr32', 819 | '0x3d1': 'pmpaddr33', 820 | '0x3d2': 'pmpaddr34', 821 | '0x3d3': 'pmpaddr35', 822 | '0x3d4': 'pmpaddr36', 823 | '0x3d5': 'pmpaddr37', 824 | '0x3d6': 'pmpaddr38', 825 | '0x3d7': 'pmpaddr39', 826 | '0x3d8': 'pmpaddr40', 827 | '0x3d9': 'pmpaddr41', 828 | '0x3da': 'pmpaddr42', 829 | '0x3db': 'pmpaddr43', 830 | '0x3dc': 'pmpaddr44', 831 | '0x3dd': 'pmpaddr45', 832 | '0x3de': 'pmpaddr46', 833 | '0x3df': 'pmpaddr47', 834 | '0x3e0': 'pmpaddr48', 835 | '0x3e1': 'pmpaddr49', 836 | '0x3e2': 'pmpaddr50', 837 | '0x3e3': 'pmpaddr51', 838 | '0x3e4': 'pmpaddr52', 839 | '0x3e5': 'pmpaddr53', 840 | '0x3e6': 'pmpaddr54', 841 | '0x3e7': 'pmpaddr55', 842 | '0x3e8': 'pmpaddr56', 843 | '0x3e9': 'pmpaddr57', 844 | '0x3ea': 'pmpaddr58', 845 | '0x3eb': 'pmpaddr59', 846 | '0x3ec': 'pmpaddr60', 847 | '0x3ed': 'pmpaddr61', 848 | '0x3ee': 'pmpaddr62', 849 | '0x3ef': 'pmpaddr63', 850 | #Machine Non-Maskable Interrupt Handling 851 | '0x740': 'mnscratch', 
852 | '0x741': 'mnepc', 853 | '0x742': 'mncause', 854 | '0x743': 'mntval', 855 | '0x744': 'mnstatus', 856 | #Machine Counter/Timers 857 | '0xb00': 'mcycle', 858 | '0xb02': 'minstret', 859 | '0xb03': 'mhpmcounter3', 860 | '0xb04': 'mhpmcounter4', 861 | '0xb05': 'mhpmcounter5', 862 | '0xb06': 'mhpmcounter6', 863 | '0xb07': 'mhpmcounter7', 864 | '0xb08': 'mhpmcounter8', 865 | '0xb09': 'mhpmcounter9', 866 | '0xb0a': 'mhpmcounter10', 867 | '0xb0b': 'mhpmcounter11', 868 | '0xb0c': 'mhpmcounter12', 869 | '0xb0d': 'mhpmcounter13', 870 | '0xb0e': 'mhpmcounter14', 871 | '0xb0f': 'mhpmcounter15', 872 | '0xb10': 'mhpmcounter16', 873 | '0xb11': 'mhpmcounter17', 874 | '0xb12': 'mhpmcounter18', 875 | '0xb13': 'mhpmcounter19', 876 | '0xb14': 'mhpmcounter20', 877 | '0xb15': 'mhpmcounter21', 878 | '0xb16': 'mhpmcounter22', 879 | '0xb17': 'mhpmcounter23', 880 | '0xb18': 'mhpmcounter24', 881 | '0xb19': 'mhpmcounter25', 882 | '0xb1a': 'mhpmcounter26', 883 | '0xb1b': 'mhpmcounter27', 884 | '0xb1c': 'mhpmcounter28', 885 | '0xb1d': 'mhpmcounter29', 886 | '0xb1e': 'mhpmcounter30', 887 | '0xb1f': 'mhpmcounter31', 888 | '0xb80': 'mcycleh', 889 | '0xb82': 'minstreth', 890 | '0xb83': 'mhpmcounter3h', 891 | '0xb84': 'mhpmcounter4h', 892 | '0xb85': 'mhpmcounter5h', 893 | '0xb86': 'mhpmcounter6h', 894 | '0xb87': 'mhpmcounter7h', 895 | '0xb88': 'mhpmcounter8h', 896 | '0xb89': 'mhpmcounter9h', 897 | '0xb8a': 'mhpmcounter10h', 898 | '0xb8b': 'mhpmcounter11h', 899 | '0xb8c': 'mhpmcounter12h', 900 | '0xb8d': 'mhpmcounter13h', 901 | '0xb8e': 'mhpmcounter14h', 902 | '0xb8f': 'mhpmcounter15h', 903 | '0xb90': 'mhpmcounter16h', 904 | '0xb91': 'mhpmcounter17h', 905 | '0xb92': 'mhpmcounter18h', 906 | '0xb93': 'mhpmcounter19h', 907 | '0xb94': 'mhpmcounter20h', 908 | '0xb95': 'mhpmcounter21h', 909 | '0xb96': 'mhpmcounter22h', 910 | '0xb97': 'mhpmcounter23h', 911 | '0xb98': 'mhpmcounter24h', 912 | '0xb99': 'mhpmcounter25h', 913 | '0xb9a': 'mhpmcounter26h', 914 | '0xb9b': 'mhpmcounter27h', 915 | '0xb9c': 'mhpmcounter28h', 916 | '0xb9d': 'mhpmcounter29h', 917 | '0xb9e': 'mhpmcounter30h', 918 | '0xb9f': 'mhpmcounter31h', 919 | #Machine Counter Setup 920 | '0x320': 'mcountinhibit', 921 | '0x323': 'mhpmevent3', 922 | '0x324': 'mhpmevent4', 923 | '0x325': 'mhpmevent5', 924 | '0x326': 'mhpmevent6', 925 | '0x327': 'mhpmevent7', 926 | '0x328': 'mhpmevent8', 927 | '0x329': 'mhpmevent9', 928 | '0x32a': 'mhpmevent10', 929 | '0x32b': 'mhpmevent11', 930 | '0x32c': 'mhpmevent12', 931 | '0x32d': 'mhpmevent13', 932 | '0x32e': 'mhpmevent14', 933 | '0x32f': 'mhpmevent15', 934 | '0x330': 'mhpmevent16', 935 | '0x331': 'mhpmevent17', 936 | '0x332': 'mhpmevent18', 937 | '0x333': 'mhpmevent19', 938 | '0x334': 'mhpmevent20', 939 | '0x335': 'mhpmevent21', 940 | '0x336': 'mhpmevent22', 941 | '0x337': 'mhpmevent23', 942 | '0x338': 'mhpmevent24', 943 | '0x339': 'mhpmevent25', 944 | '0x33a': 'mhpmevent26', 945 | '0x33b': 'mhpmevent27', 946 | '0x33c': 'mhpmevent28', 947 | '0x33d': 'mhpmevent29', 948 | '0x33e': 'mhpmevent30', 949 | '0x33f': 'mhpmevent31', 950 | #Debug/Trace Registers (shared with Debug Mode) 951 | '0x7a0': 'tselect', 952 | '0x7a1': 'tdata1', 953 | '0x7a2': 'tdata2', 954 | '0x7a3': 'tdata3', 955 | '0x7a8': 'mcontext', 956 | #Debug Mode Registers 957 | '0x7b0': 'dcsr', 958 | '0x7b1': 'dpc', 959 | '0x7b2': 'dscratch', 960 | '0x7b3': 'dscratch1', 961 | 962 | '0x345': 'mnxti', 963 | '0x347': 'mintthresh', 964 | '0x346': 'mintstatus', 965 | '0x348': 'mscratchcsw', 966 | '0x349': 'mscratchcswl', 967 | } 968 | 969 | 
--------------------------------------------------------------------------------