├── .gitignore ├── .gitmodules ├── FEATURES.md ├── LICENSE.txt ├── Makefile ├── README.md ├── README_tools.txt ├── autograd ├── README.txt └── profiler.patch ├── docs └── perf_playbooks │ ├── README.md │ ├── collective-tuning.md │ ├── freq-capture.md │ ├── rpd-101.md │ ├── rpd-101.sql │ ├── rpd-tables.md │ ├── stackframe-analysis.md │ ├── variability-analysis.md │ ├── variability-analysis.sql │ └── variability-analysis_nolaunch.sql ├── examples ├── rocm-profile-data │ ├── README.md │ ├── image │ │ └── matmul_trace.png │ ├── results │ │ ├── matmul_result.json │ │ ├── matmul_result.rpd │ │ └── matmul_result_large.rpd │ └── src │ │ ├── matrix_mult.py │ │ └── rocm-profile-data-blog.ipynb ├── rocprofiler │ ├── rocprof │ └── update.patch └── subclass │ ├── README.md │ └── generate_subclass.py ├── helpful_queries ├── README.txt ├── README_builtin_views.txt ├── binOpByDuration.cmd ├── kernelKernelDelay.cmd ├── kernelLaunchDelay.cmd ├── topDiff.cmd ├── topEx.cmd └── window.cmd ├── hipMarker ├── README.txt ├── hipMarkerModule.c ├── hipScopedMarker.py ├── roctxMarkerModule.c └── setup.py ├── install.sh ├── manage.py ├── raptor ├── README.md ├── __init__.py ├── cookbook │ ├── cookbook_utils.py │ ├── debug.ipy │ ├── get_idle.ipy │ ├── instance_trace.ipy │ ├── simple_load_rpd.ipy │ └── test_cookbook.py ├── raptor.py ├── raptor_cat_vllm.json ├── raptor_parser.py └── tests │ ├── mytrace.rpd.gz │ ├── mytrace.rpd.xlsx │ ├── test_multi_gpu.py │ ├── test_mytrace.py │ ├── test_raptor_script.py │ ├── test_roi.py │ ├── test_utils.py │ └── test_variability.py ├── remote ├── Makefile ├── Remote.cpp └── rpdRemote.sh ├── rocmProfileData ├── __init__.py ├── settings.py ├── urls.py └── wsgi.py ├── rocpd ├── __init__.py ├── admin.py ├── apps.py ├── management │ └── commands │ │ ├── importRocprof.py │ │ └── importRpt.py ├── migrations │ ├── 0001_initial.py │ └── __init__.py ├── models.py ├── tests.py ├── urls.py └── views.py ├── rocpd_python ├── MANIFEST.in ├── Makefile ├── __init__.py ├── rocpd 
│ ├── autograd.py │ ├── call_stacks.py │ ├── deserialize.py │ ├── graph.py │ ├── importer.py │ ├── metadata.py │ ├── rocprofiler_import.py │ ├── schema.py │ ├── schema_data │ │ ├── index2Schema.cmd │ │ ├── indexSchema.cmd │ │ ├── tableSchema.cmd │ │ └── utilitySchema.cmd │ ├── strings.py │ └── subclass.py └── setup.py ├── rpd_tracer ├── ApiIdList.cpp ├── ApiIdList.h ├── ApiTable.cpp ├── BufferedTable.cpp ├── CopyApiTable.cpp ├── CuptiDataSource.cpp ├── CuptiDataSource.h ├── DataSource.h ├── DbResource.cpp ├── DbResource.h ├── KernelApiTable.cpp ├── Logger.cpp ├── Logger.h ├── Makefile ├── MetadataTable.cpp ├── MonitorTable.cpp ├── OpTable.cpp ├── README.md ├── RocmSmiDataSource.cpp ├── RocmSmiDataSource.h ├── RoctracerDataSource.cpp ├── RoctracerDataSource.h ├── StackFrameTable.cpp ├── StringTable.cpp ├── Table.cpp ├── Table.h ├── Unwind.cpp ├── Utility.h ├── loadTracer.sh ├── rpdTracerControl.py ├── rpd_tracer.h ├── runTracer.sh └── setup.py ├── shelve └── rocprof2rpd.py └── tools ├── README_nvvp.txt ├── README_rpt.txt ├── README_step1.txt ├── README_step2.txt ├── nvvp2rpd.py ├── rpd2table.py ├── rpd2tracing.py ├── rpd_autograd_summary.py ├── rpd_trim.py └── rpt2rpd.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/vim,python,django 3 | # Edit at https://www.gitignore.io/?templates=vim,python,django 4 | 5 | ### Django ### 6 | *.log 7 | *.pot 8 | *.pyc 9 | __pycache__/ 10 | local_settings.py 11 | db.sqlite3 12 | media 13 | 14 | # If your build process includes running collectstatic, then you probably don't need or want to include staticfiles/ 15 | # in your Git repository. Update and uncomment the following line accordingly. 
16 | # /staticfiles/ 17 | 18 | ### Django.Python Stack ### 19 | # Byte-compiled / optimized / DLL files 20 | *.py[cod] 21 | *$py.class 22 | 23 | # C extensions 24 | *.so 25 | 26 | # Distribution / packaging 27 | .Python 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | wheels/ 40 | pip-wheel-metadata/ 41 | share/python-wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | MANIFEST 46 | 47 | # PyInstaller 48 | # Usually these files are written by a python script from a template 49 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 50 | *.manifest 51 | *.spec 52 | 53 | # Installer logs 54 | pip-log.txt 55 | pip-delete-this-directory.txt 56 | 57 | # Unit test / coverage reports 58 | htmlcov/ 59 | .tox/ 60 | .nox/ 61 | .coverage 62 | .coverage.* 63 | .cache 64 | nosetests.xml 65 | coverage.xml 66 | *.cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | 70 | # Translations 71 | *.mo 72 | 73 | # Django stuff: 74 | db.sqlite3-journal 75 | 76 | # Flask stuff: 77 | instance/ 78 | .webassets-cache 79 | 80 | # Scrapy stuff: 81 | .scrapy 82 | 83 | # Sphinx documentation 84 | docs/_build/ 85 | 86 | # PyBuilder 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # IPython 93 | profile_default/ 94 | ipython_config.py 95 | 96 | # pyenv 97 | .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 
104 | #Pipfile.lock 105 | 106 | # celery beat schedule file 107 | celerybeat-schedule 108 | 109 | # SageMath parsed files 110 | *.sage.py 111 | 112 | # Environments 113 | .env 114 | .venv 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # mypy 132 | .mypy_cache/ 133 | .dmypy.json 134 | dmypy.json 135 | 136 | # Pyre type checker 137 | .pyre/ 138 | 139 | ### Python ### 140 | # Byte-compiled / optimized / DLL files 141 | 142 | # C extensions 143 | 144 | # Distribution / packaging 145 | 146 | # PyInstaller 147 | # Usually these files are written by a python script from a template 148 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 149 | 150 | # Installer logs 151 | 152 | # Unit test / coverage reports 153 | 154 | # Translations 155 | 156 | # Django stuff: 157 | 158 | # Flask stuff: 159 | 160 | # Scrapy stuff: 161 | 162 | # Sphinx documentation 163 | 164 | # PyBuilder 165 | 166 | # Jupyter Notebook 167 | 168 | # IPython 169 | 170 | # pyenv 171 | 172 | # pipenv 173 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 174 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 175 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 176 | # install all needed dependencies. 
177 | 178 | # celery beat schedule file 179 | 180 | # SageMath parsed files 181 | 182 | # Environments 183 | 184 | # Spyder project settings 185 | 186 | # Rope project settings 187 | 188 | # mkdocs documentation 189 | 190 | # mypy 191 | 192 | # Pyre type checker 193 | 194 | ### Vim ### 195 | # Swap 196 | [._]*.s[a-v][a-z] 197 | [._]*.sw[a-p] 198 | [._]s[a-rt-v][a-z] 199 | [._]ss[a-gi-z] 200 | [._]sw[a-p] 201 | 202 | # Session 203 | Session.vim 204 | Sessionx.vim 205 | 206 | # Temporary 207 | .netrwhist 208 | *~ 209 | # Auto-generated tag files 210 | tags 211 | # Persistent undo 212 | [._]*.un~ 213 | 214 | # End of https://www.gitignore.io/api/vim,python,django 215 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "rtg_tracer"] 2 | path = rtg_tracer 3 | url = https://github.com/jeffdaily/rocm-timeline-generator 4 | [submodule "autocoplite"] 5 | path = autocoplite 6 | url = https://github.com/brieflynn/autocoplite.git 7 | [submodule "cpptrace"] 8 | path = cpptrace 9 | url = https://github.com/jeremy-rifkin/cpptrace.git 10 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights Reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON ?= python3 2 | # Top-level build: delegates to the rocpd_python, rpd_tracer and remote sub-makes; cpptrace is built only when cpptrace/Makefile exists. 3 | .PHONY: all 4 | all: cpptrace rpd rocpd remote 5 | 6 | .PHONY: install 7 | install: all 8 | $(MAKE) install -C rocpd_python 9 | $(MAKE) install -C rpd_tracer 10 | $(MAKE) install -C remote 11 | 12 | .PHONY: uninstall 13 | uninstall: 14 | $(MAKE) uninstall -C rocpd_python 15 | $(MAKE) uninstall -C rpd_tracer 16 | $(MAKE) uninstall -C remote 17 | 18 | .PHONY: clean 19 | clean: cpptrace-clean 20 | $(MAKE) clean -C rocpd_python 21 | $(MAKE) clean -C rpd_tracer 22 | $(MAKE) clean -C remote 23 | 24 | .PHONY: rpd 25 | rpd: 26 | $(MAKE) -C rpd_tracer 27 | .PHONY: rocpd 28 | rocpd: 29 | $(MAKE) -C rocpd_python 30 | .PHONY: remote 31 | remote: 32 | $(MAKE) -C remote 33 | .PHONY: cpptrace cpptrace-clean 34 | # The cpptrace targets do real work only when cpptrace/Makefile exists; otherwise they are empty no-ops. 35 | CPPTRACE_MAKE?= $(wildcard cpptrace/Makefile) 36 | ifneq ($(CPPTRACE_MAKE),) 37 | cpptrace: 38 | cd cpptrace; cmake -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../cpptrace_install; cmake --build build; cmake --install build; cd ../cpptrace_install; if [ ! -d ./lib ]; then ln -s lib64 lib; fi 39 | cpptrace-clean: 40 | $(MAKE) clean -C cpptrace 41 | rm -rf cpptrace_install 42 | else 43 | cpptrace: 44 | cpptrace-clean: 45 | endif 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rocm Profile Data 2 | 3 | -------------------------------------------------------------------------------- 4 | 5 | ROCm Profile Data is a collection of tools for tracing and analyzing gpu related activity on a system. This is represented by a timeline of api calls, app log messages, async gpu operations, and related interactions/dependencies.
6 | 7 | 8 | Contents: 9 | 10 | 11 | - [About](#about) 12 | - [Installation](#installation) 13 | - [Quickstart](#quickstart) 14 | - [Tools](#tools) 15 | - [runTracer.sh](#runtracer.sh) 16 | - [sqlite3](#sqlite3) 17 | - [rpd2tracing.py](#rpd2tracing.py) 18 | 19 | 20 | 21 | 22 | ## About 23 | 24 | The ROCm Profile Data (RPD) ecosystem consists of the following: 25 | - Collection tools - Profilers capable of detecting and logging gpu activity 26 | - File format - A standard file format (based on sqlite3) for collection and analysis tools to work with 27 | - Analysis tools - Programs capable of interpreting the profile data in a meaningful way. These can be written in SQL, C/C++, python, etc. 28 | 29 | 30 | ## Installation 31 | 32 | RPD makes heavy use of SQLite (https://sqlite.org/index.html). You will want to install the runtime and dev packages before proceeding. E.g. 33 | ``` 34 | apt-get install sqlite3 libsqlite3-dev 35 | ``` 36 | 37 | Additional packages required 38 | ``` 39 | apt-get install libfmt-dev 40 | ``` 41 | 42 | There are many tools in the RPD repo. A baseline set can be built and installed via make: 43 | ``` 44 | make; make install 45 | ``` 46 | This will install python modules that are used to manipulate trace files. 47 | It will also build and install the native tracer, rpd_tracer. 48 | 49 | ## Quickstart 50 | 51 | + Install per the [Installation](#installation) section. 52 | + Try each of the [Tools](#tools) below, in order. 53 | + You can refer to our step-by-step [tutorial](examples/rocm-profile-data/README.md) in this repository to profile and trace your application. Additionally, we have provided a list of key tables and views defined in the generated `rpd` file for your reference. 54 | 55 | ## Tools 56 | 57 | #### Raptor 58 | Raptor is an RPD post-processing script (raptor/raptor.py), plus a parsing class to enable deeper analysis with Python Pandas. 59 | See the raptor [README](raptor/README.md) for more information.
60 | 61 | #### runTracer.sh 62 | RunTracer.sh is used to launch and trace a program. It is installed in the system path as part of 'make install'. It can trace any process and its subprocesses. 63 | ``` 64 | runTracer.sh python exampleWorkload.py 65 | ``` 66 | By default the profile will be written to "trace.rpd". 67 | 68 | #### sqlite3 69 | Quick inspection of trace data can be performed with the sqlite3 command line 70 | ``` 71 | sqlite3 trace.rpd 72 | 73 | sqlite> select count(*) from rocpd_api; 74 | 978621 75 | sqlite> select count(*) from rocpd_op; 76 | 111899 77 | sqlite> select * from top; 78 | Name TotalCalls TotalDuration Ave Percentage 79 | ---------------------------------------- ---------- ------------- ---------- ---------------- 80 | Cijk_Alik_Bljk_SB_MT64x128x16_SN_1LDSB0_ 3180 3670897 1154 33.1596545434822 81 | Cijk_Alik_Bljk_SB_MT64x128x16_MI32x32x2x 12720 1703806 133 15.3906835540479 82 | Cijk_Alik_Bljk_SB_MT128x128x16_MI32x32x1 3180 1471672 462 13.2937917318908 83 | void at::native::legacy::elementwise_ker 22525 1059802 47 9.57331814908329 84 | void at::native::legacy::elementwise_ker 13593 515243 37 4.65425092430873 85 | ... 86 | sqlite> .exit 87 | 88 | ``` 89 | 90 | #### rpd2tracing.py 91 | Trace data can be viewed in chrome://tracing for visualization. Rpd2tracing.py exports the trace contents as a json file suitable for chrome://tracing or perfetto. 92 | ``` 93 | python3 tools/rpd2tracing.py trace.rpd trace.json 94 | ``` 95 | ### Autocop submodule setup 96 | 97 | The autocoplite submodule contains a visualization toolkit compatible with ```trace.rpd``` files. To use the visualization capabilities of this submodule, from within the main rocmProfileData repository, cd into the autocoplite submodule directory and initialize the submodule: 98 | 99 | ```sh 100 | git submodule update --init --recursive 101 | ``` 102 | 103 | This command will initialize, fetch and checkout the submodule to the commit specified in the main repository.
104 | 105 | To update the submodule at any time and pull the latest changes, run: 106 | 107 | ```sh 108 | git submodule update --remote 109 | ``` 110 | You can use the provided Makefile to install the dependencies into a python virtual environment named ```aclite```. Simply run: 111 | 112 | ```sh 113 | make install 114 | ``` 115 | 116 | Alternatively, you can manually install the packages using **pip**: 117 | 118 | ```sh 119 | pip install -r requirements.txt 120 | ``` 121 | 122 | To uninstall the packages and remove the virtual environment, you can use the Makefile: 123 | 124 | ```sh 125 | make clean 126 | ``` 127 | 128 | or manually remove the virtual env: 129 | 130 | ```sh 131 | rm -rf aclite 132 | ``` 133 | 134 | Follow the README.md file within the autocoplite submodule for additional instructions and examples for how to run. 135 | 136 | ### cpptrace submodule setup 137 | 138 | The cpptrace submodule adds the ability to capture stacktraces for every HIP API invocation. The module needs to be initialized and updated for this: 139 | ```sh 140 | git submodule update --init --recursive 141 | ``` 142 | 143 | This command will initialize, fetch and checkout the submodule to the commit specified in the main repository. 144 | 145 | To update the submodule at any time and pull the latest changes, run: 146 | 147 | ```sh 148 | git submodule update --remote 149 | ``` 150 | 151 | `make` will subsequently build `cpptrace` and link `rpd_tracer` against it. Enabling stacktrace capture requires setting `RPDT_STACKFRAMES=1`. 152 | -------------------------------------------------------------------------------- /README_tools.txt: -------------------------------------------------------------------------------- 1 | Tools Hidden in Plain Sight 2 | --------------------------- 3 | 4 | Pytorch Autograd: 5 | Patch pytorch allowing autograd to write directly to rpt. 6 | 7 | HipMarker (poorly named): 8 | Insert user markers (instrumentation) into your python code. 
Logs to roctx markers and ranges. 9 | 10 | TopEx: 11 | SQL to output top kernel and api summary (exclusive times). Inclusive time is available via a view embedded by most rpt importers. 12 | example: 'sqlite3 myprofile.rpt < topEx.cmd' 13 | 14 | 15 | -------------------------------------------------------------------------------- /autograd/README.txt: -------------------------------------------------------------------------------- 1 | This patch allows the pytorch autograd profiler to log directly to the rpd format. 2 | 3 | PATCH 4 | ----- 5 | To apply the patch (to an installation or src respectively): 6 | cd /site-packages (e.g. ~/.local/lib/python3.6/site-packages/) 7 | or 8 | cd (e.g. ~/pytorch/) 9 | git apply profiler.patch 10 | 11 | 12 | USAGE 13 | ----- 14 | Once installed use the profiler like normal. There is an additional output option to output rpd. 15 | Example: 16 | 17 | with torch.autograd.profiler.profile(use_cuda=True) as prof: 18 | 19 | prof.export_rpd("tracefile.rpd") 20 | -------------------------------------------------------------------------------- /autograd/profiler.patch: -------------------------------------------------------------------------------- 1 | diff --git a/torch/autograd/profiler.py b/torch/autograd/profiler.py 2 | index ad16529951..ee56ff199f 100644 3 | --- a/torch/autograd/profiler.py 4 | +++ b/torch/autograd/profiler.py 5 | @@ -197,6 +197,77 @@ class EventList(list): 6 | with_flops=self._with_flops, 7 | top_level_events_only=top_level_events_only) 8 | 9 | + def export_rpd(self, path): 10 | + """Exports an EventList as an rpd (sqlite3) trace file. 11 | + 12 | + Arguments: 13 | + path (str): Path where the trace will be written.
14 | + """ 15 | + print("Exporting rpt...") 16 | + import sqlite3 17 | + 18 | + # FIXME: remove old file if it exists (rows left by an earlier export would collide with the explicit ids inserted below) 19 | + 20 | + connection = sqlite3.connect(path) 21 | + connection.execute('CREATE TABLE IF NOT EXISTS "rocpd_string" ("id" integer NOT NULL PRIMARY KEY, "string" varchar(4096) NOT NULL)') 22 | + connection.execute('CREATE TABLE IF NOT EXISTS "rocpd_op" ("id" integer NOT NULL PRIMARY KEY, "gpuId" integer NOT NULL, "queueId" integer NOT NULL, "sequenceId" integer NOT NULL, "completionSignal" varchar(18) NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "description_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED, "opType_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED)') 23 | + connection.execute('CREATE TABLE IF NOT EXISTS "rocpd_api" ("id" integer NOT NULL PRIMARY KEY, "pid" integer NOT NULL, "tid" integer NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "apiName_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED, "args_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED)') 24 | + connection.execute('CREATE TABLE IF NOT EXISTS "rocpd_api_ops" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "api_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED)') 25 | + 26 | + #Set up primary keys 27 | + string_id = 1 28 | + op_id = 1 29 | + api_id = 1 30 | + 31 | + # Dicts 32 | + strings = {} # string -> id 33 | + 34 | + # rows to bulk insert 35 | + string_inserts = [] 36 | + api_inserts = [] 37 | + op_inserts = [] 38 | + api_ops_inserts = [] 39 | + 40 | + #empty string 41 | + empty = string_id 42 | + strings[""] = string_id 43 | + string_inserts.append((string_id, "")) 44 | + string_id = string_id + 1 45 | + 46 | + for evt in self: 47 | + try: 48 | + name = strings[evt.name] 49 | + except KeyError: 50
| + strings[evt.name] = string_id 51 | + string_inserts.append((string_id, evt.name)) 52 | + name = string_id 53 | + string_id = string_id + 1 54 | + # one rocpd_api row per event; the pid column is hard-coded to 100 55 | + api_inserts.append((api_id, 100, evt.thread, evt.cpu_interval.start * 1000, (evt.cpu_interval.start + evt.cpu_interval.elapsed_us()) * 1000, name, empty)) 56 | + 57 | + # one rocpd_op row per kernel of this event, linked to it through rocpd_api_ops 58 | + for k in evt.kernels: 59 | + try: 60 | + kname = strings[k.name] 61 | + except KeyError: 62 | + strings[k.name] = string_id 63 | + string_inserts.append((string_id, k.name)) 64 | + kname = string_id 65 | + string_id = string_id + 1 66 | + 67 | + op_inserts.append((op_id, k.device, 0, k.interval.start * 1000, (k.interval.start + k.interval.elapsed_us()) * 1000, kname, empty)) 68 | + api_ops_inserts.append((api_id, op_id)) 69 | + op_id = op_id + 1 70 | + #--------------------- 71 | + api_id = api_id + 1 72 | + 73 | + connection.executemany("insert into rocpd_string(id, string) values (?,?)", string_inserts) 74 | + connection.executemany("insert into rocpd_api(id, pid, tid, start, end, apiName_id, args_id) values (?,?,?,?,?,?,?)", api_inserts) 75 | + connection.executemany("insert into rocpd_op(id, gpuId, queueId, sequenceId, completionSignal, start, end, description_id, opType_id) values (?,?,?,'','',?,?,?,?)", op_inserts) 76 | + connection.executemany("insert into rocpd_api_ops(api_id, op_id) values (?,?)", api_ops_inserts) 77 | + connection.commit() 78 | + connection.close() 79 | + 80 | def export_chrome_trace(self, path): 81 | """Exports an EventList as a Chrome tracing tools file.
82 | 83 | @@ -533,6 +604,11 @@ class profile(object): 84 | ) 85 | table.__doc__ = EventList.table.__doc__ 86 | 87 | + def export_rpd(self, path): 88 | + self._check_finish() 89 | + return self.function_events.export_rpd(path) 90 | + export_rpd.__doc__ = EventList.export_rpd.__doc__ 91 | + 92 | def export_chrome_trace(self, path): 93 | self._check_finish() 94 | if self.kineto_results is not None: 95 | -------------------------------------------------------------------------------- /docs/perf_playbooks/README.md: -------------------------------------------------------------------------------- 1 | # PerformancePlaybooks 2 | ML workload performance analysis documentation and scripts. 3 | 4 | Available currently: 5 | 6 | - Introduction to rpd profile analysis [RPD-101](rpd-101.md) and [its SQL commands](rpd-101.sql). 7 | - Documentation of rpd tables and their use in performance analysis [RPD tables](rpd-tables.md). 8 | - Capturing and analyzing GPU frequencies during workload execution in [frequency capture](freq-capture.md). 9 | - Extracting collectives and benchmarking them standalone in [collective tuning](collective-tuning.md). 10 | - Variability analysis to gauge performance impact in [variability-analysis](variability-analysis.md) and example SQL commands for [matched](variability-analysis.sql) and [unmatched](variability-analysis_nolaunch.sql) rpds. 11 | - Call stack analysis to analyze where HIP APIs were called from in [stackframe-analysis](stackframe-analysis.md).
12 | -------------------------------------------------------------------------------- /docs/perf_playbooks/rpd-101.sql: -------------------------------------------------------------------------------- 1 | .headers on 2 | .print '### Top-10 kernels' 3 | select * from top limit 10; 4 | 5 | .print '### Contribution of matrix multiplications' 6 | select sum(Percentage) from top where Name like 'Cijk%'; 7 | .print '### Contribution of elementwise kernels' 8 | select sum(Percentage) from top where Name like '%elementwise%'; 9 | .print '### Contribution of collective communication' 10 | select sum(Percentage) from top where Name like '%ccl%'; 11 | .print '### Contribution of reduction kernels' 12 | select sum(Percentage) from top where Name like '%reduce_kernel%'; 13 | 14 | .print '### Busy time on GPU' 15 | select * from busy; 16 | -------------------------------------------------------------------------------- /docs/perf_playbooks/variability-analysis.sql: -------------------------------------------------------------------------------- 1 | .header on 2 | .print "Registered GPUs and wall times" 3 | select * from busy; 4 | -- "if exists" keeps the first run on a fresh rpd quiet: there is nothing to drop yet 5 | .print "Dropping old intermediate views if they exist" 6 | drop view if exists kernels_gpu0; 7 | drop view if exists kernels_gpu1; 8 | drop view if exists kernels_gpu2; 9 | drop view if exists kernels_gpu3; 10 | drop view if exists kernels_gpu4; 11 | drop view if exists kernels_gpu5; 12 | drop view if exists kernels_gpu6; 13 | drop view if exists kernels_gpu7; 14 | drop view if exists kernels_allgpu; 15 | drop view if exists kernel_minmaxavg; 16 | drop view if exists kernel_totalvariability; 17 | 18 | .print "Types of kernel launches per GPU - ensure they are all the same! 
If not - project out types that mismatch" 19 | select apiname, gpuId, count(*) from api join rocpd_api_ops on api.id = rocpd_api_ops.api_id join rocpd_op on rocpd_api_ops.op_id=rocpd_op.id group by apiname, gpuId; 20 | -- numRow must follow start order so that row N refers to the same launch position on every GPU 21 | .print "Creating GPU-resolved kernel views w/o collectives" 22 | create view kernels_gpu0 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over (order by o.start) as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and o.gpuId = 0 order by o.start; 23 | create view kernels_gpu1 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over (order by o.start) as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and o.gpuId = 1 order by o.start; 24 | create view kernels_gpu2 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over (order by o.start) as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and o.gpuId = 2 order by o.start; 25 | create view kernels_gpu3 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over (order by o.start) as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where 
s3.string like 'KernelExecution' and s2.string not like '%ccl%' and o.gpuId = 3 order by o.start; 26 | create view kernels_gpu4 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over (order by o.start) as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and o.gpuId = 4 order by o.start; 27 | create view kernels_gpu5 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over (order by o.start) as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and o.gpuId = 5 order by o.start; 28 | create view kernels_gpu6 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over (order by o.start) as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and o.gpuId = 6 order by o.start; 29 | create view kernels_gpu7 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over (order by o.start) as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and o.gpuId = 7 order by o.start; 30 | 31 | .print "Number of kernels per GPU track - ensure they are all the same!" 
32 | select count(*) as numKernelsGPU0 from kernels_gpu0; 33 | select count(*) as numKernelsGPU1 from kernels_gpu1; 34 | select count(*) as numKernelsGPU2 from kernels_gpu2; 35 | select count(*) as numKernelsGPU3 from kernels_gpu3; 36 | select count(*) as numKernelsGPU4 from kernels_gpu4; 37 | select count(*) as numKernelsGPU5 from kernels_gpu5; 38 | select count(*) as numKernelsGPU6 from kernels_gpu6; 39 | select count(*) as numKernelsGPU7 from kernels_gpu7; 40 | 41 | .print "Creating union of all non-collective kernels" 42 | create view kernels_allgpu as select * from kernels_gpu0 union all select * from kernels_gpu1 union all select * from kernels_gpu2 union all select * from kernels_gpu3 union all select * from kernels_gpu4 union all select * from kernels_gpu5 union all select * from kernels_gpu6 union all select * from kernels_gpu7 order by numRow; 43 | 44 | .print "Creating kernel-resolved statistics" 45 | create view kernel_minmaxavg as select distinct numRow, avg(duration) over (partition by numRow) as avgDuration, min(duration) over (partition by numRow) as minDuration, max(duration) over (partition by numRow) as maxDuration from kernels_allgpu; 46 | 47 | 48 | .print "Total variabilities in us" 49 | create view kernel_totalvariability as select sum(maxDuration-minDuration) as maxToMinVariability, sum(maxDuration-avgDuration) as maxToAvgVariability from kernel_minmaxavg; 50 | select * from kernel_totalvariability; 51 | 52 | select max(GpuTime) as maxGPUTime from busy; 53 | -------------------------------------------------------------------------------- /docs/perf_playbooks/variability-analysis_nolaunch.sql: -------------------------------------------------------------------------------- 1 | .header on 2 | .print "Registered GPUs and wall times" 3 | select * from busy; 4 | 5 | .print "Dropping old intermediate views if they exist" 6 | drop view kernels_gpu2; 7 | drop view kernels_gpu3; 8 | drop view kernels_gpu4; 9 | drop view kernels_gpu5; 10 | drop view
kernels_gpu6; 11 | drop view kernels_gpu7; 12 | drop view kernels_gpu8; 13 | drop view kernels_gpu9; 14 | drop view kernels_allgpu; 15 | drop view kernel_minmaxavg; 16 | drop view kernel_totalvariability; 17 | 18 | .print "Types of kernel launches per GPU - ensure they are all the same! If not - project out types that mismatch" 19 | select apiname, gpuId, count(*) from api join rocpd_api_ops on api.id = rocpd_api_ops.api_id join rocpd_op on rocpd_api_ops.op_id=rocpd_op.id group by apiname, gpuId; 20 | 21 | .print "Creating GPU-resolved kernel views w/o collectives" 22 | create view kernels_gpu2 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over () as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and apiName not like 'hipLaunchKernel' and o.gpuId = 2 order by o.start; 23 | create view kernels_gpu3 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over () as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and apiName not like 'hipLaunchKernel' and o.gpuId = 3 order by o.start; 24 | create view kernels_gpu4 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over () as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and apiName not like 
'hipLaunchKernel' and o.gpuId = 4 order by o.start; 25 | create view kernels_gpu5 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over () as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and apiName not like 'hipLaunchKernel' and o.gpuId = 5 order by o.start; 26 | create view kernels_gpu6 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over () as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and apiName not like 'hipLaunchKernel' and o.gpuId = 6 order by o.start; 27 | create view kernels_gpu7 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over () as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and apiName not like 'hipLaunchKernel' and o.gpuId = 7 order by o.start; 28 | create view kernels_gpu8 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over () as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and apiName not like 'hipLaunchKernel' and o.gpuId = 
8 order by o.start; 29 | create view kernels_gpu9 as select apiName, s2.string as kernelName, (o.end-o.start) as duration, o.gpuId, row_number() over () as numRow from rocpd_op o inner join rocpd_string s2 on s2.id = o.description_id inner join rocpd_string s3 on s3.id = o.opType_id inner join rocpd_api_ops rao on rao.op_id=o.id inner join api on api.id = rao.api_id where s3.string like 'KernelExecution' and s2.string not like '%ccl%' and apiName not like 'hipLaunchKernel' and o.gpuId = 9 order by o.start; 30 | 31 | 32 | .print "Number of kernels per GPU track - ensure they are all the same!" 33 | select count(*) as numKernelsGPU2 from kernels_gpu2; 34 | select count(*) as numKernelsGPU3 from kernels_gpu3; 35 | select count(*) as numKernelsGPU4 from kernels_gpu4; 36 | select count(*) as numKernelsGPU5 from kernels_gpu5; 37 | select count(*) as numKernelsGPU6 from kernels_gpu6; 38 | select count(*) as numKernelsGPU7 from kernels_gpu7; 39 | select count(*) as numKernelsGPU8 from kernels_gpu8; 40 | select count(*) as numKernelsGPU9 from kernels_gpu9; 41 | 42 | .print "Creating union of all non-collective kernels" 43 | create view kernels_allgpu as select * from kernels_gpu2 union all select * from kernels_gpu3 union all select * from kernels_gpu4 union all select * from kernels_gpu5 union all select * from kernels_gpu6 union all select * from kernels_gpu7 union all select * from kernels_gpu8 union all select * from kernels_gpu9 order by numRow; 44 | 45 | .print "Creating kernel-resolved statistics" 46 | create view kernel_minmaxavg as select distinct numRow, avg(duration) over (partition by numRow) as avgDuration, min(duration) over (partition by numRow) as minDuration, max(duration) over (partition by numRow) as maxDuration from kernels_allgpu; 47 | 48 | 49 | .print "Total variabilities in us" 50 | create view kernel_totalvariability as select sum(maxDuration-minDuration) as maxToMinVariability, sum(maxDuration-avgDuration) as maxToAvgVariability from 
def matmult_gpu(input_data, weights, device='cuda'):
    """
    Perform matrix multiplication of two tensors on an accelerator device.

    Args:
        input_data (torch.Tensor): Input tensor.
        weights (torch.Tensor): Weight tensor.
        device (str or torch.device): Device both operands are moved to before
            the multiplication. Defaults to 'cuda', which is what this
            function always used before the parameter existed.

    Returns:
        torch.Tensor: Result of matrix multiplication, located on `device`.
    """
    # Move both operands to the target device so the matmul executes there
    input_data = input_data.to(device)
    weights = weights.to(device)

    return torch.matmul(input_data, weights)
if __name__ == "__main__":
    # Command line: a single positional argument naming the rpd database to process
    cli = argparse.ArgumentParser(description='Deserialize caching allocator block instrumentation messages and create a subclass table')
    cli.add_argument('input_rpd', type=str, help="input rpd db")
    options = cli.parse_args()

    # Api names whose messages get deserialized
    roctxApis = ["UserMarker"]

    # Api names that become rows of the 'block' subclass table
    blockApis = ['BlockAlloc', 'BlockFreeDeallocate', 'BlockInsertEvents', 'ProcessEvents', 'BlockFreeDeactivate']

    # Column name -> SQL column type for the subclass table
    argTypes = dict([
        ('size', 'integer NOT NULL'),
        ('block', 'varchar(18) NOT NULL'),
        ('stream', 'integer NOT NULL'),
        ('event', 'varchar(18) NOT NULL'),
    ])

    db = sqlite3.connect(options.input_rpd)
    importData = RocpdImportData()
    importData.resumeExisting(db)   # load the current db state

    # str(list)[1:-1] prints the list contents without the surrounding brackets
    api_names_text = str(roctxApis)[1:-1]
    print(f"Deserializing apis in: {api_names_text}")
    deserializeApis(importData, roctxApis)

    print(f"Creating subclass table for 'block' apis: {blockApis}")
    createSubclassTable(importData, 'api', 'block', blockApis, argTypes)
analyzing traces. 2 | Each tool will include usage and docs within the body. 3 | 4 | Sqlite3 scripts 5 | --------------- 6 | Files ending in .cmd are raw sql scripts. They can be executed similar to: 7 | sqlite3 trace_file.rpd < script.cmd 8 | 9 | These scripts can do just about anything, including: 10 | - complex reports 11 | - adding views (which will persist in the .rpd) 12 | - trimming or modifying existing data 13 | - creating new tables to hold supplemental data 14 | 15 | Python scripts 16 | -------------- 17 | Some tasks are better suited for python, which can also directly modify the rpd files. 18 | Check the individual usage by running with no args. General format: 19 | python script.py 20 | -------------------------------------------------------------------------------- /helpful_queries/README_builtin_views.txt: -------------------------------------------------------------------------------- 1 | RPD files created by the default tools will include some in-built views (for free!) 2 | Views are stored queries. They can be accessed like a normal table but are just 3 | an alternate presentation. 4 | 5 | These views are useful when interacting with the db directly via the sqlite3 cli. 6 | To start try the following: 7 | 8 | $ sqlite3 trace.rpd 9 | 10 | sqlite> .mode column 11 | sqlite> .header on 12 | sqlite> .width 10 20 20 20 20 (to override the default column width as you see fit) 13 | 14 | 15 | 16 | API and OP views 17 | ---------------- 18 | 19 | In the rpd schema strings are stored separately from the api and ops. This makes it 20 | hard to browse those events directly. The 'api' and 'op' views join in the string table.
21 | 22 | sqlite> select * from api limit 1; 23 | id pid tid start end apiName args 24 | ----- -------- -------- --------------- --------------- --------------- --------------- 25 | 1 78436 78436 106107011171065 106107011184359 hipMalloc size=0x3d0900 26 | 27 | sqlite> select * from op limit 1; 28 | id gpuId queueId sequence start end description opType 29 | ----- -------- -------- -------- ------------ ------------ ------------ --------------- 30 | 1 0 0 0 106107047182 106107047229 CopyHostToDevice 31 | 32 | 33 | 34 | TOP View 35 | -------- 36 | 37 | This does an aggregation over all the ops and presents the ops that used the greatest gpu time. 38 | 39 | sqlite> select * from top; 40 | Name TotalCalls TotalDuration Ave Percentage 41 | ---------------------------- ---------- ------------- ---------- --------------- 42 | _Z13vector_squareIfEvPT_S1_m 499878 4719747 9 99.989909080028 43 | CopyHostToDevice 1 476 476 0.0100909199719 44 | 45 | 46 | 47 | KTOP View 48 | --------- 49 | 50 | Similar to TOP except it only includes kernel ops. This view requires the rocpd_kernelop table 51 | be populated. Some collection tools may not do this. The output may be empty. 52 | 53 | 54 | 55 | BUSY View 56 | --------- 57 | 58 | This reports average gpu usage over the run. WARNING: this is only accurate on 'trimmed runs' 59 | where any warmup has been removed. Or possibly very long runs that amortize the warmup.
--#
--# Divide ops into bins based on execution time
--# This will let you divide short, medium, long running, etc
--#
--# Define your bins by altering the insert lines below
--# A bin holds ops with lower <= (end - start) < upper
--#
--# Overlapping bins are valid. An item will be counted once for each
--# bin it fits into. E.g. bins like: "< 10", "< 100", "< 1000"
--#


CREATE TEMPORARY TABLE bins ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "lower" integer NOT NULL, "upper" integer NOT NULL, "name" varchar(255) NOT NULL);
--# Bin names are string literals, so they use single quotes; double quotes denote
--# identifiers in SQL and are only accepted as strings by a legacy SQLite fallback.
insert into bins(lower, upper, name) values (0, 10000, '< 10us');
insert into bins(lower, upper, name) values (10000, 25000, '10-25 us');
insert into bins(lower, upper, name) values (25000, 100000, '25-100 us');
insert into bins(lower, upper, name) values (100000, 1000000, '100-1000 us');
--# Open-ended last bin: the largest 64-bit integer as the upper bound, so that ops
--# running for a second or longer are still counted as "> 1ms".
insert into bins(lower, upper, name) values (1000000, 9223372036854775807, '> 1ms');

--# One output row per bin that contains at least one op
select B.name as Bin, count(*) as Count from op A join bins B on (A.end-A.start) < B.upper and (A.end-A.start) >= B.lower group by B.id;
--# Flesh out joined api_op entries
create view if not exists api_op as select A.pid, A.tid, A.apiName, A.start as apiStart, A.end as apiEnd, B.gpuId, B.queueId, B.start as gpuStart, B.end as gpuEnd, kernelName from rocpd_api_ops Z join api A on A.id = Z.api_id join kernel B on B.id = Z.op_id order by A.start;

--# View of queueDepth transitions (intermediate result, not useful in itself)
--# Every api_op row appears twice: offset '1' at the api call start and offset '-1' at the gpu end
create view if not exists api_op_transition as select *, '1' as offset, apiStart as time from api_op UNION ALL select *, '-1' as offset, gpuEnd as time from api_op order by time;


--# View of kernel timing with queueDepth included
--# The running sum has to see BOTH the '1' and the '-1' rows, so it is computed in a subquery
--# and the '1' (launch) rows are selected afterwards. A WHERE clause is applied before window
--# functions run; filtering in the same query would leave only '1' rows to sum, which turns
--# queueDepth into a plain launch counter that never decreases.
create view if not exists kernel_timing as select gpuId, gpuStart, gpuEnd, queueDepth from (select gpuId, gpuStart, gpuEnd, offset, sum(offset) over (partition by gpuId order by time) as queueDepth from api_op_transition) where offset = '1';

--# View of duration and kernel delay from previous kernel completion
--# LAG is likewise evaluated over all kernels of the gpu before the busy filter is applied,
--# so the delay is always measured against the kernel that actually finished just before.
--# queueDepth counts the kernel itself, hence "> 1" means something else was already in flight.
create view if not exists kernel_delay as select gpuId, kkDelay, duration from (select gpuId, queueDepth, gpuStart - LAG(gpuEnd) over (partition by gpuId order by gpuEnd) as kkDelay, gpuEnd - gpuStart as duration from kernel_timing) where queueDepth > 1;

--# Output the average kernel time and inter-kernel delays per gpu

.mode column
.header on

select gpuId, avg(kkDelay) as avgDelay, avg(duration) as avgDuration, avg(kkDelay) * 100 / (avg(kkDelay) + avg(duration)) as percentGapLoss from kernel_delay group by gpuId;
--# Flesh out joined api_op entries
create view if not exists api_op as select A.pid, A.tid, A.apiName, A.start as apiStart, A.end as apiEnd, B.gpuId, B.queueId, B.start as gpuStart, B.end as gpuEnd, kernelName from rocpd_api_ops Z join api A on A.id = Z.api_id join kernel B on B.id = Z.op_id order by A.start;

--# View of queueDepth transitions (intermediate result, not useful in itself)
--# Every api_op row appears twice: offset '1' at the api call start and offset '-1' at the gpu end
create view if not exists api_op_transition as select *, '1' as offset, apiStart as time from api_op UNION ALL select *, '-1' as offset, gpuEnd as time from api_op order by time;

--# View of launch delays. Includes those with queueDepth, which are being delayed because of kernel in front of them
--# The running sum has to see BOTH the '1' and the '-1' rows, so it is computed in a subquery
--# and the '1' (launch) rows are selected afterwards. A WHERE clause is applied before window
--# functions run; filtering in the same query would leave only '1' rows to sum, which turns
--# queueDepth into a plain launch counter that never decreases.
create view if not exists launch_delay as select gpuId, apiName, kernelName, delay, queueDepth from (select gpuId, apiName, kernelName, offset, (gpuStart - apiEnd) as delay, sum(offset) over (partition by gpuId order by time) as queueDepth from api_op_transition) where offset = '1';
23 | 24 | .mode column 25 | .header on 26 | 27 | select gpuId, apiName, kernelName, count(gpuId) as count, min(delay) as min, max(delay) as max, avg(delay) avg from launch_delay where queueDepth = 1 group by gpuId, kernelName; 28 | 29 | -------------------------------------------------------------------------------- /helpful_queries/topDiff.cmd: -------------------------------------------------------------------------------- 1 | --# 2 | --# topDiff - Compare two traces and list the kernels in descending order of % change in execution time from one trace to another 3 | --# This is useful to figure out which kernels were the most impacted in terms of relative performance from one trace to another 4 | --# It is also important to pay attention to the Percentage columns as they indicate how much of the total execution time was taken 5 | --# up by each kernel. So, an entry with a high value for Percentage *and* PctDiff should be one of the top contributors to a perf drop 6 | --# 7 | --# NOTE: You'll need to update the rpd filenames below 8 | 9 | ATTACH "" as rpdA; 10 | ATTACH "" as rpdB; 11 | 12 | SELECT *, (100.0*(B.Ave-A.Ave)/A.Ave) AS PctDiff FROM rpdA.top A JOIN rpdB.top B USING(Name) ORDER BY PctDiff DESC; 13 | -------------------------------------------------------------------------------- /helpful_queries/topEx.cmd: -------------------------------------------------------------------------------- 1 | --# 2 | --# TopEx - Output 'top' apis (host/CPU) and ops (GPU), exclusive. 3 | --# Time from nested calls is only attributed to the callee, not the caller. 
.mode column
.header on
.width 40 12 12 12 12

--# Apis
--# temp.seq receives one row per api start (trans_type 1) and one per api end (trans_type -1).
--# NOTE(review): the steps below assume ids are handed out in the ORDER BY ts order of the
--# insert, i.e. that id order equals timestamp order - confirm.
CREATE TABLE temp.seq ("id" integer NOT NULL PRIMARY KEY, "srcId" integer, "ts" integer NOT NULL, "trans_type" integer NOT NULL, "total" integer, "topmost" integer);
INSERT INTO temp.seq(srcId, ts, trans_type) SELECT id, start AS ts, '1' FROM rocpd_api UNION ALL SELECT id, end, '-1' FROM rocpd_api ORDER BY ts ASC;
--# total = running sum of trans_type, built from the previous row's total.
--# NOTE(review): the second update reads the total of row id-1, so it presumably relies on
--# rows being updated in ascending id order - confirm.
UPDATE temp.seq SET total = 1 where id = 1;
UPDATE temp.seq SET total = temp.seq.trans_type + (SELECT total FROM temp.seq AS A WHERE A.id = temp.seq.id - 1) WHERE id > 1;

--# topmost = the api a row is attributed to: a start row names its own api; an end row
--# takes the topmost of the closest earlier row that has the same total.
UPDATE temp.seq SET topmost = (SELECT srcId) WHERE trans_type=1;
UPDATE temp.seq SET topmost = (SELECT topmost FROM temp.seq AS A WHERE A.total = temp.seq.total AND A.id < temp.seq.id ORDER BY id DESC LIMIT 1) WHERE trans_type=-1;

--# Each pair of consecutive rows (id, id + 1) becomes one time section, attributed to the
--# topmost api of the first row of the pair.
CREATE VIEW temp.exclusive_api AS select A.id, C.pid, C.tid, A.ts as start, B.ts as end, C.apiName_id, C.args_id from temp.seq A join temp.seq B on B.id = a.id + 1 join rocpd_api C on C.id = A.topmost;

--# raw ranges
--# select A.topmost, A.ts as start, B.ts as end, B.ts - A.ts as Duration from temp.seq A join temp.seq B on B.id = a.id + 1
--# select A.id, C.pid, C.tid, A.ts as start, B.ts as end, C.apiName_id, C.args_id from temp.seq A join temp.seq B on B.id = a.id + 1 join rocpd_api C on C.id = A.topmost;


--# Per api name: number of sections, summed and average section length (both divided by 1000)
--# and share of the total section time.
select A.string as ApiName, count(A.string) as TotalSections, sum(B.end - B.start) / 1000 as totalDuration, (sum(B.end-B.start)/count(A.string)) / 1000 as Ave, sum(B.end-B.start) * 100 / (select sum(end-start) from temp.exclusive_api) as Percentage from temp.exclusive_api B join rocpd_string A on A.id = B.apiName_id group by ApiName order by TotalDuration desc;
temp.seq(srcId, ts, trans_type) SELECT id, start AS ts, '1' FROM rocpd_op UNION ALL SELECT id, end, '-1' FROM rocpd_op ORDER BY ts ASC; 32 | UPDATE temp.seq SET total = 1 where id = 1; 33 | UPDATE temp.seq SET total = temp.seq.trans_type + (SELECT total FROM temp.seq AS A WHERE A.id = temp.seq.id - 1) WHERE id > 1; 34 | 35 | UPDATE temp.seq SET topmost = (SELECT srcId) WHERE trans_type=1; 36 | UPDATE temp.seq SET topmost = (SELECT topmost FROM temp.seq AS A WHERE A.total = temp.seq.total AND A.id < temp.seq.id ORDER BY id DESC LIMIT 1) WHERE trans_type=-1; 37 | 38 | CREATE VIEW temp.exclusive_op AS select A.id, C.gpuId, C.queueId, C.sequenceId, C.completionSignal, A.ts as start, B.ts as end, C.description_id, C.opType_id from temp.seq A join temp.seq B on B.id = a.id + 1 join rocpd_op C on C.id = A.topmost; 39 | 40 | select A.string as KernelName, count(A.string) as TotalSections, sum(B.end - B.start) / 1000 as totalDuration, (sum(B.end-B.start)/count(A.string)) / 1000 as Ave, sum(B.end-B.start) * 100 / (select sum(end-start) from temp.exclusive_op) as Percentage from temp.exclusive_op B join rocpd_string A on A.id = B.description_id group by KernelName order by TotalDuration desc; 41 | 42 | 43 | -------------------------------------------------------------------------------- /helpful_queries/window.cmd: -------------------------------------------------------------------------------- 1 | --# 2 | --# Remove all rpd data that falls outside a desired window based on timestamps. 3 | --# This can be useful to: 4 | --# 1. create an rpd which doesn't have any warmup iterations, or contains data only within the desired time window, to perform further queries on 5 | --# 2. dump a json trace which doesn't have any warmup iteration data, or data outside the desired time window 6 | --# 7 | --# It might be convenient to insert roctx markers from the workload script to mark the start and end of the desired window. 
--# Marker names are string literals, so they use single quotes; double quotes denote
--# identifiers in SQL and are only accepted as strings by a legacy SQLite fallback.
--# If a marker is missing its subquery yields NULL and the corresponding DELETE removes nothing.

--# Apis
--# Drop every api that started before the "start_profile" marker or after the end of the "end_profile" marker
DELETE FROM rocpd_api WHERE start < (SELECT start FROM api WHERE apiname = 'UserMarker' and args = 'start_profile');
DELETE FROM rocpd_api WHERE start > (SELECT end FROM api WHERE apiname = 'UserMarker' and args = 'end_profile');

--# Ops
--# Same window applied to the ops; the markers themselves survive the api deletes above
DELETE FROM rocpd_op WHERE start < (SELECT start FROM api WHERE apiname = 'UserMarker' and args = 'start_profile');
DELETE FROM rocpd_op WHERE start > (SELECT end FROM api WHERE apiname = 'UserMarker' and args = 'end_profile');

--# Cleanup
--# Terminated with ';' so the statement is complete when the shell reaches end of input
VACUUM;
19 | 20 | CC=/opt/rocm/bin/hipcc python3.6 setup.py install 21 | -------------------------------------------------------------------------------- /hipMarker/hipMarkerModule.c: -------------------------------------------------------------------------------- 1 | #define PY_SSIZE_T_CLEAN 2 | #include 3 | 4 | #include "roctracer_ext.h" 5 | 6 | 7 | static PyObject *hipMarker_emitMarker(PyObject *self, PyObject *args) 8 | { 9 | const char *eventString = ""; 10 | if (PyArg_ParseTuple(args, "s", &eventString)) { 11 | //roctracer_add_user_event(eventString); 12 | roctracer_mark(eventString); 13 | printf("MARKER: %s\n", eventString); 14 | } 15 | Py_INCREF(Py_None); 16 | return Py_None; 17 | }; 18 | 19 | static PyMethodDef hipMarkerMethods[] = { 20 | {"emitMarker", hipMarker_emitMarker, METH_VARARGS, "Insert a hip user marker"} 21 | , {NULL, NULL, 0, NULL} 22 | }; 23 | 24 | static struct PyModuleDef hipMarkermodule = { 25 | PyModuleDef_HEAD_INIT, 26 | "hipMarker", 27 | NULL, 28 | -1, 29 | hipMarkerMethods 30 | }; 31 | 32 | PyMODINIT_FUNC PyInit_hipMarker(void) 33 | { 34 | return PyModule_Create(&hipMarkermodule); 35 | } 36 | 37 | -------------------------------------------------------------------------------- /hipMarker/hipScopedMarker.py: -------------------------------------------------------------------------------- 1 | import roctxMarker 2 | 3 | class hipScopedMarker: 4 | def __init__(self, description): 5 | self.description = description 6 | 7 | @staticmethod 8 | def emitMarker(description): 9 | roctxMarker.emitMarker(description) 10 | 11 | def __enter__(self): 12 | roctxMarker.pushMarker(f"{self.description}") 13 | def __exit__(self, etype, evalue, etraceback): 14 | roctxMarker.popMarker() 15 | def __del__(self): 16 | pass 17 | -------------------------------------------------------------------------------- /hipMarker/roctxMarkerModule.c: -------------------------------------------------------------------------------- 1 | #define PY_SSIZE_T_CLEAN 2 | #include 3 | 4 | 
#include "roctracer/roctx.h" 5 | 6 | 7 | static PyObject *roctxMarker_emitMarker(PyObject *self, PyObject *args) 8 | { 9 | const char *eventString = ""; 10 | if (PyArg_ParseTuple(args, "s", &eventString)) { 11 | roctxMarkA(eventString); 12 | //printf("EMIT: %s\n", eventString); 13 | } 14 | Py_INCREF(Py_None); 15 | return Py_None; 16 | }; 17 | 18 | static PyObject *roctxMarker_pushMarker(PyObject *self, PyObject *args) 19 | { 20 | const char *eventString = ""; 21 | if (PyArg_ParseTuple(args, "s", &eventString)) { 22 | roctxRangePushA(eventString); 23 | //printf("PUSH: %s\n", eventString); 24 | } 25 | Py_INCREF(Py_None); 26 | return Py_None; 27 | }; 28 | 29 | static PyObject *roctxMarker_popMarker(PyObject *self, PyObject *args) 30 | { 31 | roctxRangePop(); 32 | //printf("POP:\n"); 33 | Py_INCREF(Py_None); 34 | return Py_None; 35 | }; 36 | 37 | static PyMethodDef roctxMarkerMethods[] = { 38 | {"emitMarker", roctxMarker_emitMarker, METH_VARARGS, "Insert a roxtx marker"} 39 | , {"pushMarker", roctxMarker_pushMarker, METH_VARARGS, "Start a roxtx range"} 40 | , {"popMarker", roctxMarker_popMarker, METH_VARARGS, "End most recent roxtx range"} 41 | , {NULL, NULL, 0, NULL} 42 | }; 43 | 44 | static struct PyModuleDef roctxMarkermodule = { 45 | PyModuleDef_HEAD_INIT, 46 | "roctxMarker", 47 | NULL, 48 | -1, 49 | roctxMarkerMethods 50 | }; 51 | 52 | PyMODINIT_FUNC PyInit_roctxMarker(void) 53 | { 54 | return PyModule_Create(&roctxMarkermodule); 55 | } 56 | 57 | -------------------------------------------------------------------------------- /hipMarker/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | 3 | #module1 = Extension('hipMarker', 4 | # sources = ['hipMarkerModule.c'], 5 | # include_dirs=['/opt/rocm/rocprofiler/include', '/opt/rocm/roctracer/include'], 6 | # library_dirs=['/opt/rocm/rocprofiler/lib', '/opt/rocm/roctracer/lib'], 7 | # libraries=['roctracer64'] 8 | #) 9 | 10 | 
#!/bin/bash
# Install the packages needed for the build, then build and install.
#
# Stop at the first failing command: without this, a failed package install
# or a failed "make" was still followed by "make install".
set -e

apt-get install -y sqlite3 libsqlite3-dev libfmt-dev
apt-get install -y libzstd-dev

make
make install
16 | ) from exc 17 | execute_from_command_line(sys.argv) 18 | 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /raptor/__init__.py: -------------------------------------------------------------------------------- 1 | print ("running __init__.py") 2 | import os 3 | import sys 4 | #sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "./") 5 | sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../") 6 | -------------------------------------------------------------------------------- /raptor/cookbook/cookbook_utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import shlex 4 | 5 | def setup(argv, script_name): 6 | """ 7 | Perform common setup for the cookbooks 8 | Designed for running ipython using one of these two syntaxes: 9 | 10 | 1. Args after double-dash: 11 | $ ipython -i COOKBOOK_SCRIPT -- [COOKBOOK_SCRIPT_ARGS] 12 | 13 | 2. Args from file: 14 | $ ipython -i COOKBOOK_SCRIPT 15 | (or when running interactive python without arguments) 16 | if no arguments are specified, read arguments from a "./args" file if it exists. 
17 | """ 18 | 19 | pd.set_option('display.max_rows', 100) 20 | pd.options.display.max_colwidth = 40 21 | pd.set_option('display.float_format', '{:.1f}'.format) 22 | 23 | if len(argv) == 1: 24 | args_file = os.path.abspath("args") 25 | if os.path.exists(args_file): 26 | print("info: reading args from '" + args_file + "'") 27 | try: 28 | with open(args_file, 'r') as args_file: 29 | data = args_file.read() 30 | argv += shlex.split(data) 31 | except FileNotFoundError: 32 | pass 33 | 34 | 35 | print("info: args='" + " ".join(argv) + "'") 36 | 37 | return argv 38 | 39 | -------------------------------------------------------------------------------- /raptor/cookbook/debug.ipy: -------------------------------------------------------------------------------- 1 | """ 2 | Script for loading an RPD for interactive usage, useful for debugging script dev 3 | 4 | Usage: 5 | $ ipython cookbook/debug.ipy -i 6 | """ 7 | %load_ext autoreload 8 | %autoreload 2 9 | 10 | rpd_file = "tests/mytrace.rpd.gz" 11 | 12 | from importlib import reload 13 | import pandas as pd 14 | import numpy as np 15 | from raptor_parser import RaptorParser 16 | 17 | pd.set_option('display.max_rows', 100) 18 | pd.options.display.max_colwidth = 40 19 | pd.set_option('display.float_format', '{:.1f}'.format) 20 | 21 | print ("info: reading RPD=", rpd_file) 22 | 23 | raptor = RaptorParser(rpd_file) 24 | 25 | raptor.get_category_df() 26 | kernelseq_df = raptor.get_kernelseq_df() 27 | 28 | pretty_kernelseq_df = raptor.get_pretty_kernelseq_df(kernelseq_df) 29 | 30 | display(pretty_kernelseq_df) 31 | print() 32 | display(raptor.category_df) 33 | -------------------------------------------------------------------------------- /raptor/cookbook/get_idle.ipy: -------------------------------------------------------------------------------- 1 | # Simple script to pull the GPU_Idle stat for a specified set of RPD files 2 | # Could be easily modified to extract other stats. 
"""
Extract instances for a given kernel into 'instance_df' and draw a chart.
Run from an interactive python with graphing capabilities (ie VSCode)
"""
#%%
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")

import pandas as pd
import numpy as np
from raptor_parser import RaptorParser
pd.options.display.max_colwidth = 40

#%%
# Trace to load; the last assignment wins, edit to point at your own RPD.
rpd_file = "../tests/mytrace.rpd.gz"
rpd_file = "../traces/mlperf_train/mlperf_llama_train.rpd"
# Row of the kernel-sequence table whose instances are charted.
kernel_index=2

#%%
print ("info: reading RPD=", rpd_file)
raptor = RaptorParser(rpd_file, zscore_threshold=-1)

display(raptor.get_category_df())


# %%
kernel_df = raptor.get_kernelseq_df().iloc[kernel_index]

# %%
def chart_instance(instance_df:pd.DataFrame, xlabel:str):
    """
    Scatter-plot the duration (in microseconds) of each row of instance_df.

    Rows are numbered in their current order (after reset_index) and that
    number is used as the x value, so the caller controls the x ordering by
    sorting instance_df beforehand.

    Reads the module-level 'kernel_df' and 'kernel_index' to build the title.

    instance_df : frame with a 'Duration_ns' column.
    xlabel      : label for the x axis.
    """

    instance_df = instance_df.reset_index()
    instance_df['iter_num'] = instance_df.index
    instance_df['Duration_us'] = instance_df['Duration_ns']/1000

    # First element of the selected row's label, cut to 80 characters
    # (presumably the kernel name -- confirm against get_kernelseq_df()).
    kname = kernel_df.name[0][:80]
    instance_df.plot.scatter(x='iter_num', y='Duration_us',
                             title='Kernel#%d Duration\n%s\n'% (kernel_index, kname),
                             ylabel='Duration_us',
                             xlabel=xlabel,
                             grid=True)

# %%
instance_df = raptor.get_instance_df_from_kernel_df(kernel_df, sort_by=None)
chart_instance(instance_df, "Instance#")

instance_df = raptor.get_instance_df_from_kernel_df(kernel_df, sort_by='Duration_ns')
# sort_values() returns a new frame rather than sorting in place; keep the
# result so the "Sorted" chart is guaranteed to be ordered by duration.
instance_df = instance_df.sort_values(by='Duration_ns')
chart_instance(instance_df, "Sorted Instance#")

# %%
instance_df
# Tests for multi-GPU transition points and filtering

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")

from raptor_parser import RaptorParser
import copy
import pandas as pd
import numpy as np
# Hand-built op table: rows 0-2 run on gpuId 0, rows 3-5 run on gpuId 1.
# On GPU 1 the last op (8..9) starts before the previous op (6..20) ends.
custom_op_df = pd.DataFrame({
    'start':[0,10,20, 2,6, 8],
    'end' :[9,16,25, 5,20,9],
    'gpuId':[0,0,0, 1,1,1],
    'Kernel' : ['A', 'B', 'C', 'AA', 'B', 'C']
    })

# Parser shared by every test in this module, loaded from the table above
# instead of from an RPD file.
raptor=RaptorParser()
raptor.set_op_df(custom_op_df, set_roi=True)

def test_multi_gpu():
    """Check per-GPU PreGap_ns, Duration_ns and sequenceId for the custom ops."""

    op_df = raptor.get_op_df()
    print(op_df)
    # GPU 0: first op has no predecessor; the later gaps are 10-9 and 20-16.
    assert np.isnan(op_df.iloc[0].PreGap_ns)
    assert op_df.iloc[1].PreGap_ns == 1.0
    assert op_df.iloc[2].PreGap_ns == 4.0

    assert np.isnan(op_df.iloc[3].PreGap_ns) # should reset to NAN for first record in new GPU
    assert op_df.iloc[4].PreGap_ns == 1.0
    # The overlapping op is expected to report a gap of 0, not a negative value.
    assert op_df.iloc[5].PreGap_ns == 0.0

    # Durations equal end - start; sequence ids restart at 1 for each GPU.
    assert list(op_df['Duration_ns'].T) == [9,6,5, 3,14,1]
    assert list(op_df['sequenceId'].T) == [1,2,3, 1,2,3]

    # make sure we can print it,
    raptor.print_op_trace()

def test_gpu_filter():
    """Check that set_gpu_id() appends a gpuId clause to the SQL filter string."""
    # Work on a copy so the GPU filter does not leak into the shared parser.
    r = copy.deepcopy(raptor)

    assert r.sql_filter_str() == "where start>=0 and start<=25"
    r.set_gpu_id(1)
    assert r.sql_filter_str() == "where start>=0 and start<=25 and gpuId==1"

def test_gpu_df():
    """Check that get_gpu_ts_df() runs with default and 'ns' units and reports idle time."""
    # First call only exercises the default arguments; its result is replaced below.
    gpu_df = raptor.get_gpu_ts_df()
    gpu_df = raptor.get_gpu_ts_df(duration_unit='ns')
    print(gpu_df)
    # NOTE(review): presumably row 0 is GPU 0 (busy 20 of the 25 ns ROI) -- confirm.
    assert gpu_df['Idle_pct'].iloc[0] == 20
"""Smoke tests that run the raptor.py command-line script on the sample trace."""
import os
import shlex

(test_path, test_file) = os.path.split(os.path.abspath(__file__))

# Resolve the script from this file's location (it lives one directory above
# the tests) so the tests do not depend on pytest's working directory.
# Both paths are shell-quoted because they are pasted into os.system() strings.
# The trailing space separates the script from whatever is appended to it.
raptor = shlex.quote(os.path.normpath(os.path.join(test_path, os.pardir, "raptor.py"))) + " "
rpd_file = shlex.quote(os.path.join(test_path, "mytrace.rpd.gz"))

def test_help():
    """--help exits with status 0."""
    assert not os.system(raptor + " --help")

def test_trace():
    """Run on the sample trace with the -c and -k switches."""
    assert not os.system(raptor + rpd_file + " -c -k")

def test_zscore():
    """Run with the combined -ck switches and a z-score threshold of 3."""
    assert not os.system(raptor + rpd_file + " -ck -z 3")

def test_instance():
    """Run with -i 0, a z-score threshold and a custom op-trace command width."""
    assert not os.system(raptor + rpd_file + " -i 0 -z 3 --op-trace-cmd-width=60")
def test_gaps_label():
    """Check the labels _make_gaps_labels returns for 2-, 3- and 4-value tuples."""
    # (input tuple, expected label list)
    cases = [
        ((0,np.inf), ["GAP >0us"]),
        ((0,20,np.inf), ["GAP <=20us", "GAP >20us"]),
        ((0,20,100,np.inf), ["GAP <=20us", "GAP 20us-100us", "GAP >100us"]),
    ]
    for bounds, expected_labels in cases:
        assert RaptorParser._make_gaps_labels(bounds) == expected_labels
def test_calc_zscore():
    """ Compute group-wise zscore for kernels with the same name """
    import math

    # Durations (end - start) per kernel name:
    #   A: 900, 900, 1000   B: 1500, 1700, 1600   C: 7777   D: 2000, 4000
    mock_var_df = pd.DataFrame({
        'start': [ 0, 1000, 2000, 4000, 5000, 8000, 10000, 20000, 25000] ,
        'end': [ 900, 2500, 2900, 5700, 6000, 9600, 17777, 22000, 29000] ,
        'gpuId': [1,1,1, 1,1,1, 1, 1,1 ],
        'Kernel' : ['A', 'B', 'A', 'B', 'A', 'B', 'C', 'D', 'D']
        })
    var_raptor=RaptorParser(prekernel_seq=0, zscore_threshold=RaptorParser.default_zscore)
    var_raptor.set_op_df(copy.deepcopy(mock_var_df), set_roi=True)

    print(var_raptor.get_kernelseq_df())
    print ("op_df with Duration_zscore")
    op_df = var_raptor.get_op_df()
    print (op_df)

    dur_zscore = op_df['Duration_zscore']

    # Expected z-score keyed by the op_df index label used for the lookup.
    expected = {
        1: -0.707,   # A
        2: -1.225,   # B
        3: -0.707,   # A
        4: 1.225,    # B
        5: 1.414,    # A
        6: 0,        # B - exactly the group mean
        7: 0,        # 'C' - only one kernel
        8: -1,       # 'D' - two kernels
        9: +1,       # 'D' - two kernels
    }
    for label, want in expected.items():
        # rel_tol alone can never match an expected value of 0 unless the
        # result is exactly 0.0, so a small abs_tol covers float round-off.
        assert math.isclose(dur_zscore[label], want, rel_tol=.001, abs_tol=1e-9), \
            (label, dur_zscore[label], want)

# Installation root; override with `make PREFIX=/some/dir install`.
PREFIX = /usr/local

RPD_INCLUDES =
RPDREMOTE_SRCS = Remote.cpp


RPDREMOTE_OBJS = $(RPDREMOTE_SRCS:.cpp=.o)


RPDREMOTE_MAIN = librpd_remote.so


all: | $(RPDREMOTE_MAIN)

.PHONY: all

$(RPDREMOTE_MAIN): $(RPDREMOTE_OBJS)
	$(CXX) -o $@ $^ -shared -rdynamic -std=c++11 -g

.cpp.o:
	$(CXX) -o $@ -c $< $(RPD_INCLUDES) -std=c++11 -fPIC -g -O3


# Installs the control script as bin/rpdRemote and the library into lib/.
.PHONY: install
install: all
	cp rpdRemote.sh $(PREFIX)/bin/rpdRemote
	cp $(RPDREMOTE_MAIN) $(PREFIX)/lib/
	ldconfig

# Removes everything `install` copied: the library and the rpdRemote script.
# -f keeps a missing file from aborting the removal of the other one.
.PHONY: uninstall
uninstall:
	rm -f $(PREFIX)/lib/$(RPDREMOTE_MAIN)
	rm -f $(PREFIX)/bin/rpdRemote

.PHONY: clean
clean:
	rm -f *.o *.so
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
#include <stdio.h>
#include <signal.h>
#include <dlfcn.h>

// Marked as a constructor so it runs when this shared library is loaded.
static void remoteInit() __attribute__((constructor));

namespace {
    bool init = false;      // not read anywhere in this file
    int refCount = 0;       // 1 while tracing started by this library is active, else 0
    void *dl = nullptr;     // handle of librpd_tracer.so; kept open once loaded
}

void startTracing(int sig);
void stopTracing(int sig);

// Install the SIGUSR1 (start) and SIGUSR2 (stop) handlers.
void remoteInit()
{
    fprintf(stderr, "rpdRemote: init()\n");
    signal(SIGUSR1, startTracing);
    signal(SIGUSR2, stopTracing);
}

// SIGUSR1 handler: load librpd_tracer.so on first use and call its rpdstart().
// Does nothing when tracing is already active.
// NOTE(review): fprintf/dlopen/dlsym are not async-signal-safe; this keeps the
// original design of doing the work directly in the handler.
void startTracing(int sig)
{
    signal(SIGUSR1, startTracing);   // re-arm the handler
    if (refCount > 0)
        return;
    if (dl == nullptr) {
        dl = dlopen("librpd_tracer.so", RTLD_LAZY);
    }
    if (dl == nullptr) {
        // Leave refCount at 0 so a later SIGUSR1 retries the load.
        fprintf(stderr, "rpdRemote: tracing failed\n");
        return;
    }
    void (*start_func) (void) = reinterpret_cast<void (*)(void)>(dlsym(dl, "rpdstart"));
    if (start_func == nullptr) {
        fprintf(stderr, "rpdRemote: tracing failed\n");
        return;
    }
    fprintf(stderr, "rpdRemote: tracing started\n");
    start_func();
    // Count only a start that actually happened, so stopTracing() stays paired with it.
    ++refCount;
}

// SIGUSR2 handler: call rpdstop() and rpdflush() of the loaded tracer.
// Does nothing unless a matching start is active, so repeated stop signals
// cannot stop/flush twice or push refCount below zero.
void stopTracing(int sig)
{
    signal(SIGUSR2, stopTracing);    // re-arm the handler
    if (refCount < 1)
        return;
    if (dl) {
        void (*stop_func) (void) = reinterpret_cast<void (*)(void)>(dlsym(dl, "rpdstop"));
        if (stop_func) {
            fprintf(stderr, "rpdRemote: tracing stopped\n");
            stop_func();
        }
        void (*flush_func) (void) = reinterpret_cast<void (*)(void)>(dlsym(dl, "rpdflush"));
        if (flush_func) {
            fprintf(stderr, "rpdRemote: trace flushed\n");
            flush_func();
        }
        // FIXME unloading is tricky, so don't
#if 0
        int ret = dlclose(dl);
        if (ret == 0)
            dl = nullptr;
#endif
        --refCount;
    }
}
Devices, Inc. All rights reserved. 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 

# Print the expected command line and exit with status 1.
usage()
{
    echo "Usage: $0 <start|stop> <pid>"
    exit 1
}

# Exactly two arguments are required: the action and the target process id.
if [ $# -ne 2 ] ; then
    usage
fi

# start -> SIGUSR1, stop -> SIGUSR2, sent to the process given as $2.
# The pid is quoted so an empty or malformed value is passed to kill as-is.
if [ "$1" = "start" ] ; then
    kill -s SIGUSR1 "$2"
elif [ "$1" = "stop" ] ; then
    kill -s SIGUSR2 "$2"
else
    usage
fi
28 | DEBUG = True 29 | 30 | ALLOWED_HOSTS = ['172.31.8.245'] 31 | 32 | 33 | # Application definition 34 | 35 | INSTALLED_APPS = [ 36 | 'rocpd.apps.RocpdConfig', 37 | 'django.contrib.admin', 38 | 'django.contrib.auth', 39 | 'django.contrib.contenttypes', 40 | 'django.contrib.sessions', 41 | 'django.contrib.messages', 42 | 'django.contrib.staticfiles', 43 | ] 44 | 45 | MIDDLEWARE = [ 46 | 'django.middleware.security.SecurityMiddleware', 47 | 'django.contrib.sessions.middleware.SessionMiddleware', 48 | 'django.middleware.common.CommonMiddleware', 49 | 'django.middleware.csrf.CsrfViewMiddleware', 50 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 51 | 'django.contrib.messages.middleware.MessageMiddleware', 52 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 53 | ] 54 | 55 | ROOT_URLCONF = 'rocmProfileData.urls' 56 | 57 | TEMPLATES = [ 58 | { 59 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 60 | 'DIRS': [], 61 | 'APP_DIRS': True, 62 | 'OPTIONS': { 63 | 'context_processors': [ 64 | 'django.template.context_processors.debug', 65 | 'django.template.context_processors.request', 66 | 'django.contrib.auth.context_processors.auth', 67 | 'django.contrib.messages.context_processors.messages', 68 | ], 69 | }, 70 | }, 71 | ] 72 | 73 | WSGI_APPLICATION = 'rocmProfileData.wsgi.application' 74 | 75 | 76 | # Database 77 | # https://docs.djangoproject.com/en/2.2/ref/settings/#databases 78 | 79 | DATABASES = { 80 | 'default': { 81 | 'ENGINE': 'django.db.backends.sqlite3', 82 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 83 | } 84 | } 85 | 86 | 87 | # Password validation 88 | # https://docs.djangoproject.com/en/2.2/ref/settings/#auth-password-validators 89 | 90 | AUTH_PASSWORD_VALIDATORS = [ 91 | { 92 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 93 | }, 94 | { 95 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 96 | }, 97 | { 98 | 'NAME': 
'django.contrib.auth.password_validation.CommonPasswordValidator', 99 | }, 100 | { 101 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 102 | }, 103 | ] 104 | 105 | 106 | # Internationalization 107 | # https://docs.djangoproject.com/en/2.2/topics/i18n/ 108 | 109 | LANGUAGE_CODE = 'en-us' 110 | 111 | TIME_ZONE = 'UTC' 112 | 113 | USE_I18N = True 114 | 115 | USE_L10N = True 116 | 117 | USE_TZ = True 118 | 119 | 120 | # Static files (CSS, JavaScript, Images) 121 | # https://docs.djangoproject.com/en/2.2/howto/static-files/ 122 | 123 | STATIC_URL = '/static/' 124 | -------------------------------------------------------------------------------- /rocmProfileData/urls.py: -------------------------------------------------------------------------------- 1 | """rocmProfileData URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/2.2/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: path('', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.urls import include, path 14 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) 15 | """ 16 | from django.contrib import admin 17 | from django.urls import include, path 18 | 19 | urlpatterns = [ 20 | path('rocpd/', include('rocpd.urls')), 21 | path('admin/', admin.site.urls), 22 | ] 23 | -------------------------------------------------------------------------------- /rocmProfileData/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for rocmProfileData project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 
class Command(BaseCommand):
    """
    Management command that loads rocprofiler text traces into the database.

    --api_input_file lines are stored as Api rows and --ops_input_file lines
    as Op rows; each op is linked to the Api row created from the API-file
    line whose 1-based line number matches the number that follows the op
    name. Everything runs inside a single transaction. --preview is accepted
    but is not read by handle().
    """
    help = 'Import HIP API and HCC op traces produced by rocprofiler'

    # "<start>:<end> <pid>:<tid> <apiName>(<args>)..."
    _API_LINE = re.compile(r"^(\d*):(\d*)\s+(\d*):(\d*)\s+(\w+)\((.*)\).*$")
    # "<start>:<end> <gpuId>:<queueId> <opName>:<api line number>..."
    _OP_LINE = re.compile(r"^(\d*):(\d*)\s+(\d*):(\d*)\s+(\w+):(\d*).*$")

    def add_arguments(self, parser):
        """Register the two optional input files and the --preview flag."""
        parser.add_argument('--ops_input_file', type=str, help="hcc_ops_trace.txt from rocprofiler")
        parser.add_argument('--api_input_file', type=str, help="hip_api_trace.txt from rocprofiler")
        parser.add_argument('--preview', action='store_true', help='Preview import only')

    def handle(self, *args, **options):
        """Import the API file first (if given), then the ops file (if given)."""
        with transaction.atomic():
            apiKeys = {}
            if options['api_input_file']:
                print(f"Importing hip api calls from {options['api_input_file']}")
                apiKeys = self._import_api_calls(options['api_input_file'])

            if options['ops_input_file']:
                print(f"Importing hcc ops from {options['ops_input_file']}")
                self._import_ops(options['ops_input_file'], apiKeys)

    @staticmethod
    def _intern_string(value):
        """Return a String row holding value, creating and saving one if none exists."""
        existing = String.objects.filter(string=value).first()
        if existing is not None:
            return existing
        created = String(string=value)
        created.save()
        return created

    def _import_api_calls(self, path):
        """Store every matching line of path as an Api row.

        Returns a dict mapping the 1-based line number to the new Api id.
        """
        api_keys = {}
        # 'with' closes the file even when a save() raises.
        with open(path, 'r', encoding="utf-8") as infile:
            for lineno, line in enumerate(infile, start=1):
                m = self._API_LINE.match(line)
                if m:
                    entry = Api(pid=m.group(3),
                                tid=m.group(4),
                                start=m.group(1),
                                end=m.group(2))
                    entry.apiName = self._intern_string(m.group(5))
                    entry.args = self._intern_string(m.group(6))
                    entry.save()
                    api_keys[lineno] = entry.id

                # Progress output, same cadence and numbers as before.
                if lineno % 100 == 99:
                    self.stdout.write(f"{lineno + 1}")
        return api_keys

    def _import_ops(self, path, api_keys):
        """Store every matching line of path as an Op row and link it to its Api row."""
        with open(path, 'r', encoding="utf-8") as infile:
            for lineno, line in enumerate(infile, start=1):
                m = self._OP_LINE.match(line)
                if m:
                    entry = Op(gpuId=m.group(3),
                               queueId=m.group(4),
                               start=m.group(1),
                               end=m.group(2))
                    entry.opType = self._intern_string(m.group(5))
                    entry.description = self._intern_string("")
                    entry.save()

                    # Look up and link the related API call. A missing or
                    # unparsable reference is skipped (best effort); any other
                    # error is allowed to surface instead of being swallowed.
                    try:
                        apiEntry = Api.objects.get(pk=api_keys[int(m.group(6))])
                    except (KeyError, ValueError, Api.DoesNotExist):
                        pass
                    else:
                        apiEntry.ops.add(entry)
                        apiEntry.save()

                if lineno % 100 == 99:
                    self.stdout.write(f"{lineno + 1}")
4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
class Metadata(models.Model):
    """A tag/value pair; both columns are strings of up to 4096 characters."""
    tag = models.CharField(max_length=4096)
    value = models.CharField(max_length=4096)

class String(models.Model):
    """Shared string table.

    Other models (api names, args, op types, kernel names, ...) point at rows
    of this table through foreign keys instead of storing text themselves.
    """
    string = models.CharField(max_length=4096)
    class Meta:
        # Index the text column so rows can be looked up by value.
        indexes = [
            models.Index(fields=['string'])
        ]

class KernelCodeObject(models.Model):
    """Three integer properties of a kernel code object, referenced by KernelApi."""
    # NOTE(review): presumably vector/scalar register counts and a barrier
    # count -- confirm against the producer of this data.
    vgpr = models.IntegerField(default=0)
    sgpr = models.IntegerField(default=0)
    fbar = models.IntegerField(default=0)

class Api(models.Model):
    """One api call: issuing pid/tid, name, arguments, start/end and linked ops."""
    pid = models.IntegerField(default=0)
    tid = models.IntegerField(default=0)
    # related_name='+' disables the reverse accessor on String.
    apiName = models.ForeignKey(String, related_name='+', on_delete=models.PROTECT)
    args = models.ForeignKey(String, related_name='+', on_delete=models.PROTECT)
    #ops = models.ManyToManyField(Op, through = 'ApiOps')
    # Op is declared further down, hence the string reference.
    ops = models.ManyToManyField('Op')
    start = models.IntegerField(default=0)
    end = models.IntegerField(default=0)

class Op(models.Model):
    """One operation, identified by gpu, queue and sequence id, with start/end."""
    gpuId = models.IntegerField(default=0)
    queueId = models.IntegerField(default=0)
    sequenceId = models.IntegerField(default=0)
    opType = models.ForeignKey(String, related_name='+', on_delete=models.PROTECT)
    description = models.ForeignKey(String, related_name='+', on_delete=models.PROTECT)
    #inputSignals = models.ManyToManyField(Op, through = 'InputSignal')
    # Self-referential relation: ops whose signals this op takes as input.
    inputSignals = models.ManyToManyField('self')
    completionSignal = models.CharField(max_length=18)    #64 bit int
    start = models.IntegerField(default=0)
    end = models.IntegerField(default=0)

class KernelApi(Api):
    """Api subclass adding kernel-launch details (grid/workgroup sizes, code object, kernel name)."""
    #api = models.OneToOneField(Api, on_delete=models.PROTECT, primary_key=True)
    stream = models.CharField(max_length=18)
    gridX = models.IntegerField(default=0)
    gridY = models.IntegerField(default=0)
    gridZ = models.IntegerField(default=0)
    workgroupX = models.IntegerField(default=0)
    workgroupY = models.IntegerField(default=0)
    workgroupZ = models.IntegerField(default=0)
    groupSegmentSize = models.IntegerField(default=0)
    privateSegmentSize = models.IntegerField(default=0)
    codeObject = models.ForeignKey(KernelCodeObject, on_delete=models.PROTECT)
    kernelName = models.ForeignKey(String, related_name='+', on_delete=models.PROTECT)
    kernelArgAddress = models.CharField(max_length=18)    #64 bit int
    aquireFence = models.CharField(max_length=8)  #(none, agent, system)
    releaseFence = models.CharField(max_length=8) #(none, agent, system)

class CopyApi(Api):
    """Api subclass adding copy details (size, extents, kind, endpoints, sync/pinned flags)."""
    #api = models.OneToOneField(Api, on_delete=models.PROTECT, primary_key=True)
    stream = models.CharField(max_length=18)
    size = models.IntegerField(default=0)
    width = models.IntegerField(default=0)
    height = models.IntegerField(default=0)
    kind = models.IntegerField(default=0)         # enum
    dst = models.CharField(max_length=18)
    src = models.CharField(max_length=18)
    dstDevice = models.IntegerField(default=0)    # GPU id or -1
    srcDevice = models.IntegerField(default=0)    # GPU id or -1
    sync = models.BooleanField()
    pinned = models.BooleanField()

class AnnotationApi(Api):
    """Api subclass adding a domain, a category and a short data string."""
    domain = models.ForeignKey(String, related_name='+', on_delete=models.PROTECT)
    category = models.ForeignKey(String, related_name='+', on_delete=models.PROTECT)
    data = models.CharField(max_length=8)

class BarrierOp(Op):
    """Op subclass adding a signal count and acquire/release fence scopes."""
    #op = models.OneToOneField(Ops, on_delete=models.PROTECT, primary_key=True)
    signalCount = models.IntegerField()
    aquireFence = models.CharField(max_length=8)  #(none, agent, system)
    releaseFence = models.CharField(max_length=8) #(none, agent, system)

class Monitor(models.Model):
    """A monitored value of one type on one device over a start/end interval."""
    class DeviceType(models.TextChoices):
        GPU = 'gpu'
        CPU = 'cpu'
    class MonitorType(models.TextChoices):
        MCLK = "mclk"
        SCLK = "sclk"
        TEMP = "temp"
        POWER = "power"
        FAN = "fan%"
        VRAM = "vram%"
        GPU = "gpu%"
    deviceType = models.CharField(max_length = 16, choices = DeviceType.choices)
    deviceId = models.IntegerField(default=0)
    monitorType = models.CharField(max_length = 16, choices = MonitorType.choices)
    start = models.IntegerField(default=0)
    end = models.IntegerField(default=0)
    # Stored as text, whatever the monitor type.
    value = models.CharField(max_length=255)

class StackFrame(models.Model):
    """One named stack frame, at a given depth, attached to an api call."""
    api = models.ForeignKey(Api, related_name='+', on_delete=models.PROTECT)
    depth = models.IntegerField(default=0)
    name = models.ForeignKey(String, related_name='+', on_delete=models.PROTECT)

#class InputSignal(models.Model)
#    op = models.ForeignKey(Ops, on_delete=models.PROTECT)
#    inputOp = models.ForeignKey(Ops, on_delete=models.PROTECT)

#class ApiOps(models.Model)
#    api = models.ForeignKey(Api, on_delete=models.PROTECT)
#    op = models.ForeignKey(Ops, on_delete=models.PROTECT)
def index(request):
    """Return a plain-text placeholder response for the rocpd index page."""
    # HttpResponse is not among this module's top-level imports; without this
    # import the view raises NameError on every request.
    from django.http import HttpResponse

    return HttpResponse("rocpd index")
def generateCallStacks(imp):
    """Populate ``ext_callstack`` with caller/callee rows for every api call.

    For each (pid, tid) pair the start and end events of all api calls are
    replayed in timestamp order against a stack.  Whenever a call returns, one
    row is written for every frame currently on the stack:

        (row id, frame id, returning call id, depth, cpu_time, gpu_time)

    ``depth`` is the distance between the frame and the returning call (0 for
    the call itself).  ``cpu_time`` is the returning call's duration minus the
    time already attributed to calls nested inside it, and ``gpu_time`` is the
    summed duration of the ops linked to the returning call.

    Raises:
        Exception: if the metadata tag ``Callstack::Generated`` is already set.
    """
    meta = Metadata(imp)
    if meta.get("Callstack::Generated") is not None:
        raise Exception("Callstack data has already been generated")

    count = 0
    call_inserts = []

    class StackFrame:
        """An api call that has started but not yet returned."""
        def __init__(self, id, start):
            self.id = id
            self.start = start
            self.child_cpu_time = 0    # cpu time already attributed to nested calls

    def commitRecords():
        """Flush the pending rows to ext_callstack and commit."""
        nonlocal call_inserts
        imp.connection.executemany("insert into ext_callstack(id, parent_id, child_id, depth, cpu_time, gpu_time) values (?,?,?,?,?,?)", call_inserts)
        imp.connection.commit()
        call_inserts = []

    # One row per start event ('1') and one per end event ('-1'), merged and
    # sorted by timestamp.  End events carry the gpu time of the linked ops.
    events_query = (
        "select id, start as ts, '1', '' from rocpd_api where pid=? and tid=? "
        "UNION ALL "
        "select X.id, X.end as timestamp, '-1', Y.gpu_time from rocpd_api X "
        "LEFT JOIN (select A.api_id, sum(B.end - B.start) as gpu_time from rocpd_api_ops A join rocpd_op B on B.id = A.op_id group by A.api_id) Y on Y.api_id = X.id "
        "where pid=? and tid=? order by ts"
    )

    for pidtid in imp.connection.execute("select distinct pid, tid from rocpd_api"):
        stack = deque()
        maxdepth = 0

        for row in imp.connection.execute(events_query, (pidtid[0], pidtid[1], pidtid[0], pidtid[1])):
            if row[2] == '1':
                stack.append(StackFrame(row[0], row[1]))
            elif row[2] == '-1':
                if len(stack) > maxdepth:
                    maxdepth = len(stack)
                depth = len(stack)
                returning = stack[depth - 1]
                cpu_time = row[1] - returning.start - returning.child_cpu_time   # cpu duration of returning call
                gpu_time = 0 if row[3] is None else row[3]
                # Walk from the outermost frame to the returning call itself.
                for span in stack:
                    depth = depth - 1
                    if depth > 0:
                        # 'span' is an ancestor of the returning call: charge it
                        # the child's time.  Indexing stack[depth] here would
                        # address a different frame, because 'depth' counts
                        # down while the iteration moves up the stack.
                        span.child_cpu_time = span.child_cpu_time + cpu_time
                    call_inserts.append((count, span.id, row[0], depth, cpu_time, gpu_time))
                    count = count + 1
                    if count % 100000 == 99999:
                        commitRecords()
                stack.pop()

        print(f"pid {pidtid[0]} tid {pidtid[1]} maxDepth {maxdepth}")

    meta.set("Callstack::Generated", "True")
    commitRecords()


def createCallStackTable(imp):
    """Create the ``ext_callstack`` table and the views built on top of it.

    Raises:
        Exception: if the metadata tag ``Callstack::Table`` is already set.
    """
    meta = Metadata(imp)
    if meta.get("Callstack::Table") is not None:
        raise Exception("Callstack table has already been created")

    #Create table
    imp.connection.execute('CREATE TABLE IF NOT EXISTS "ext_callstack" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "parent_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "child_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "depth" integer NOT NULL, "cpu_time" integer NOT NULL DEFAULT 0, "gpu_time" integer NOT NULL DEFAULT 0)')

    # Make some working views
    # inclusive: totals over a call and everything nested in it
    # exclusive: totals of the call itself (depth = 0 rows only)
    imp.connection.execute('CREATE VIEW IF NOT EXISTS callStack_inclusive as select parent_id, sum(cpu_time) as cpu_time, sum(gpu_time) as gpu_time from ext_callstack group by parent_id')
    imp.connection.execute('CREATE VIEW IF NOT EXISTS callStack_exclusive as select parent_id, sum(cpu_time) as cpu_time, sum(gpu_time) as gpu_time from ext_callstack where depth = 0 group by parent_id')
    imp.connection.execute('CREATE VIEW IF NOT EXISTS callStack_inclusive_name as select A.parent_id, B.apiName, B.args, sum(cpu_time) as cpu_time, sum(gpu_time) as gpu_time from ext_callstack A join api B on B.id = A.parent_id group by parent_id')
    imp.connection.execute('CREATE VIEW IF NOT EXISTS callStack_exclusive_name as select A.parent_id, B.apiName, B.args, sum(cpu_time) as cpu_time, sum(gpu_time) as gpu_time from ext_callstack A join api B on B.id = A.parent_id where A.depth = 0 group by parent_id')

    meta.set("Callstack::Table", "True")
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | # THE SOFTWARE. 
def deserializeApis(imp, srcApis):
    """Promote serialized roctx messages to first-class api and op rows.

    Every ``rocpd_api`` row whose api name is listed in ``srcApis`` has its
    argument string parsed as ``key=value`` pairs separated by ``|``:

    * an ``s_api`` key creates a new ``rocpd_api`` row named after its value,
      reusing the pid, tid, start, end and args of the source row;
    * an ``s_op`` key creates a new ``rocpd_op`` row named after its value;
      ``gpuId``, ``queueId``, ``sequenceId``, ``completionSignal``, ``start``
      and ``end`` are taken from the message when present.

    Source rows that produced at least one new row are deleted.

    Args:
        imp: import state providing ``connection``, ``getStringId``,
            ``commitStrings`` and the ``api_id`` / ``op_id`` counters.
        srcApis: sequence of api names whose rows should be deserialized.
    """
    batch_size = 100000
    api_inserts = []    # rows to bulk insert
    op_inserts = []     # rows to bulk insert
    api_removes = []    # rows to bulk remove

    def commitRecords():
        """Write out all pending inserts/deletes and commit."""
        nonlocal api_inserts
        nonlocal op_inserts
        nonlocal api_removes
        imp.commitStrings()
        imp.connection.executemany("insert into rocpd_api(id, pid, tid, start, end, apiName_id, args_id) values (?,?,?,?,?,?,?)", api_inserts)
        imp.connection.executemany("insert into rocpd_op(id, gpuId, queueId, sequenceId, completionSignal, start, end, description_id, opType_id) values (?,?,?,?,?,?,?,?,?)", op_inserts)
        imp.connection.executemany("delete from rocpd_api where id = ?", api_removes)
        imp.connection.commit()
        api_inserts = []
        op_inserts = []
        api_removes = []

    # Bind the api names as parameters rather than pasting their repr into the
    # statement, so names containing quotes cannot break (or alter) the query.
    placeholders = ",".join("?" * len(srcApis))
    query = "select A.id, A.pid, A.tid, A.start, A.end, A.args_id, B.string, C.string from rocpd_api A join rocpd_string B on B.id = A.apiName_id join rocpd_string C on C.id = A.args_id where A.apiName_id in (select id from rocpd_string where string in (%s))" % placeholders

    for row in imp.connection.execute(query, list(srcApis)):
        args = {}
        for line in row[7].split('|'):
            key, value = line.partition("=")[::2]
            args[key.strip()] = value.strip()

        promoted = False

        if 's_api' in args:
            name_id = imp.getStringId(args['s_api'])
            api_inserts.append((imp.api_id, row[1], row[2], row[3], row[4], name_id, row[5]))
            imp.api_id = imp.api_id + 1
            promoted = True

        if 's_op' in args:
            name_id = imp.getStringId(args['s_op'])
            gpuId = args.get('gpuId', '0')
            queueId = args.get('queueId', '0')
            sequenceId = args.get('sequenceId', '0')
            completionSignal = args.get('completionSignal', '')
            start = args.get('start', row[3])
            end = args.get('end', row[4])
            op_inserts.append((imp.op_id, gpuId, queueId, sequenceId, completionSignal, start, end, row[5], name_id))
            imp.op_id = imp.op_id + 1
            promoted = True

        if promoted:
            # Queue the source row for removal once, even if it produced both
            # an api and an op.
            api_removes.append((row[0],))

        # Flush on the number of pending rows, so a step of two new rows
        # cannot jump over the threshold.
        if len(api_inserts) + len(op_inserts) >= batch_size:
            commitRecords()
    commitRecords()
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | # THE SOFTWARE. 
class RocpdImportData:
    """Book-keeping shared by rpd importers.

    Tracks the next free primary key for the string, op and api tables, caches
    the ``rocpd_string`` table as a ``string -> id`` dict, and batches new
    strings until ``commitStrings`` writes them out.
    """

    def __init__(self):
        #Set up primary keys
        self.string_id = 1
        self.op_id = 1
        self.api_id = 1
        #self.hsa_id = 1
        # Dicts
        self.strings = {}    # string -> id
        # Empty string
        self.empty_string_id = 1
        self.connection = None
        self.string_inserts = []

    def __del__(self):
        # Last-chance flush of strings that were handed out but never written.
        self.commitStrings(True)

    # initialize
    def initNew(self, connection):
        """Attach to a freshly created database and seed the empty string."""
        self.connection = connection
        self.initEmptyString()

    def resumeExisting(self, connection):
        """Attach to a populated database and load its strings and id counters."""
        self.connection = connection
        self.buildStringCache()
        self.buildCurrentIds()

    def initEmptyString(self):
        """Insert the empty string under the next free string id."""
        self.empty_string_id = self.string_id
        self.string_id = self.string_id + 1
        self.strings[""] = self.empty_string_id
        self.connection.execute("insert into rocpd_string(id, string) values (?,?)", (self.empty_string_id, ""))

    def buildStringCache(self):
        """Load ``rocpd_string`` into the cache, creating the empty string if needed."""
        self.strings = {}

        # Establish the next free id first: the empty string may have to be
        # inserted below and must not reuse an id the table already holds.
        for row in self.connection.execute("select id from rocpd_string order by id desc limit 1"):
            self.string_id = row[0] + 1

        # Find the empty string, create if needed
        self.empty_string_id = -1
        for row in self.connection.execute("select id from rocpd_string where string=''"):
            self.empty_string_id = row[0]
        if self.empty_string_id == -1:
            self.initEmptyString()

        for row in self.connection.execute("select id, string from rocpd_string"):
            self.strings[row[1]] = row[0]

    def buildCurrentIds(self):
        """Set the op/api counters to one past the highest id in each table."""
        for row in self.connection.execute("select id from rocpd_op order by id desc limit 1"):
            self.op_id = row[0] + 1
        for row in self.connection.execute("select id from rocpd_api order by id desc limit 1"):
            self.api_id = row[0] + 1

    # Handle string cache and string table insert in one place

    def getStringId(self, val):
        """Return the id of ``val``, allocating one and queueing its insert if new."""
        string_id = self.strings.get(val)
        if string_id is None:
            string_id = self.string_id
            self.strings[val] = string_id
            self.string_inserts.append((string_id, val))
            self.string_id = self.string_id + 1
        return string_id

    def commitStrings(self, commit = False):
        """Write queued strings to ``rocpd_string``; also commit when ``commit`` is true.

        Does nothing while no connection is attached; queued strings are kept
        so that a later call can still write them.
        """
        if self.connection is None:
            return
        if len(self.string_inserts) > 0:
            self.connection.executemany("insert into rocpd_string(id, string) values (?,?)", self.string_inserts)
            if commit == True:
                self.connection.commit()
            self.string_inserts = []
class Metadata:
    """Read and write tag/value rows of the ``rocpd_metadata`` table.

    All statements go through ``importer.connection``; nothing is committed
    here.
    """

    def __init__(self, importer):
        self.importer = importer

    def get(self, tag):
        """Return the value of the first row stored under ``tag``, or None if absent."""
        cursor = self.importer.connection.execute('SELECT value from rocpd_metadata where tag = ?', (tag, ))
        found = cursor.fetchone()
        if found is None:
            return None
        return found[0]

    def set(self, tag, value):
        """Store ``value`` under ``tag``, updating the row when one already exists."""
        db = self.importer.connection
        if self.get(tag) is None:
            db.execute('INSERT INTO rocpd_metadata (tag, value) values (?,?)', (tag, value, ))
            return
        db.execute('UPDATE rocpd_metadata SET value = ? WHERE tag = ?', (value, tag, ))

    def clear(self, tag):
        """Delete every row stored under ``tag``."""
        db = self.importer.connection
        db.execute('DELETE FROM rocpd_metadata WHERE tag = ?', (tag, ))

    def listAll(self):
        """Print the second and third column of every row, ordered by tag."""
        rows = self.importer.connection.execute('SELECT * from rocpd_metadata ORDER BY tag')
        for entry in rows:
            pair = (entry[1], entry[2])
            print(f"{pair}")
class RocpdSchema:
    """Holds the rocpd schema scripts and applies them to a sqlite connection.

    The scripts are read from the ``schema_data`` directory next to this
    module when the object is constructed.
    """

    def __init__(self):
        base = Path(os.path.dirname(os.path.abspath(__file__)))

        sources = (
            ('tableSchema', 'schema_data/tableSchema.cmd'),
            ('indexSchema', 'schema_data/indexSchema.cmd'),
            ('index2Schema', 'schema_data/index2Schema.cmd'),
            ('utilitySchema', 'schema_data/utilitySchema.cmd'),
        )
        for attribute, relative in sources:
            with open(str(base/relative), 'r') as handle:
                setattr(self, attribute, handle.read())

    def writeSchema(self, connection):
        """Create tables, primary indexes and utility objects."""
        for script in (self.tableSchema, self.indexSchema, self.utilitySchema):
            connection.executescript(script)

    def writeBasicSchema(self, connection):
        """Create tables and primary indexes only."""
        for script in (self.tableSchema, self.indexSchema):
            connection.executescript(script)

    def writeFullSchema(self, connection):
        """Create tables, both index sets and utility objects."""
        for script in (self.tableSchema, self.indexSchema, self.index2Schema, self.utilitySchema):
            connection.executescript(script)
-- Additional (secondary) indexes for the rocpd tables.
-- The index on "rocpd_string" ("string") is created by indexSchema.cmd.
--
-- The two kernel indexes keep their historical "rocpd_kernelop_*" names but
-- are built on "rocpd_kernelapi", the table tableSchema.cmd actually creates;
-- there is no "rocpd_kernelop" table, so targeting it made this script fail.
-- IF NOT EXISTS matches the table DDL so the script can be re-applied.

CREATE INDEX IF NOT EXISTS "rocpd_api_apiName_id_54bc0a6c" ON "rocpd_api" ("apiName_id");
CREATE INDEX IF NOT EXISTS "rocpd_api_args_id_6a626626" ON "rocpd_api" ("args_id");
CREATE UNIQUE INDEX IF NOT EXISTS "rocpd_api_ops_api_id_op_id_e7e52317_uniq" ON "rocpd_api_ops" ("api_id", "op_id");
CREATE INDEX IF NOT EXISTS "rocpd_api_ops_api_id_f87632ad" ON "rocpd_api_ops" ("api_id");
CREATE INDEX IF NOT EXISTS "rocpd_api_ops_op_id_b35ab7c9" ON "rocpd_api_ops" ("op_id");
CREATE INDEX IF NOT EXISTS "rocpd_kernelop_codeObject_id_ad04be1d" ON "rocpd_kernelapi" ("codeObject_id");
CREATE INDEX IF NOT EXISTS "rocpd_kernelop_kernelName_id_96546171" ON "rocpd_kernelapi" ("kernelName_id");
CREATE UNIQUE INDEX IF NOT EXISTS "rocpd_op_inputSignals_from_op_id_to_op_id_163a30a7_uniq" ON "rocpd_op_inputSignals" ("from_op_id", "to_op_id");
CREATE INDEX IF NOT EXISTS "rocpd_op_inputSignals_from_op_id_5fd8f825" ON "rocpd_op_inputSignals" ("from_op_id");
CREATE INDEX IF NOT EXISTS "rocpd_op_inputSignals_to_op_id_d34a7779" ON "rocpd_op_inputSignals" ("to_op_id");
CREATE INDEX IF NOT EXISTS "rocpd_op_description_id_c8dc8310" ON "rocpd_op" ("description_id");
CREATE INDEX IF NOT EXISTS "rocpd_op_opType_id_810aaccd" ON "rocpd_op" ("opType_id");
"rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED, "signalCount" integer NOT NULL, "aquireFence" varchar(8) NOT NULL, "releaseFence" varchar(8) NOT NULL); 4 | --CREATE TABLE IF NOT EXISTS "rocpd_copyop" ("op_ptr_id" integer NOT NULL PRIMARY KEY REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED, "size" integer NOT NULL, "dst" varchar(18) NOT NULL, "sync" bool NOT NULL, "pinned" bool NOT NULL, "dstDevice" integer NOT NULL, "height" integer NOT NULL, "kind" integer NOT NULL, "srcDevice" integer NOT NULL, "width" integer NOT NULL, "src" varchar(18) NOT NULL); 5 | --CREATE TABLE IF NOT EXISTS "rocpd_op_inputSignals" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "from_op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED, "to_op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED); 6 | --CREATE TABLE IF NOT EXISTS "rocpd_op" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "gpuId" integer NOT NULL, "queueId" integer NOT NULL, "sequenceId" integer NOT NULL, "completionSignal" varchar(18) NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "description_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED, "opType_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED); 7 | --CREATE TABLE IF NOT EXISTS "rocpd_api" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "pid" integer NOT NULL, "tid" integer NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "apiName_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED, "args_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED); 8 | --CREATE TABLE IF NOT EXISTS "rocpd_api_ops" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "api_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED); 9 | --CREATE TABLE IF NOT 
-- Basic index set for the rocpd tables: only the string lookup index.
-- The remaining per-table indexes are created by index2Schema.cmd.
-- IF NOT EXISTS matches the table DDL so the script can be re-applied.

CREATE INDEX IF NOT EXISTS "rocpd_strin_string_c7b9cd_idx" ON "rocpd_string" ("string");
("description_id"); 24 | --CREATE INDEX "rocpd_op_opType_id_810aaccd" ON "rocpd_op" ("opType_id"); 25 | 26 | -------------------------------------------------------------------------------- /rocpd_python/rocpd/schema_data/tableSchema.cmd: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS "rocpd_kernelcodeobject" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "vgpr" integer NOT NULL, "sgpr" integer NOT NULL, "fbar" integer NOT NULL); 2 | CREATE TABLE IF NOT EXISTS "rocpd_string" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "string" varchar(4096) NOT NULL); 3 | CREATE TABLE IF NOT EXISTS "rocpd_barrierop" ("op_ptr_id" integer NOT NULL PRIMARY KEY REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED, "signalCount" integer NOT NULL, "aquireFence" varchar(8) NOT NULL, "releaseFence" varchar(8) NOT NULL); 4 | CREATE TABLE IF NOT EXISTS "rocpd_copyapi" ("api_ptr_id" integer NOT NULL PRIMARY KEY REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "stream" varchar(18) NOT NULL, "size" integer NOT NULL, "width" integer NOT NULL, "height" integer NOT NULL, "kind" integer NOT NULL, "dst" varchar(18) NOT NULL, "src" varchar(18) NOT NULL, "dstDevice" integer NOT NULL, "srcDevice" integer NOT NULL, "sync" bool NOT NULL, "pinned" bool NOT NULL); 5 | CREATE TABLE IF NOT EXISTS "rocpd_op_inputSignals" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "from_op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED, "to_op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED); 6 | CREATE TABLE IF NOT EXISTS "rocpd_op" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "gpuId" integer NOT NULL, "queueId" integer NOT NULL, "sequenceId" integer NOT NULL, "completionSignal" varchar(18) NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "description_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED, "opType_id" 
-- Table definitions for the rocpd schema.
-- Indexes are kept in indexSchema.cmd / index2Schema.cmd, views in utilitySchema.cmd.

CREATE TABLE IF NOT EXISTS "rocpd_kernelcodeobject" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "vgpr" integer NOT NULL, "sgpr" integer NOT NULL, "fbar" integer NOT NULL);
CREATE TABLE IF NOT EXISTS "rocpd_string" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "string" varchar(4096) NOT NULL);
CREATE TABLE IF NOT EXISTS "rocpd_barrierop" ("op_ptr_id" integer NOT NULL PRIMARY KEY REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED, "signalCount" integer NOT NULL, "aquireFence" varchar(8) NOT NULL, "releaseFence" varchar(8) NOT NULL);
CREATE TABLE IF NOT EXISTS "rocpd_copyapi" ("api_ptr_id" integer NOT NULL PRIMARY KEY REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "stream" varchar(18) NOT NULL, "size" integer NOT NULL, "width" integer NOT NULL, "height" integer NOT NULL, "kind" integer NOT NULL, "dst" varchar(18) NOT NULL, "src" varchar(18) NOT NULL, "dstDevice" integer NOT NULL, "srcDevice" integer NOT NULL, "sync" bool NOT NULL, "pinned" bool NOT NULL);
CREATE TABLE IF NOT EXISTS "rocpd_op_inputSignals" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "from_op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED, "to_op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED);
CREATE TABLE IF NOT EXISTS "rocpd_op" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "gpuId" integer NOT NULL, "queueId" integer NOT NULL, "sequenceId" integer NOT NULL, "completionSignal" varchar(18) NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "description_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED, "opType_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED);
CREATE TABLE IF NOT EXISTS "rocpd_api" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "pid" integer NOT NULL, "tid" integer NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "apiName_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED, "args_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED);
CREATE TABLE IF NOT EXISTS "rocpd_api_ops" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "api_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED);
CREATE TABLE IF NOT EXISTS "rocpd_kernelapi" ("api_ptr_id" integer NOT NULL PRIMARY KEY REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "stream" varchar(18) NOT NULL, "gridX" integer NOT NULL, "gridY" integer NOT NULL, "gridZ" integer NOT NULL, "workgroupX" integer NOT NULL, "workgroupY" integer NOT NULL, "workgroupZ" integer NOT NULL, "groupSegmentSize" integer NOT NULL, "privateSegmentSize" integer NOT NULL, "kernelArgAddress" varchar(18) NOT NULL, "aquireFence" varchar(8) NOT NULL, "releaseFence" varchar(8) NOT NULL, "codeObject_id" integer NOT NULL REFERENCES "rocpd_kernelcodeobject" ("id") DEFERRABLE INITIALLY DEFERRED, "kernelName_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED);
CREATE TABLE IF NOT EXISTS "rocpd_metadata" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "tag" varchar(4096) NOT NULL, "value" varchar(4096) NOT NULL);
CREATE TABLE IF NOT EXISTS "rocpd_monitor" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "deviceType" varchar(16) NOT NULL, "deviceId" integer NOT NULL, "monitorType" varchar(16) NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "value" varchar(255) NOT NULL);
CREATE TABLE IF NOT EXISTS "rocpd_stackframe" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "api_ptr_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "depth" integer NOT NULL, "name_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED);


-- Record the schema version.  String values use single quotes: double quotes
-- denote identifiers in SQL and are only accepted as strings through SQLite's
-- legacy fallback, which a build may disable.
INSERT INTO "rocpd_metadata"(tag, value) VALUES ('schema_version', '2');
-- Convenience views over the rocpd tables.

-- rocpd_api rows with apiName_id / args_id resolved to text through rocpd_string.
CREATE VIEW api AS SELECT rocpd_api.id,pid,tid,start,end,A.string AS apiName, B.string AS args FROM rocpd_api INNER JOIN rocpd_string A ON A.id = rocpd_api.apiName_id INNER JOIN rocpd_string B ON B.id = rocpd_api.args_id;
-- rocpd_op rows with description_id / opType_id resolved to text through rocpd_string.
CREATE VIEW op AS SELECT rocpd_op.id,gpuId,queueId,sequenceId,start,end,A.string AS description, B.string AS opType FROM rocpd_op INNER JOIN rocpd_string A ON A.id = rocpd_op.description_id INNER JOIN rocpd_string B ON B.id = rocpd_op.opType_id;
-- Per gpuId: summed op duration (GpuTime) over the span from the earliest
-- start to the latest end of all ops (WallTime); Busy is their ratio.
CREATE VIEW busy AS select A.gpuId, GpuTime, WallTime, GpuTime*1.0/WallTime as Busy from (select gpuId, sum(end-start) as GpuTime from rocpd_op group by gpuId) A INNER JOIN (select max(end) - min(start) as WallTime from rocpd_op);

-- Per-name totals over rocpd_op, largest total duration first.  An op is named
-- by its opType when its description is the empty string, otherwise by its
-- description.
-- NOTE(review): the two halves are combined with UNION, not UNION ALL, so rows
-- with identical (name_id, start, end) collapse into one - confirm intended.
create view top as select C.string as Name, count(C.string) as TotalCalls, sum(A.end-A.start) / 1000 as TotalDuration_us, (sum(A.end-A.start)/count(C.string))/ 1000.0 as Ave_us, sum(A.end-A.start) * 100.0 / (select sum(A.end-A.start) from rocpd_op A) as Percentage from (select opType_id as name_id, start, end from rocpd_op where description_id in (select id from rocpd_string where string='') union select description_id, start, end from rocpd_op where description_id not in (select id from rocpd_string where string='')) A join rocpd_string C on C.id = A.name_id group by Name order by TotalDuration_us desc;


-- Kernel ops with launch args
-- (op rows linked through rocpd_api_ops to the rocpd_kernelapi row of the launching api call)
CREATE VIEW kernel AS SELECT B.id, gpuId, queueId, sequenceId, start, end, (end-start) AS duration, stream, gridX, gridY, gridz, workgroupX, workgroupY, workgroupZ, groupSegmentSize, privateSegmentSize, D.string AS kernelName FROM rocpd_api_ops A JOIN rocpd_op B on B.id = A.op_id JOIN rocpd_kernelapi C ON C.api_ptr_id = A.api_id JOIN rocpd_string D on D.id = kernelName_id;

-- All copies (api timing)
-- (start/end come from rocpd_api)
CREATE VIEW copy AS SELECT B.id, pid, tid, start, end, C.string AS apiName, stream, size, width, height, kind, dst, src, dstDevice, srcDevice, sync, pinned FROM rocpd_copyApi A JOIN rocpd_api B ON B.id = A.api_ptr_id JOIN rocpd_string C on C.id = B.apiname_id;

-- Async copies (op timing)
-- (start/end come from rocpd_op; only copies that have a row in rocpd_api_ops appear)
CREATE VIEW copyop AS SELECT B.id, gpuId, queueId, sequenceId, B.start, B.end, (B.end-B.start) AS duration, stream, size, width, height, kind, dst, src, dstDevice, srcDevice, sync, pinned, E.string AS apiName FROM rocpd_api_ops A JOIN rocpd_op B ON B.id = A.op_id JOIN rocpd_copyapi C ON C.api_ptr_id = A.api_id JOIN rocpd_api D on D.id = A.api_id JOIN rocpd_string E ON E.id = D.apiName_id;

-- Stack Frames
-- One row per frame: api id, api name, frame depth, frame name.
-- NOTE(review): both text columns are selected as bare "string" with no alias,
-- so the view exposes two columns derived from the same name - confirm how
-- consumers address them.
CREATE VIEW stackframe AS SELECT B.id, C.string, depth, D.string FROM rocpd_stackframe A JOIN rocpd_api B ON B.id = A.api_ptr_id JOIN rocpd_string C ON C.id = B.apiname_id JOIN rocpd_string D ON D.id = A.name_id;
def _quoteIdentifier(name):
    """Return *name* as a double-quoted SQL identifier (embedded quotes doubled)."""
    return '"' + str(name).replace('"', '""') + '"'


def cleanStrings(imp, fix_autograd):
    """Rebuild ``rocpd_string`` without duplicate or unreferenced strings.

    Every column known to reference ``rocpd_string.id`` is re-pointed at the
    id of the surviving copy of its string.  The change is committed and
    ``imp.resumeExisting()`` is called so *imp* reloads its state.

    Args:
        imp: importer object providing ``connection`` (a sqlite3 connection)
            and ``resumeExisting(connection)``.
        fix_autograd: when true, first truncate each string at the first
            ", seq", ", op_id", ", sizes" and ", input_op_ids" marker (applied
            in that order) so strings differing only in those tags merge.
    """
    import ast  # local import: only this function needs it

    connection = imp.connection

    # Create backup table
    connection.execute('CREATE TEMPORARY TABLE IF NOT EXISTS "rocpd_string_original" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "string" varchar(4096) NOT NULL)')
    connection.execute('INSERT into rocpd_string_original SELECT * from rocpd_string')

    # Make a list of all columns that reference rocpd_string
    # These will need to be updated and relinked
    # Also, we can detect and remove unreferenced strings

    # Well known references
    string_users = [
        ("rocpd_op", "description_id"),
        ("rocpd_op", "opType_id"),
        ("rocpd_api", "apiName_id"),
        ("rocpd_api", "args_id"),
        ("rocpd_kernelapi", "kernelName_id"),
    ]
    # Explicitly declared in rocpd_metadata.  Format is:
    #   tag = 'references::rocpd_string.id', value = '("table_name", "column_name")'
    for row in connection.execute("SELECT value from rocpd_metadata where tag='references::rocpd_string.id'"):
        # The value comes from the database file, so parse it as a literal;
        # eval() would execute arbitrary code stored in a crafted file.
        value = ast.literal_eval(row[0])
        if isinstance(value, tuple) and value not in string_users:
            string_users.append(value)

    # Normalize autograd strings
    if fix_autograd:
        # INSTR is used for the guard as well as the cut position.  A LIKE
        # guard is case-insensitive and treats '_' as a wildcard, so it could
        # select strings in which INSTR finds nothing; SUBSTR(string, 1, -1)
        # then turned them into ''.
        for marker in (", seq", ", op_id", ", sizes", ", input_op_ids"):
            connection.execute(
                "UPDATE rocpd_string_original set string = SUBSTR(string, 1, INSTR(string, ?) - 1) where INSTR(string, ?) > 0",
                (marker, marker),
            )

    # Clean up unreferenced strings.  Tools can change strings (names, args, etc)
    # and may leave strings that are no longer being used.
    connection.execute("""
        CREATE TEMPORARY TABLE IF NOT EXISTS "activeString" ("id" integer NOT NULL PRIMARY KEY);
        """)
    for user in string_users:
        # Identifiers cannot be bound as parameters; quote them instead.
        table = _quoteIdentifier(user[0])
        column = _quoteIdentifier(user[1])
        connection.execute(f"""INSERT OR IGNORE INTO "activeString" SELECT {column} from {table}""")

    # Drop, recreate, and populate the string table
    connection.execute("""
        DROP TABLE "rocpd_string";
        """)
    connection.execute("""
        CREATE TABLE IF NOT EXISTS "rocpd_string" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "string" varchar(4096) NOT NULL);
        """)
    connection.execute("""
        CREATE INDEX "rocpd_strin_string_c7b9cd_idx" ON "rocpd_string" ("string");
        """)
    connection.execute("""
        INSERT into rocpd_string(string) SELECT distinct string from rocpd_string_original where id in (SELECT id FROM "activeString") order by id;
        """)

    # Map from old id to new; UPDATE all tables with the new string id
    # WARNING: the 2nd union term handles a string referenced but not present in rocpd_string.
    #          E.g. a corrupt file.  Such references are pointed at id 1.
    connection.execute("""
        CREATE TEMPORARY VIEW IF NOT EXISTS mapper as SELECT A.id as before, B.id as after from rocpd_string_original A join rocpd_string B on B.string = A.string UNION ALL select id, 1 from activeString where id not in (select distinct id from rocpd_string_original);
        """)

    for user in string_users:
        table = _quoteIdentifier(user[0])
        column = _quoteIdentifier(user[1])
        connection.execute(f"""UPDATE {table} set {column} = (SELECT after from mapper A where {column}=A.before) """)

    # cleanup (the view goes first: it reads from both temporary tables)
    connection.execute("""
        DROP VIEW IF EXISTS mapper
        """)
    connection.execute("""
        DROP TABLE rocpd_string_original
        """)
    connection.execute("""
        DROP TABLE "activeString"
        """)

    connection.commit()
    imp.resumeExisting(connection)    # reload state


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Utilities for tidying rocpd_string table')
    parser.add_argument('input_rpd', type=str, help="input rpd db")
    parser.add_argument('--dedupe', action='store_true', help="Remove duplicate strings")
    parser.add_argument('--clean_autograd', action='store_true', help="Remove 'op' and 'seq' tags from strings")
    args = parser.parse_args()

    connection = sqlite3.connect(args.input_rpd)

    importData = RocpdImportData()
    importData.resumeExisting(connection) # load the current db state

    if args.dedupe or args.clean_autograd:
        cleanStrings(importData, args.clean_autograd)
    else:
        print("No action taken.  Check --help")
# Package metadata for the rocpd python module.
setup(
    name='rocpd',
    version='1.0',
    description='RocmProfileData profiling format',
    # Explicit package list rather than find_packages().  setuptools documents
    # `packages` as a list of names, so use a list instead of a set literal.
    packages=['rocpd'],
    include_package_data=True,
    python_requires='>=3.6',
    zip_safe=False,
)
4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | * THE SOFTWARE. 
22 | ********************************************************************************/ 23 | #include "ApiIdList.h" 24 | 25 | //#include 26 | // FIXME: make this work for cud and hip or turn into interface 27 | 28 | ApiIdList::ApiIdList() 29 | : m_invert(true) 30 | { 31 | } 32 | 33 | void ApiIdList::add(const std::string &apiName) 34 | { 35 | uint32_t cid = mapName(apiName); 36 | if (cid > 0) 37 | m_filter[cid] = 1; 38 | #if 0 39 | uint32_t cid = 0; 40 | if (roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, apiName.c_str(), &cid, NULL) == ROCTRACER_STATUS_SUCCESS) 41 | m_filter[cid] = 1; 42 | #endif 43 | } 44 | void ApiIdList::remove(const std::string &apiName) 45 | { 46 | uint32_t cid = mapName(apiName); 47 | if (cid > 0) 48 | m_filter.erase(cid); 49 | #if 0 50 | uint32_t cid = 0; 51 | if (roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, apiName.c_str(), &cid, NULL) == ROCTRACER_STATUS_SUCCESS) 52 | m_filter.erase(cid); 53 | #endif 54 | } 55 | 56 | bool ApiIdList::loadUserPrefs() 57 | { 58 | // FIXME: check an ENV variable that points to an exclude file 59 | return false; 60 | } 61 | bool ApiIdList::contains(uint32_t apiId) 62 | { 63 | return (m_filter.find(apiId) != m_filter.end()) ? !m_invert : m_invert; // XOR 64 | } 65 | -------------------------------------------------------------------------------- /rpd_tracer/ApiIdList.h: -------------------------------------------------------------------------------- 1 | /********************************************************************************* 2 | * Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | ********************************************************************************/ 22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | 31 | //#include 32 | 33 | // Usage 34 | // contains() are items you are interested in, i.e. 
// Usage
//   contains() are items you are interested in, i.e. matches the filter
//   "normal mode": things you add() are the only things matching the filter
//   invertMode() == true: All things match filter except what you add()

// Name-based filter over numeric api ids.  Subclasses supply mapName() to
// translate an api name into the id that is stored in the filter.
class ApiIdList
{
public:
  ApiIdList();
  // Current mode; see "Usage" above.
  bool invertMode() { return m_invert; }
  void setInvertMode(bool invert) { m_invert = invert; }
  // Add / remove the id that mapName() returns for apiName.
  void add(const std::string &apiName);
  void remove(const std::string &apiName);
  bool loadUserPrefs();

  // Map api string to cnid enum
  virtual uint32_t mapName(const std::string &apiName) = 0;

  // True when apiId matches the filter under the current mode.
  bool contains(uint32_t apiId);

  // Read-only access to the stored ids.
  // NOTE(review): the template arguments of std::unordered_map are missing in
  // this copy of the header (here and on m_filter) - restore from the original.
  const std::unordered_map &filterList() { return m_filter; }

private:
  std::unordered_map m_filter;  // apiId -> "1"
  bool m_invert;                // true: everything matches except listed ids
};
// Private state of a BufferedTable: the worker thread and its control flags.
class BufferedTablePrivate
{
public:
    BufferedTablePrivate(BufferedTable *cls) : p(cls) {}

    void work();                // work thread
    std::thread *worker;        // created in the BufferedTable constructor, deleted in finalize()
    bool done;                  // set by finalize() to make work() return
    bool workerRunning;         // false while the worker is waiting (or has been joined)

    BufferedTable *p;           // owning table
};

// Store the buffer/batch sizes and start the worker thread.
BufferedTable::BufferedTable(const char *basefile, int bufferSize, int batchsize)
: Table(basefile)
, BUFFERSIZE(bufferSize)
, BATCHSIZE(batchsize)
, d(new BufferedTablePrivate(this))
{
    d->done = false;
    d->workerRunning = true;
    d->worker = new std::thread(&BufferedTablePrivate::work, d);
}

// Frees the private state only.
// NOTE(review): the worker thread is neither joined nor deleted here, so this
// presumably relies on finalize() having been called first - confirm.
BufferedTable::~BufferedTable()
{
    delete d;
    // finalize here?  Possibly a second time
}


// Write out everything buffered at the time of the call.
// Waits until the worker is idle, then calls writeRows() (with m_mutex
// released around each call) until m_tail reaches the m_head value sampled on
// entry, and finally calls flushRows() with m_mutex held.
// NOTE(review): assumes writeRows() advances m_tail; that code is not in this file.
void BufferedTable::flush()
{
    std::unique_lock lock(m_mutex);

    // wait for worker to pause
    while (d->workerRunning == true)
        m_wait.wait(lock);

    // Worker paused, clear the buffer ourselves
    auto flushPoint = m_head;
    while (flushPoint > m_tail) {
        lock.unlock();
        writeRows();
        lock.lock();
    }

    // Table specific flush
    flushRows();	// While holding m_mutex
}


// Stop the worker thread, then flush whatever is still buffered.
// NOTE(review): d->worker is not reset after delete, so a second call would
// join an already-deleted thread object - confirm finalize() runs only once.
void BufferedTable::finalize()
{
    std::unique_lock lock(m_mutex);
    d->done = true;
    m_wait.notify_one();        // wake the worker so it sees `done`
    lock.unlock();              // the worker needs m_mutex to leave its wait
    d->worker->join();
    d->workerRunning = false;   // lets the flush() below proceed without waiting
    delete d->worker;

    flush();
}


// Report whether the worker is currently marked as running.
bool BufferedTable::workerRunning()
{
    return d->workerRunning;
}

// Worker thread body.  While at least BATCHSIZE rows are pending
// (m_head - m_tail), write them with the mutex released and notify waiters;
// otherwise mark the worker idle and sleep on m_wait until woken or done.
void BufferedTablePrivate::work()
{
    std::unique_lock lock(p->m_mutex);

    while (done == false) {
        while ((p->m_head - p->m_tail) >= p->BATCHSIZE) {
            lock.unlock();
            p->writeRows();
            p->m_wait.notify_all();
            lock.lock();
        }
        // Idle: flush() checks this flag before draining the buffer itself.
        workerRunning = false;
        if (done == false)
            p->m_wait.wait(lock);
        workerRunning = true;
    }
}
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | ********************************************************************************/ 22 | #pragma once 23 | 24 | #include 25 | #include 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "DataSource.h" 33 | #include "ApiIdList.h" 34 | 35 | class CudaApiIdList : public ApiIdList 36 | { 37 | public: 38 | CudaApiIdList(); 39 | uint32_t mapName(const std::string &apiName) override; 40 | private: 41 | std::unordered_map m_nameMap; 42 | }; 43 | 44 | class CuptiDataSource : public DataSource 45 | { 46 | public: 47 | //CuptiDataSource(); 48 | void init() override; 49 | void end() override; 50 | void startTracing() override; 51 | void stopTracing() override; 52 | void flush() override; 53 | 54 | private: 55 | CudaApiIdList m_apiList; 56 | 57 | CUpti_SubscriberHandle m_subscriber; 58 | 59 | static void CUPTIAPI api_callback(void *userdata, CUpti_CallbackDomain domain, 60 | CUpti_CallbackId cbid, const CUpti_CallbackData *cbInfo); 61 | 62 | static void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); 63 | static void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); 64 | 65 | }; 66 | -------------------------------------------------------------------------------- /rpd_tracer/DataSource.h: -------------------------------------------------------------------------------- 1 | /********************************************************************************* 2 | * Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | ********************************************************************************/ 22 | #pragma once 23 | 24 | //#include "Logger.h" 25 | // Abstract interface implemented by each tracing back-end (e.g. CuptiDataSource, RoctracerDataSource). 26 | class DataSource 27 | { 28 | public: 29 | virtual ~DataSource() = default; // virtual so a concrete source held as DataSource* can be destroyed safely 30 | virtual void init() = 0; 31 | virtual void end() = 0; 32 | virtual void startTracing() = 0; 33 | virtual void stopTracing() = 0; 34 | virtual void flush() = 0; 35 | }; 36 | -------------------------------------------------------------------------------- /rpd_tracer/DbResource.cpp: -------------------------------------------------------------------------------- 1 | /********************************************************************************* 2 | * Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved.
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | ********************************************************************************/ 22 | #include "DbResource.h" 23 | 24 | #include 25 | // Private implementation (d-pointer) of DbResource: holds the sqlite connection, resource name and lock flag. 26 | class DbResourcePrivate 27 | { 28 | public: 29 | DbResourcePrivate(DbResource *cls) : p(cls) {} 30 | 31 | sqlite3 *connection; 32 | std::string resourceName; 33 | 34 | bool locked {false}; 35 | 36 | DbResource *p; 37 | 38 | static int resourceCallback(void *data, int argc, char **argv, char **colName); 39 | }; 40 | 41 | // Opens a connection to basefile and records the name of the resource this object guards. 42 | DbResource::DbResource(const std::string &basefile, const std::string &resourceName) 43 | : d(new DbResourcePrivate(this)) 44 | { 45 | sqlite3_open(basefile.c_str(), &d->connection); 46 | d->resourceName = resourceName; 47 | } 48 | // Releases the resource if held, closes the connection and frees the private data. 49 | DbResource::~DbResource() 50 | { 51 | unlock(); 52 | sqlite3_close(d->connection); delete d; 53 | } 54 | // sqlite3_exec row callback: parses the first column as an integer into the sqlite3_int64 that data points to. 55 | int DbResourcePrivate::resourceCallback(void *data, int argc, char **argv, char **colName) 56 | { 57 | sqlite3_int64 &resourceId = *(sqlite3_int64*)data; 58 | if (argc > 0 && argv[0] != NULL) resourceId = atoll(argv[0]); // a NULL column value arrives as a NULL pointer; leave the caller's default in place 59 | return 0; 60 | } 61 | // Blocking acquire is not implemented; this is currently a no-op. 62 | void DbResource::lock() 63 | { 64 | } 65 | // Non-blocking acquire: returns true when this object holds the resource after the call. 66 | bool DbResource::tryLock() 67 | { 68 | if (d->locked == false) { 69 | // check if available 70 | int ret; 71 | char *error_msg; 72 | 73 | sqlite3_int64 resourceValue = -1; 74 | ret = sqlite3_exec(d->connection, fmt::format("SELECT value FROM rocpd_metadata WHERE tag = 'resourceLock::{}'", d->resourceName).c_str(), &DbResourcePrivate::resourceCallback, &resourceValue, &error_msg); 75 | if (resourceValue <= 0) { 76 | // Not locked.
Lock db and look again 77 | sqlite3_exec(d->connection, "BEGIN EXCLUSIVE TRANSACTION", NULL, NULL, NULL); 78 | resourceValue = -1; 79 | ret = sqlite3_exec(d->connection, fmt::format("SELECT value FROM rocpd_metadata WHERE tag = 'resourceLock::{}'", d->resourceName).c_str(), &DbResourcePrivate::resourceCallback, &resourceValue, &error_msg); 80 | if (resourceValue == -1) { 81 | // Not initialize, "make and take" 82 | ret = sqlite3_exec(d->connection, fmt::format("INSERT into rocpd_metadata(tag, value) VALUES ('resourceLock::{}', 1)", d->resourceName).c_str(), NULL, NULL, &error_msg); 83 | if (ret == SQLITE_OK) 84 | d->locked = true; 85 | } 86 | else if (resourceValue == 0) { 87 | // take resource 88 | ret = sqlite3_exec(d->connection, fmt::format("UPDATE rocpd_metadata SET value = '1' WHERE tag = 'resourceLock::{}'", d->resourceName).c_str(), NULL, NULL, &error_msg); 89 | if (ret == SQLITE_OK) 90 | d->locked = true; 91 | } 92 | sqlite3_exec(d->connection, "END TRANSACTION", NULL, NULL, NULL); 93 | } 94 | } 95 | return d->locked; 96 | } 97 | // Release the resource by writing 0 to its lock row; does nothing if this object does not hold it. 98 | void DbResource::unlock() 99 | { 100 | if (d->locked) { 101 | int ret; 102 | char *error_msg; 103 | sqlite3_exec(d->connection, "BEGIN EXCLUSIVE TRANSACTION", NULL, NULL, NULL); 104 | ret = sqlite3_exec(d->connection, fmt::format("UPDATE rocpd_metadata SET value = '0' WHERE tag = 'resourceLock::{}'", d->resourceName).c_str(), NULL, NULL, &error_msg); 105 | sqlite3_exec(d->connection, "END TRANSACTION", NULL, NULL, NULL); if (ret == SQLITE_OK) d->locked = false; // mirror tryLock(): flip the flag only when the row was updated, so a repeated unlock() cannot clear a lock taken by someone else 106 | } 107 | } 108 | // True while this object holds the resource (set by tryLock(), cleared by unlock()). 109 | bool DbResource::isLocked() 110 | { 111 | return d->locked; 112 | } 113 | -------------------------------------------------------------------------------- /rpd_tracer/DbResource.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Copyright (c) 2023 Advanced Micro Devices, Inc.
3 | **************************************************************************/ 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | class DbResourcePrivate; 10 | class DbResource 11 | { 12 | public: 13 | DbResource(const std::string &basefile, const std::string &resourceName); 14 | ~DbResource(); 15 | 16 | void lock(); 17 | bool tryLock(); 18 | void unlock(); 19 | 20 | bool isLocked(); 21 | 22 | private: 23 | DbResourcePrivate *d; 24 | friend class DbResourcePrivate; 25 | }; 26 | -------------------------------------------------------------------------------- /rpd_tracer/Logger.h: -------------------------------------------------------------------------------- 1 | /********************************************************************************* 2 | * Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | ********************************************************************************/ 22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "Table.h" 30 | #include "DataSource.h" 31 | 32 | const sqlite_int64 EMPTY_STRING_ID = 1; 33 | 34 | class Logger 35 | { 36 | public: 37 | //Logger(); 38 | static Logger& singleton(); 39 | 40 | // Table writer classes. Used directly by DataSources 41 | MetadataTable &metadataTable() { return *m_metadataTable; } 42 | StringTable &stringTable() { return *m_stringTable; } 43 | OpTable &opTable() { return *m_opTable; } 44 | KernelApiTable &kernelApiTable() { return *m_kernelApiTable; } 45 | CopyApiTable &copyApiTable() { return *m_copyApiTable; } 46 | ApiTable &apiTable() { return *m_apiTable; } 47 | MonitorTable &monitorTable() { return *m_monitorTable; } 48 | StackFrameTable &stackFrameTable() { return *m_stackFrameTable; } 49 | 50 | // External control to start/stop logging 51 | void rpdstart(); 52 | void rpdstop(); 53 | void rpdflush(); 54 | 55 | // External marker api 56 | void rpd_rangePush(const char *domain, const char *apiName, const char* args); 57 | void rpd_rangePop(); 58 | 59 | // Insert an api event. Used to log internal state or performance 60 | void createOverheadRecord(uint64_t start, uint64_t end, const std::string &name, const std::string &args); 61 | 62 | 63 | // Used on library load and unload.
64 | // Needs assistance from DataSources to avoid shutdown corruption 65 | static void rpdInit() __attribute__((constructor)); 66 | static void rpdFinalize() __attribute__((destructor)); 67 | 68 | const std::string filename() { return m_filename; }; 69 | bool writeStackFrames() { return m_writeStackFrames; }; 70 | 71 | private: 72 | int m_activeCount {0}; 73 | std::mutex m_activeMutex; 74 | 75 | std::deque m_sources; 76 | 77 | MetadataTable *m_metadataTable {nullptr}; 78 | StringTable *m_stringTable {nullptr}; 79 | OpTable *m_opTable {nullptr}; 80 | KernelApiTable *m_kernelApiTable {nullptr}; 81 | CopyApiTable *m_copyApiTable {nullptr}; 82 | ApiTable *m_apiTable {nullptr}; 83 | MonitorTable *m_monitorTable {nullptr}; 84 | StackFrameTable *m_stackFrameTable {nullptr}; 85 | 86 | void init(); 87 | void finalize(); 88 | 89 | std::string m_filename; 90 | bool m_writeOverheadRecords {true}; 91 | bool m_writeStackFrames {false}; 92 | 93 | bool m_done {false}; 94 | int m_period{1}; 95 | std::thread *m_worker {nullptr}; 96 | void autoflushWorker(); 97 | }; 98 | -------------------------------------------------------------------------------- /rpd_tracer/Makefile: -------------------------------------------------------------------------------- 1 | 2 | PREFIX = /usr/local 3 | 4 | HIP_PATH?= $(wildcard /opt/rocm/) 5 | CUDA_PATH?= $(wildcard /usr/local/cuda/) 6 | CPPTRACE_INCLUDE_PATH?= $(wildcard ../cpptrace_install/include) 7 | 8 | HIPCC=$(HIP_PATH)/bin/hipcc 9 | 10 | TARGET=hcc 11 | 12 | RPD_LIBS = -lsqlite3 -lfmt 13 | RPD_INCLUDES = 14 | RPD_SRCS = Table.cpp BufferedTable.cpp OpTable.cpp KernelApiTable.cpp CopyApiTable.cpp ApiTable.cpp StringTable.cpp MetadataTable.cpp MonitorTable.cpp StackFrameTable.cpp ApiIdList.cpp DbResource.cpp Logger.cpp Unwind.cpp 15 | 16 | ifneq (,$(HIP_PATH)) 17 | $(info Building with roctracer) 18 | RPD_LIBS += -L/opt/rocm/lib -lroctracer64 -lroctx64 -lamdhip64 -lrocm_smi64 19 | RPD_INCLUDES += -I/opt/rocm/include -I/opt/rocm/include/roctracer 
-I/opt/rocm/include/hsa 20 | RPD_SRCS += RoctracerDataSource.cpp #RocmSmiDataSource.cpp 21 | RPD_INCLUDES += -D__HIP_PLATFORM_AMD__ 22 | endif 23 | 24 | ifneq ($(CUDA_PATH),) 25 | $(info Building with cupti) 26 | RPD_LIBS += -L/usr/local/cuda/lib64 -lcupti 27 | RPD_INCLUDES += -I/usr/local/cuda/include 28 | RPD_SRCS += CuptiDataSource.cpp 29 | endif 30 | 31 | ifneq ($(CPPTRACE_INCLUDE_PATH),) 32 | $(info Building with cpptrace) 33 | RPD_INCLUDES += -DRPD_STACKFRAME_SUPPORT 34 | RPD_LIBS += -L$(CPPTRACE_INCLUDE_PATH)/../lib -lcpptrace -ldwarf -lz -lzstd -ldl 35 | RPD_INCLUDES += -I $(CPPTRACE_INCLUDE_PATH) 36 | endif 37 | 38 | RPD_OBJS = $(RPD_SRCS:.cpp=.o) 39 | 40 | 41 | RPD_MAIN = librpd_tracer.so 42 | RPD_SCRIPT = runTracer.sh loadTracer.sh 43 | 44 | PYTHON = python3 45 | PIP = pip3 46 | 47 | 48 | all: | $(RPD_MAIN) 49 | 50 | .PHONY: all 51 | 52 | 53 | $(RPD_MAIN): $(RPD_OBJS) 54 | $(CXX) -o $@ $^ -shared -rdynamic -std=c++11 $(RPD_LIBS) -g 55 | 56 | .cpp.o: 57 | $(CXX) -o $@ -c $< $(RPD_INCLUDES) -DAMD_INTERNAL_BUILD -std=c++11 -fPIC -g -O3 58 | 59 | #$(PREFIX)/lib/lib$(RPD_MAIN): 60 | # ln -s $(PREFIX)/lib/$(RPD_MAIN) $@ 61 | 62 | .PHONY: install 63 | #install: | $(PREFIX)/lib/lib$(RPD_MAIN) 64 | install: all 65 | cp $(RPD_MAIN) $(PREFIX)/lib/ 66 | cp $(RPD_SCRIPT) $(PREFIX)/bin/ 67 | ldconfig 68 | $(PYTHON) setup.py install 69 | # RPD_SCRIPT names several files: prefix every one so rm only touches the copies installed in $(PREFIX)/bin 70 | .PHONY: uninstall 71 | uninstall: 72 | rm $(PREFIX)/lib/$(RPD_MAIN) 73 | rm $(addprefix $(PREFIX)/bin/,$(RPD_SCRIPT)) 74 | .PHONY: clean 75 | clean: 76 | rm -f *.o *.so 77 | -------------------------------------------------------------------------------- /rpd_tracer/MetadataTable.cpp: -------------------------------------------------------------------------------- 1 | /********************************************************************************* 2 | * Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved.
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | ********************************************************************************/ 22 | #include "Table.h" 23 | 24 | #include 25 | 26 | #include "Utility.h" 27 | 28 | 29 | //const char *SCHEMA_OP = "CREATE TEMPORARY TABLE \"temp_rocpd_op\" (\"id\" integer NOT NULL PRIMARY KEY AUTOINCREMENT, \"gpuId\" integer NOT NULL, \"queueId\" integer NOT NULL, \"sequenceId\" integer NOT NULL, \"completionSignal\" varchar(18) NOT NULL, \"start\" integer NOT NULL, \"end\" integer NOT NULL, \"description_id\" integer NOT NULL REFERENCES \"rocpd_string\" (\"id\") DEFERRABLE INITIALLY DEFERRED, \"opType_id\" integer NOT NULL REFERENCES \"rocpd_string\" (\"id\") DEFERRABLE INITIALLY DEFERRED)"; 30 | 31 | //const char *SCHEMA_API_OPS = "CREATE TEMPORARY TABLE \"temp_rocpd_api_ops\" (\"id\" integer NOT NULL PRIMARY KEY AUTOINCREMENT, \"api_id\" integer NOT NULL REFERENCES \"rocpd_api\" (\"id\") DEFERRABLE INITIALLY DEFERRED, \"op_id\" integer NOT NULL REFERENCES \"rocpd_op\" (\"id\") DEFERRABLE INITIALLY DEFERRED)"; 32 | 33 | 34 | class MetadataTablePrivate 35 | { 36 | public: 37 | MetadataTablePrivate(MetadataTable *cls) : p(cls) {} 38 | 39 | sqlite3_stmt *sessionInsert; 40 | 41 | sqlite3_int64 sessionId; 42 | void createSession(); 43 | 44 | MetadataTable *p; 45 | }; 46 | 47 | int sessionCallback(void *data, int argc, char **argv, char **colName) 48 | { 49 | sqlite3_int64 &sessionId = *(sqlite3_int64*)data; 50 | sessionId = atoll(argv[0]); 51 | return 0; 52 | } 53 | 54 | MetadataTable::MetadataTable(const char *basefile) 55 | : Table(basefile) 56 | , d(new MetadataTablePrivate(this)) 57 | { 58 | d->createSession(); 59 | } 60 | 61 | void MetadataTable::flush() 62 | { 63 | } 64 | 65 | void MetadataTable::finalize() 66 | { 67 | } 68 | 69 | sqlite3_int64 MetadataTable::sessionId() 70 | { 71 | return d->sessionId; 72 | } 73 | 74 | 75 | void MetadataTablePrivate::createSession() 76 | { 77 | int ret; 78 | sqlite3_exec(p->m_connection, "BEGIN EXCLUSIVE TRANSACTION", NULL, NULL, NULL); 79 
| // get or create session count property 80 | 81 | sqlite3_int64 sessionId = -1; 82 | char *error_msg; 83 | ret = sqlite3_exec(p->m_connection, "SELECT value FROM rocpd_metadata WHERE tag = 'session_count'", &sessionCallback, &sessionId, &error_msg); 84 | if (sessionId == -1) { 85 | sessionId = 0; 86 | ret = sqlite3_exec(p->m_connection, "INSERT into rocpd_metadata(tag, value) VALUES ('session_count', 1)", NULL, NULL, &error_msg); 87 | } 88 | else { 89 | char buff[4096]; 90 | std::snprintf(buff, 4096, "UPDATE rocpd_metadata SET value = '%lld' WHERE tag = 'session_count'", sessionId + 1); 91 | ret = sqlite3_exec(p->m_connection, buff, NULL, NULL, &error_msg); 92 | } 93 | 94 | sqlite3_exec(p->m_connection, "END TRANSACTION", NULL, NULL, NULL); 95 | 96 | //printf("Opening session: %lld\n", sessionId); 97 | fflush(stdout); 98 | 99 | this->sessionId = sessionId; 100 | } 101 | -------------------------------------------------------------------------------- /rpd_tracer/README.md: -------------------------------------------------------------------------------- 1 | # RPD_TRACER 2 | 3 | This is a tracer that can attach to any process and record HIP APIs, ops, and roctx. 4 | 5 | 6 | ## Steps: 7 | 1) `cd` to the rocmProfileData root directory 8 | 2) Run `make; make install` 9 | 3) Run `runTracer.sh -o <output_file>.rpd <command>`. 10 | 4) `python tools/rpd2tracing <output_file>.rpd <output_file>.json` for Chrome tracing output. 11 | 12 | 13 | Manual Stuff: 14 | - Use 'LD_PRELOAD=./librpd_tracer.so' to attach the profiler to any process 15 | - Default output file name is 'trace.rpd' 16 | - Override file name with env 'RPDT_FILENAME=<filename>' 17 | - Create empty rpd file with python3 -m rocpd.schema --create ${OUTPUT_FILE} 18 | - Multiple processes can log to the same file concurrently 19 | - Files can be appended any number of times 20 | 21 | ## Example 22 | This example shows how to dynamically link `librpd_tracer.so` file to your application. 23 | 24 | 1) Make sure you run step 2 above to install rpd utilities.
25 | 2) Create empty rpd file with `python3 -m rocpd.schema --create ${OUTPUT_FILE}`. Here `${OUTPUT_FILE}` is the path of the rpd file. 26 | 3) If you are using CMake to build your application, add `target_link_libraries(<your_target> /opt/rocm/lib/libroctracer64.so ${CMAKE_DL_LIBS})`. This will allow you to load libraries for `libroctracer` and `DL`. 27 | 4) In your app make the following changes: 28 | 1) Declare a pointer to hold the handle returned by `dlopen()`, somewhere at the beginning of your code: 29 | ``` 30 | void* rocTracer_lib; 31 | ``` 32 | 2) Define the function type of the functions you are interested in calling, and declare them: 33 | ``` 34 | typedef void (*rt_func_t)(); 35 | rt_func_t init_tracing; 36 | rt_func_t start_tracing; 37 | rt_func_t stop_tracing; 38 | ``` 39 | 3) Load dynamic library file: 40 | ``` 41 | rocTracer_lib = dlopen("librpd_tracer.so", RTLD_LAZY); //defer resolution until the first reference via RTLD_LAZY 42 | std::cout << rocTracer_lib << std::endl; // points to some memory location, e.g. 0x89a... 43 | if (!rocTracer_lib) { 44 | fputs (dlerror(), stderr); 45 | exit(1); 46 | } 47 | ``` 48 | 4) Initializing, starting and stopping all follow the same syntax; the example below shows the `init_tracing()` function. Place the calls appropriately in your application: 49 | ``` 50 | dlerror(); //clear any previous errors. 51 | init_tracing = (rt_func_t)dlsym(rocTracer_lib, "_Z12init_tracingv"); 52 | if(!init_tracing) 53 | { 54 | std::cout << init_tracing << std::endl; 55 | fputs(dlerror(), stderr); 56 | exit(1); 57 | } 58 | ``` 59 | 5) You can run the function you linked via `dlsym` as follows (continuing the example from the step above): 60 | ``` 61 | init_tracing(); 62 | ``` 63 | Note: You can use `nm -gd <library>` to find the symbol names in your library.
64 | -------------------------------------------------------------------------------- /rpd_tracer/RocmSmiDataSource.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Copyright (c) 2022 Advanced Micro Devices, Inc. 3 | **************************************************************************/ 4 | #include "RocmSmiDataSource.h" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "rocm_smi/rocm_smi.h" 11 | 12 | #include 13 | 14 | #include 15 | 16 | #include "Logger.h" 17 | #include "Utility.h" 18 | 19 | 20 | // Create a factory for the Logger to locate and use 21 | extern "C" { 22 | DataSource *RocmSmiDataSourceFactory() { return new RocmSmiDataSource(); } 23 | } // extern "C" 24 | 25 | 26 | 27 | void RocmSmiDataSource::init() 28 | { 29 | rsmi_status_t ret; 30 | ret = rsmi_init(0); 31 | 32 | #if 0 33 | uint32_t num_devices; 34 | uint16_t dev_id; 35 | 36 | rsmi_num_monitor_devices(&num_devices); 37 | for (int i = 0; i < num_devices; ++i) { 38 | rsmi_dev_id_get(i, &dev_id); 39 | fprintf(stderr, "device: %d\n", dev_id); 40 | } 41 | #endif 42 | 43 | m_done = false; 44 | m_period = 1000; 45 | 46 | m_resource = new DbResource(Logger::singleton().filename(), std::string("smi_logger_active")); 47 | m_worker = new std::thread(&RocmSmiDataSource::work, this); 48 | } 49 | // Stops and joins the worker thread, releases the shared "smi_logger_active" resource and shuts rocm_smi down. 50 | void RocmSmiDataSource::end() 51 | { 52 | std::unique_lock lock(m_mutex); 53 | m_done = true; 54 | lock.unlock(); 55 | m_worker->join(); 56 | delete m_worker; m_worker = nullptr; 57 | // ~DbResource() calls unlock() and closes the sqlite connection, so deleting releases the resource exactly once 58 | delete m_resource; m_resource = nullptr; 59 | 60 | rsmi_status_t ret; 61 | ret = rsmi_shut_down(); 62 | } 63 | 64 | void RocmSmiDataSource::startTracing() 65 | { 66 | std::unique_lock lock(m_mutex); 67 | m_loggingActive = true; 68 | } 69 | 70 | void RocmSmiDataSource::stopTracing() 71 | { 72 | std::unique_lock lock(m_mutex); 73 | m_loggingActive = false; 74 | 75 | // Tell the monitor table that it should terminate any outstanding
ranges... 76 | // since we are paused/stopped. 77 | Logger &logger = Logger::singleton(); 78 | logger.monitorTable().endCurrentRuns(clocktime_ns()); 79 | } 80 | 81 | void RocmSmiDataSource::flush() { 82 | Logger &logger = Logger::singleton(); 83 | logger.monitorTable().endCurrentRuns(clocktime_ns()); 84 | } 85 | 86 | 87 | void RocmSmiDataSource::work() 88 | { 89 | Logger &logger = Logger::singleton(); 90 | std::unique_lock lock(m_mutex); 91 | 92 | sqlite3_int64 startTime = clocktime_ns()/1000; 93 | 94 | bool haveResource = m_resource->tryLock(); 95 | 96 | while (m_done == false) { 97 | if (haveResource && m_loggingActive) { 98 | lock.unlock(); 99 | 100 | uint32_t num_devices = 1; 101 | uint16_t dev_id = 0; 102 | 103 | rsmi_num_monitor_devices(&num_devices); 104 | for (int i = 0; i < num_devices; ++i) { 105 | rsmi_status_t ret; 106 | 107 | #if 1 108 | rsmi_frequencies_t freqs; 109 | ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &freqs); 110 | if (ret == RSMI_STATUS_SUCCESS) { 111 | MonitorTable::row mrow; 112 | mrow.deviceId = i; 113 | mrow.deviceType = "gpu"; // FIXME, use enums or somthing fancy 114 | mrow.monitorType = "sclk"; // FIXME, use enums or somthing fancy 115 | mrow.start = clocktime_ns(); 116 | mrow.end = 0; 117 | mrow.value = fmt::format("{}", freqs.frequency[freqs.current] / 1000000); 118 | logger.monitorTable().insert(mrow); 119 | } 120 | #endif 121 | #if 0 122 | uint64_t pow; 123 | ret = rsmi_dev_power_ave_get(i, 0, &pow); 124 | if (ret == RSMI_STATUS_SUCCESS) { 125 | MonitorTable::row mrow; 126 | mrow.deviceId = i; 127 | mrow.deviceType = "gpu"; // FIXME, use enums or somthing fancy 128 | mrow.monitorType = "power"; // FIXME, use enums or somthing fancy 129 | mrow.start = clocktime_ns(); 130 | mrow.end = 0; 131 | mrow.value = fmt::format("{}", pow / 1000000.0); 132 | logger.monitorTable().insert(mrow); 133 | } 134 | #endif 135 | #if 0 136 | int64_t temp; 137 | ret = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_FIRST, RSMI_TEMP_CURRENT, 
&temp); 138 | if (ret == RSMI_STATUS_SUCCESS) { 139 | MonitorTable::row mrow; 140 | mrow.deviceId = i; 141 | mrow.deviceType = "gpu"; // FIXME, use enums or somthing fancy 142 | mrow.monitorType = "temp"; // FIXME, use enums or somthing fancy 143 | mrow.start = clocktime_ns(); 144 | mrow.end = 0; 145 | mrow.value = fmt::format("{}", temp/1000); 146 | logger.monitorTable().insert(mrow); 147 | } 148 | #endif 149 | } 150 | lock.lock(); 151 | } 152 | 153 | sqlite3_int64 sleepTime = startTime + m_period - clocktime_ns()/1000; 154 | sleepTime = (sleepTime > 0) ? sleepTime : 0; 155 | // sleep longer if we aren't the active instance 156 | if (haveResource == false) 157 | sleepTime += m_period * 10; 158 | lock.unlock(); 159 | usleep(sleepTime); 160 | lock.lock(); 161 | // Try to become the active logging instance 162 | if (haveResource == false) { 163 | haveResource = m_resource->tryLock(); 164 | } 165 | startTime = clocktime_ns()/1000; 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /rpd_tracer/RocmSmiDataSource.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Copyright (c) 2022 Advanced Micro Devices, Inc. 
3 | **************************************************************************/ 4 | #pragma once 5 | 6 | #include "DataSource.h" 7 | #include "DbResource.h" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | 15 | class RocmSmiDataSource : public DataSource 16 | { 17 | public: 18 | //RoctracerDataSource(); 19 | void init() override; 20 | void end() override; 21 | void startTracing() override; 22 | void stopTracing() override; 23 | virtual void flush() override; 24 | 25 | private: 26 | std::mutex m_mutex; 27 | std::condition_variable m_wait; 28 | bool m_loggingActive {false}; 29 | DbResource *m_resource {nullptr}; 30 | 31 | void work(); // work thread 32 | std::thread *m_worker {nullptr}; 33 | volatile bool m_done {false}; 34 | bool m_workerRunning {false}; 35 | sqlite3_int64 m_period { 10000 }; 36 | }; 37 | 38 | -------------------------------------------------------------------------------- /rpd_tracer/RoctracerDataSource.h: -------------------------------------------------------------------------------- 1 | /********************************************************************************* 2 | * Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | ********************************************************************************/ 22 | #pragma once 23 | 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #include "DataSource.h" 31 | #include "ApiIdList.h" 32 | #include "Logger.h" 33 | 34 | class RocmApiIdList : public ApiIdList 35 | { 36 | public: 37 | RocmApiIdList() { ; } 38 | uint32_t mapName(const std::string &apiName) override; 39 | }; 40 | 41 | 42 | class RoctracerDataSource : public DataSource 43 | { 44 | public: 45 | //RoctracerDataSource(); 46 | void init() override; 47 | void end() override; 48 | void startTracing() override; 49 | void stopTracing() override; 50 | void flush() override; 51 | 52 | private: 53 | RocmApiIdList m_apiList; 54 | 55 | roctracer_pool_t *m_hccPool{nullptr}; 56 | static void api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg); 57 | static void hcc_activity_callback(const char* begin, const char* end, void* arg); 58 | }; 59 | -------------------------------------------------------------------------------- /rpd_tracer/StackFrameTable.cpp: -------------------------------------------------------------------------------- 1 | /********************************************************************************* 2 | * Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | ********************************************************************************/ 22 | #include "Table.h" 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include "rpd_tracer.h" 29 | #include "Utility.h" 30 | 31 | 32 | const char *SCHEMA = R"sql(CREATE TEMPORARY TABLE "temp_rocpd_stackframe" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "api_ptr_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "depth" integer NOT NULL, "name_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED);)sql"; 33 | 34 | class StackFrameTablePrivate 35 | { 36 | public: 37 | StackFrameTablePrivate(StackFrameTable *cls) : p(cls) {} 38 | static const int BUFFERSIZE = 4096 * 4; 39 | static const int BATCHSIZE = 4096; // rows per transaction 40 | std::array rows; // Circular buffer 41 | 42 | sqlite3_stmt *insertStatement; 43 | 44 | StackFrameTable *p; 45 | }; 46 | 47 | 48 | StackFrameTable::StackFrameTable(const char *basefile) 49 | : BufferedTable(basefile, StackFrameTablePrivate::BUFFERSIZE, StackFrameTablePrivate::BATCHSIZE) 50 | , d(new StackFrameTablePrivate(this)) 51 | { 52 | int ret; 53 | // set up tmp table 54 | 55 | ret = sqlite3_exec(m_connection, SCHEMA, NULL, NULL, NULL); 56 | // prepare queries to insert row 57 | ret = sqlite3_prepare_v2(m_connection, "insert into temp_rocpd_stackframe(api_ptr_id, depth, name_id) values (?,?,?)", -1, &d->insertStatement, NULL); 58 | } 59 | 60 | 61 | StackFrameTable::~StackFrameTable() 62 | { 63 | delete d; 64 | } 65 | 66 | 67 | void StackFrameTable::insert(const StackFrameTable::row &row) 68 | { 69 | std::unique_lock lock(m_mutex); 70 | while (m_head - m_tail >= StackFrameTablePrivate::BUFFERSIZE) { 71 | // buffer is full; insert in-line or wait 72 | m_wait.notify_one(); // make sure working is running 73 | m_wait.wait(lock); 74 | } 75 | 76 | d->rows[(++m_head) % StackFrameTablePrivate::BUFFERSIZE] = row; 77 | 78 | if (workerRunning() == false && (m_head - m_tail) >= 
StackFrameTablePrivate::BATCHSIZE) { 79 | lock.unlock(); 80 | m_wait.notify_one(); 81 | } 82 | } 83 | 84 | 85 | void StackFrameTable::flushRows() 86 | { 87 | int ret = 0; 88 | ret = sqlite3_exec(m_connection, "begin transaction", NULL, NULL, NULL); 89 | ret = sqlite3_exec(m_connection, "insert into rocpd_stackframe select * from temp_rocpd_stackframe", NULL, NULL, NULL); 90 | fprintf(stderr, "rocpd_stackframe: %d\n", ret); 91 | ret = sqlite3_exec(m_connection, "delete from temp_rocpd_stackframe", NULL, NULL, NULL); 92 | ret = sqlite3_exec(m_connection, "commit", NULL, NULL, NULL); 93 | } 94 | 95 | 96 | void StackFrameTable::writeRows() 97 | { 98 | std::unique_lock wlock(m_writeMutex); 99 | std::unique_lock lock(m_mutex); 100 | 101 | if (m_head == m_tail) 102 | return; 103 | 104 | const timestamp_t cb_begin_time = clocktime_ns(); 105 | 106 | int start = m_tail + 1; 107 | int end = m_tail + BATCHSIZE; 108 | end = (end > m_head) ? m_head : end; 109 | lock.unlock(); 110 | 111 | sqlite3_exec(m_connection, "BEGIN DEFERRED TRANSACTION", NULL, NULL, NULL); 112 | 113 | for (int i = start; i <= end; ++i) { 114 | int index = 1; 115 | StackFrameTable::row &r = d->rows[i % BUFFERSIZE]; 116 | 117 | sqlite3_bind_int64(d->insertStatement, index++, r.api_id + m_idOffset); 118 | sqlite3_bind_int(d->insertStatement, index++, r.depth); 119 | sqlite3_bind_int64(d->insertStatement, index++, r.name_id); 120 | int ret = sqlite3_step(d->insertStatement); 121 | sqlite3_reset(d->insertStatement); 122 | } 123 | lock.lock(); 124 | m_tail = end; 125 | lock.unlock(); 126 | 127 | //const timestamp_t cb_mid_time = util::HsaTimer::clocktime_ns(util::HsaTimer::TIME_ID_CLOCK_MONOTONIC); 128 | sqlite3_exec(m_connection, "END TRANSACTION", NULL, NULL, NULL); 129 | //const timestamp_t cb_end_time = util::HsaTimer::clocktime_ns(util::HsaTimer::TIME_ID_CLOCK_MONOTONIC); 130 | // FIXME 131 | const timestamp_t cb_end_time = clocktime_ns(); 132 | char buff[4096]; 133 | std::snprintf(buff, 4096, "count=%d | 
remaining=%d", end - start + 1, m_head - m_tail); 134 | createOverheadRecord(cb_begin_time, cb_end_time, "StackFrameTable::writeRows", buff); 135 | } 136 | -------------------------------------------------------------------------------- /rpd_tracer/Table.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Copyright (c) 2023 Advanced Micro Devices, Inc. 3 | **************************************************************************/ 4 | #include "Table.h" 5 | #include "Utility.h" 6 | 7 | int busy_handler(void *data, int count) 8 | { 9 | count = (count < 9) ? count : 8; 10 | usleep(1000 * (0x1 << count)); 11 | return 1; 12 | } 13 | 14 | Table::Table(const char *basefile) 15 | : m_connection(NULL) 16 | { 17 | //pthread_mutex_init(m_mutex); 18 | //pthread_cond_init(m_wait); 19 | sqlite3_open(basefile, &m_connection); 20 | //sqlite3_busy_timeout(m_connection, 10000); 21 | sqlite3_busy_handler(m_connection, &busy_handler, NULL); 22 | } 23 | 24 | Table::~Table() 25 | { 26 | // FIXME: ensure these aren't in use 27 | //pthread_mutex_destroy(m_mutex); 28 | //pthread_cond_destroy(m_wait); 29 | 30 | sqlite3_close(m_connection); 31 | } 32 | 33 | void Table::setIdOffset(sqlite3_int64 offset) 34 | { 35 | m_idOffset = offset; 36 | } 37 | -------------------------------------------------------------------------------- /rpd_tracer/Unwind.cpp: -------------------------------------------------------------------------------- 1 | /********************************************************************************* 2 | * Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | ********************************************************************************/ 22 | 23 | #include 24 | #include "Logger.h" 25 | 26 | #ifdef RPD_STACKFRAME_SUPPORT 27 | #include 28 | #include 29 | 30 | // FIXME: can we avoid shutdown corruption? 31 | // Other rocm libraries crashing on unload 32 | // libsqlite unloading before we are done using it 33 | // Current workaround: register an onexit function when first activity is delivered back 34 | // this let's us unload first, or close to. 35 | // New workaround: register 4 times, only finalize once. 
see register_once 36 | 37 | static std::once_flag registerDoubleAgain_once; 38 | 39 | int unwind(Logger &logger, const char *api, const sqlite_int64 api_id) { 40 | 41 | if (!logger.writeStackFrames()) return 0; 42 | 43 | #if 0 44 | // for reference: full stack w/o manipulations 45 | const std::string stack1 = cpptrace::generate_trace(0).to_string(false); 46 | std::cout << stack1 << std::endl; 47 | if (true) return 0; 48 | #endif 49 | 50 | // strip out the top frames that only point into roctracer/rpd, do not add color 51 | const std::string stack = cpptrace::generate_trace(3).to_string(false); 52 | /* 53 | * returns: 54 | * Stack trace (most recent call first): 55 | * #0 0x00007f3a1d2f4447 at /opt/rocm/lib/libamdhip64.so.6 56 | * #1 0x00000000002055cf in main_foo(int, char**) at /root/rocm-examples/Applications/bitonic_sort/main.hip:170:5 57 | * #2 0x00007f3a1cb561c9 in __libc_start_call_main at ./csu/../sysdeps/nptl/libc_start_call_main.h:58:16 58 | * #3 0x00007f3a1cb5628a in __libc_start_main_impl at ./csu/../csu/libc-start.c:360:3 59 | * #4 0x0000000000204b04 at /root/rocm-examples/Applications/bitonic_sort/applications_bitonic_sort 60 | * 61 | * need to get rid of the first line 62 | * should inject api into #0 frame as "in $api at" 63 | */ 64 | std::istringstream iss(stack); 65 | std::string line; 66 | std::getline(iss, line); // get rid of "Stack trace (most recent call first):" 67 | 68 | std::getline(iss, line); 69 | std::string s1 = line.substr(0,21); 70 | std::string s2 = line.substr(21); 71 | 72 | std::string fixed = s1 + " in " + api + "()" + s2; 73 | 74 | StackFrameTable::row frame0; 75 | frame0.api_id = api_id; 76 | frame0.depth = 0; 77 | frame0.name_id = logger.stringTable().getOrCreate(fixed.c_str()); 78 | logger.stackFrameTable().insert(frame0); 79 | 80 | int n = 1; 81 | while ( std::getline(iss, line) ) { 82 | if (line.empty()) 83 | continue; 84 | StackFrameTable::row frame; 85 | frame.api_id = api_id; 86 | frame.depth = n; 87 | frame.name_id = 
logger.stringTable().getOrCreate(line.c_str()); 88 | logger.stackFrameTable().insert(frame); 89 | 90 | n++; 91 | } 92 | 93 | std::call_once(registerDoubleAgain_once, atexit, Logger::rpdFinalize); 94 | 95 | return 0; 96 | } 97 | 98 | #else 99 | 100 | int unwind(Logger &logger, const char *api, const sqlite_int64 api_id) { 101 | return 0; 102 | } 103 | 104 | #endif 105 | 106 | -------------------------------------------------------------------------------- /rpd_tracer/Utility.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Copyright (c) 2023 Advanced Micro Devices, Inc. 3 | **************************************************************************/ 4 | #pragma once 5 | 6 | #include 7 | #include /* For SYS_xxx definitions */ 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | typedef uint64_t timestamp_t; 15 | 16 | static inline uint32_t GetPid() 17 | { 18 | thread_local uint32_t pid = syscall(__NR_getpid); 19 | return pid; 20 | } 21 | 22 | static inline uint32_t GetTid() 23 | { 24 | thread_local uint32_t tid = syscall(__NR_gettid); 25 | return tid; 26 | } 27 | 28 | // C++ symbol demangle 29 | static inline const char* cxx_demangle(const char* symbol) { 30 | size_t funcnamesize; 31 | int status; 32 | const char* ret = (symbol != NULL) ? abi::__cxa_demangle(symbol, NULL, &funcnamesize, &status) : symbol; 33 | return (ret != NULL) ? 
# Usage: loadTracer.sh [-o OUTPUT_FILE] COMMAND [ARGS...]
#
# Creates a fresh rpd file, then runs COMMAND with librpd_tracer.so preloaded.
# RPDT_AUTOSTART=0 is exported for the traced process.
OUTPUT_FILE="trace.rpd"

if [ "$1" = "-o" ] ; then
    if [ -z "$2" ] ; then
        echo "Error: -o requires an output file name" >&2
        exit 1
    fi
    OUTPUT_FILE=$2
    shift
    shift
fi

# Quote the file name everywhere so paths containing spaces survive.
if [ -e "${OUTPUT_FILE}" ] ; then
    rm -- "${OUTPUT_FILE}"
fi

if ! python3 -m rocpd.schema --create "${OUTPUT_FILE}" ; then
    echo "Error: Could not create rpd file. Please run 'python setup.py install' from the rocpd_python dir" >&2
    # A bare 'exit' would return the status of the echo above (0 = success).
    exit 1
fi

export RPDT_FILENAME="${OUTPUT_FILE}"
export RPDT_AUTOSTART=0
LD_PRELOAD=librpd_tracer.so "$@"
# Usage: runTracer.sh [-o OUTPUT_FILE] COMMAND [ARGS...]
#
# Creates a fresh rpd file, then runs COMMAND with librpd_tracer.so preloaded.
OUTPUT_FILE="trace.rpd"

if [ "$1" = "-o" ] ; then
    if [ -z "$2" ] ; then
        echo "Error: -o requires an output file name" >&2
        exit 1
    fi
    OUTPUT_FILE=$2
    shift
    shift
fi

# Quote the file name everywhere so paths containing spaces survive.
if [ -e "${OUTPUT_FILE}" ] ; then
    rm -- "${OUTPUT_FILE}"
fi

if ! python3 -m rocpd.schema --create "${OUTPUT_FILE}" ; then
    echo "Error: Could not create rpd file. Please run 'python setup.py install' from the rocpd_python dir" >&2
    # A bare 'exit' would return the status of the echo above (0 = success).
    exit 1
fi

export RPDT_FILENAME="${OUTPUT_FILE}"
LD_PRELOAD=librpd_tracer.so "$@"
22 | ################################################################################ 23 | 24 | from distutils.core import setup, Extension 25 | 26 | setup (name = 'rpdTracer', 27 | version = '1.0', 28 | description = 'Tracer control from user code', 29 | py_modules = ['rpdTracerControl'], 30 | ) 31 | -------------------------------------------------------------------------------- /tools/README_nvvp.txt: -------------------------------------------------------------------------------- 1 | Nvidia Visual Profiler files can be converted to RPD with nvvp2rpd.py 2 | 3 | Concerns: 4 | Nvvp files do not have Api names but rather store enum values. Those values can be looked up in 'cupti_runtime_cbid.h', which is a generated file and may change at any time. 5 | The nvvp2rpd.py importer requires a 'cupti_runtime_cbid.h'. 6 | The safest approach would be to acquire that 'cupti_runtime_cbid.h' from the system being profiled 7 | 8 | Current Status: 9 | The importer is at a very early stage of development (and may stay there). It imports all api calls. It imports kernel ops (only names and durations). Many other ops, e.g. memcpy, memset, sync, are not imported. There is detailed data for all these things in the source files. 10 | 11 | Usage: 12 | python3.6 nvvp2rpd.py --help 13 | 14 | -------------------------------------------------------------------------------- /tools/README_rpt.txt: -------------------------------------------------------------------------------- 1 | Output from 'HIP_TRACE_API=1' and 'HCC_PROFILE=2' can be imported into RPD with rpt2rpd.py. 2 | 3 | Support is minimal: 4 | Captures all api calls 5 | Captures kernel ops (only names and durations) 6 | Other ops are not imported 7 | Api -> op relationship is not available in the source data 8 | Api duration is not available in the source data (currently set to 10 usec for fun) 9 | 10 | Todo: 11 | Capture all ops 12 | Generate ops 'subclasses' to capture more detailed op data. Some is available from the source. 
13 | Attempt to link api to ops based on the assumption of in-order queues 14 | 15 | Usage: 16 | python3.6 rpt2rpd.py --help 17 | -------------------------------------------------------------------------------- /tools/README_step1.txt: -------------------------------------------------------------------------------- 1 | Run your application under rocprofiler. You will need a working rocprofiler that logs all required fields. (harder than it sounds) 2 | 3 | Use: 4 | echo pmc: > in.txt 5 | rocprof -i in.txt --hip-trace --roctx-trace --flush-rate 10ms --timestamp on -d rocout 6 | 7 | This should populate a folder tree that will terminate in a dir containing: 8 | hcc_ops_trace.txt 9 | hip_api_trace.txt 10 | 11 | Those are the goodies you need for step 2. 12 | -------------------------------------------------------------------------------- /tools/README_step2.txt: -------------------------------------------------------------------------------- 1 | Use rocprof2rpd.py to convert the rocprofiler file to an rpd file: 2 | 'python3.6 rocprof2rpd.py --ops_input_file hcc_ops_trace.txt --api_input_file hip_api_trace.txt myCoolProfile.rpd' 3 | 4 | To use the python library rocpd_python, navigate to the rocpd_python folder and install the library 5 | 6 | python3.6 setup.py install 7 | python3.6 -m rocpd.rocprofiler_import --ops_input_file hcc_ops_trace.txt --api_input_file hip_api_trace.txt myCoolProfile.rpd 8 | 9 | 10 | Generate json from your profile: 11 | 'python3.6 rpd2tracing.py myCoolProfile.rpd trace.json' 12 | 13 | Good to go, fire up chrome and take a look. 14 | 15 | Note: 16 | If you need to subrange you must specify timestamps in usecs. You can browse the db to find good start/end times but those times are in nsec. Divide by 1000. 
17 | ---------------------------------------------------------------------------- 18 | usage: rpd2tracing.py [-h] [--start START] [--end END] input_rpd output_json 19 | 20 | convert RPD to json for chrome tracing 21 | 22 | positional arguments: 23 | input_rpd input rpd db 24 | output_json chrone tracing json output 25 | 26 | optional arguments: 27 | -h, --help show this help message and exit 28 | --start START start timestamp 29 | --end END end timestamp 30 | ----------------------------------------------------------------------------- 31 | 32 | Bonus: 33 | There are some views in HelpfulQueries.txt. You can open your rpd file with sqlite3 and apply those views. 34 | - 'Op' and 'api' mirror the 'rocpd_op' and 'rocpd_api' tables but have the strings joined in. 35 | - 'Top' gives a top kernel summary 36 | - 'Busy' gives gpu busy time 37 | -------------------------------------------------------------------------------- /tools/rpd2table.py: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 
def process_rpd_to_df(rpd_path, markers_list):
    """Summarise the kernels launched inside the given user markers.

    Args:
        rpd_path: Path of the RPD (sqlite) file to read.
        markers_list: Marker names to look for among the ``UserMarker``
            entries of the ``api`` view.  Every entry may itself be a
            ``", "``-separated list of names.  An empty list selects no
            marker and therefore yields an empty table.

    Returns:
        A pandas DataFrame with one row per (marker_name, gpuid,
        kernel_name) holding the launch count, total/avg/min/max duration
        and the kernel's share of the time spent in that marker on that gpu.
    """
    # Use every entry, not only the first one, and tolerate an empty list.
    marker_names = [
        name for entry in markers_list for name in entry.split(", ")
    ]
    placeholders = ", ".join(["?"] * len(marker_names))

    create_marker_table = """CREATE TEMPORARY TABLE ext_marker ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "pid" integer NOT NULL, "tid" integer NOT NULL, "name" varchar(255) NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL)"""

    fill_marker_table = f"""
        INSERT INTO ext_marker (pid, tid, name, start, end)
        SELECT pid, tid, args, start, end
        FROM api
        WHERE apiName = 'UserMarker' AND args IN ({placeholders})
    """

    # An api call belongs to a marker only when it was issued by the same
    # process AND the same thread while the marker was open.  The thread
    # check has to be an equality test: "A.tid and B.tid" is merely a
    # truthiness test and matched the calls of every thread.
    collect_api_query = """
        CREATE TEMPORARY VIEW marker as
        SELECT A.pid, A.tid, A.id as marker_id, A.name as marker_name, B.id as api_id, apiName
        FROM ext_marker A join api B
        ON B.start >= A.start and B.end <= A.end and A.pid = B.pid and A.tid = B.tid
    """

    collect_kernel_query = """
        CREATE TEMPORARY VIEW marker_kernel as
        SELECT B.*, gpuid, C.optype as optype, C.description as kernel_name, (end - start) as duration
        FROM rocpd_api_ops A join marker B on B.api_id = A.api_id join op C on C.id = A.op_id
    """

    generate_table_query = """
        SELECT marker_name, marker_id, gpuid, kernel_name, COUNT(DISTINCT marker_id) as marker_count, count(duration) as kernel_count, sum(duration) as total_dur, avg(duration) as avg_dur, min(duration) as min_dur, max(duration) as max_dur,
        (SUM(duration) * 100.0 / SUM(SUM(duration)) OVER (PARTITION BY marker_name, gpuid)) AS kernel_percentage
        FROM marker_kernel
        GROUP BY marker_name, gpuid, kernel_name
        ORDER BY gpuid
    """

    connection = sqlite3.connect(rpd_path)
    try:
        # The temporary table and views only live as long as this connection.
        connection.execute(create_marker_table)
        connection.execute(fill_marker_table, marker_names)
        connection.execute(collect_api_query)
        connection.execute(collect_kernel_query)
        return pd.read_sql_query(generate_table_query, connection)
    finally:
        connection.close()
index=False) 102 | print(f"Summary table saved to {output_csv}") 103 | 104 | if __name__ == "__main__": 105 | main() -------------------------------------------------------------------------------- /tools/rpd_trim.py: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2021 - 2024 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | # THE SOFTWARE. 21 | ################################################################################ 22 | 23 | # 24 | # Trim any data from an rpd file that doesn't fall within a specified time span. 25 | # 26 | 27 | import sqlite3 28 | import argparse 29 | 30 | parser = argparse.ArgumentParser(description='Permanently remove events/data from an RPD file that falls outside a specified time range. 
Range start and end values default to the files\' original start and end') 31 | parser.add_argument('input_rpd', type=str, help="input rpd db") 32 | parser.add_argument('--start', type=str, help="start time - default ns or percentage %%. Number only is interpreted as ns. Number with %% is interpreted as percentage. Number with leading '+' is interpreted as delta from the start time.") 33 | parser.add_argument('--end', type=str, help="end time - default ns or percentage %%. See help for --start") 34 | parser.add_argument('--dryrun', action=argparse.BooleanOptionalAction, help="compute range but take no action") 35 | args = parser.parse_args() 36 | 37 | connection = sqlite3.connect(args.input_rpd) 38 | 39 | min_time = connection.execute("select MIN(start) from rocpd_api;").fetchall()[0][0] 40 | max_time = connection.execute("select MAX(end) from rocpd_api;").fetchall()[0][0] 41 | if (min_time == None): 42 | raise Exception("Trace file is empty.") 43 | 44 | print(f"\t duration: \t{(max_time-min_time) / 1000000000} seconds") 45 | 46 | # Calculate trim start 47 | if args.start: 48 | if "%" in args.start: 49 | start_time = int( (max_time - min_time) * ( int( args.start.replace("%","") )/100 ) + min_time ) 50 | elif args.start.startswith('+'): 51 | start_time = int(args.start[1:]) + min_time 52 | else: 53 | start_time = int(args.start) 54 | else: 55 | start_time = min_time 56 | 57 | # Calculate trim end 58 | if args.end: 59 | if "%" in args.end: 60 | end_time = int( (max_time - min_time) * ( int( args.end.replace("%","") )/100 ) + min_time ) 61 | elif args.end.startswith('+'): 62 | end_time = int(args.end[1:]) + min_time 63 | else: 64 | end_time = int(args.end) 65 | else: 66 | end_time = max_time 67 | 68 | print("Timestamps:") 69 | print(f"\t first: \t{min_time} ns") 70 | print(f"\t last: \t{max_time} ns") 71 | print(f"\trng_start: \t{start_time} ns") 72 | print(f"\trng_end : \t{end_time} ns") 73 | 74 | assert start_time >= min_time 75 | assert start_time <= max_time 76 | 
# Validate the requested range before touching the database.  An explicit
# exception is used instead of assert because assert statements are stripped
# under `python -O`, and the statements below permanently delete rows.
# The chained comparison also rejects a reversed range (start > end): with
# such a range the predicate `start < S or start > E` matches every row.
if not min_time <= start_time <= end_time <= max_time:
    raise ValueError(
        f"Invalid range {start_time} --> {end_time}: "
        f"must satisfy {min_time} <= start <= end <= {max_time}"
    )

print()
print(f"Trimmed range: {start_time} --> {end_time}")
print(f"Trimmed duration: {(end_time-start_time)/1000000000} seconds")

# Report how many rows fall outside the range.  Values are passed as bound
# parameters rather than being formatted into the SQL text.
trimRange = (start_time, end_time)
apiCount = connection.execute("select count(*) from rocpd_api").fetchone()[0]
apiRemoveCount = connection.execute(
    "select count(*) from rocpd_api where start < ? or start > ?",
    trimRange,
).fetchone()[0]
opCount = connection.execute("select count(*) from rocpd_op").fetchone()[0]
# Counts api/op link rows whose api call starts outside the range.
opRemoveCount = connection.execute(
    "select count(*) from rocpd_api A join rocpd_api_ops B on B.api_id = A.id "
    "where A.start < ? or A.start > ?",
    trimRange,
).fetchone()[0]
print()
print(f"Removing {apiRemoveCount} of {apiCount} api calls. {apiCount - apiRemoveCount} remaining")
print(f"Removing {opRemoveCount} of {opCount} async ops. {opCount - opRemoveCount} remaining")

if args.dryrun:
    print("Dry run, exiting")
    # Only SELECTs have run so far; release the file handle and exit with
    # status 0 without relying on the site-provided exit() helper.
    connection.close()
    raise SystemExit(0)

# Delete api calls outside the range, then the link rows and ops that no
# longer have anything referencing them.
connection.execute("delete from rocpd_api where start < ? or start > ?", trimRange)
connection.execute("delete from rocpd_api_ops where api_id not in (select id from rocpd_api)")
connection.execute("delete from rocpd_op where id not in (select op_id from rocpd_api_ops)")
try:
    connection.execute("delete from rocpd_monitor where start < (select min(start) from rocpd_api) or start > (select max(end) from rocpd_op)")
except sqlite3.OperationalError:
    # Best effort: presumably some trace files have no rocpd_monitor table
    # (TODO confirm).  Other database errors are no longer swallowed.
    pass

connection.commit()

# Clear any unused strings
stringCount = connection.execute("select count(*) from rocpd_string").fetchone()[0]
from rocpd.importer import RocpdImportData
from rocpd.strings import cleanStrings
importData = RocpdImportData()
importData.resumeExisting(connection)  # load the current db state
cleanStrings(importData, False)
stringRemainingCount = connection.execute("select count(*) from rocpd_string").fetchone()[0]
print(f"Removed {stringCount - stringRemainingCount} of {stringCount} strings. {stringRemainingCount} remaining")

# VACUUM cannot run inside a transaction.  Commit anything still pending
# (a no-op when nothing is open), then switch the connection to autocommit.
connection.commit()
connection.isolation_level = None
connection.execute("vacuum")
connection.commit()
connection.close()