├── .git-blame-ignore-revs ├── .github ├── actions │ └── linuxTest │ │ └── action.yml └── workflows │ ├── lint.yml │ ├── nightly_build_and_test.yml │ ├── pti-tools_build_and_test.yml │ └── sdk_build_and_test.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SOFTWARE ├── VERSION ├── build_utils ├── CMakeLists.txt ├── build_utils.py ├── convert_dll_to_lib.py ├── get_cl_headers.py ├── get_cl_tracing_headers.py ├── get_gmm_headers.py ├── get_gtpin_headers_legacy.py ├── get_gtpin_libs_legacy.py ├── get_iga_headers.py ├── get_igc_headers.py ├── get_itt.py ├── get_md_headers.py └── get_ze_headers.py ├── chapters ├── binary_instrumentation │ ├── GTPin.md │ └── OpenCLBuiltIn.md ├── binary_source_correlation │ ├── GenBinaryDecoding.md │ ├── GenSymbolsDecoding.md │ ├── LevelZero.md │ └── OpenCL.md ├── code_annotation │ └── ITT.md ├── device_activity_tracing │ ├── DPCXX.md │ ├── LevelZero.md │ └── OpenCL.md ├── metrics_collection │ ├── LevelZero.md │ ├── MetricsDiscoveryAPI.md │ └── PerfMonReg.md ├── runtime_api_tracing │ ├── LevelZero.md │ ├── OMPT.md │ └── OpenCL.md └── system_management │ └── LevelZero.md ├── loader ├── init.cc ├── loader.cc ├── loader.h └── tool.h ├── samples ├── cl_debug_info │ ├── CMakeLists.txt │ ├── README.md │ ├── cl_debug_info_collector.h │ └── tool.cc ├── cl_gemm │ ├── CMakeLists.txt │ ├── README.md │ └── main.cc ├── cl_gemm_inst │ ├── CMakeLists.txt │ ├── README.md │ └── main.cc ├── cl_gemm_itt │ ├── CMakeLists.txt │ ├── README.md │ ├── ittnotify.cc │ └── main.cc ├── cl_gpu_metrics │ ├── CMakeLists.txt │ ├── README.md │ ├── cl_metric_collector.h │ └── tool.cc ├── cl_gpu_query │ ├── CMakeLists.txt │ ├── README.md │ ├── cl_metric_collector.h │ └── tool.cc ├── cl_hot_functions │ ├── CMakeLists.txt │ ├── README.md │ ├── cl_api_collector.h │ └── tool.cc ├── cl_hot_kernels │ ├── CMakeLists.txt │ ├── README.md │ ├── cl_kernel_collector.h │ └── tool.cc ├── dpc_gemm │ ├── CMakeLists.txt │ ├── README.md │ └── main.cc ├── 
dpc_info │ ├── CMakeLists.txt │ ├── README.md │ └── main.cc ├── gpu_perfmon_read │ ├── CMakeLists.txt │ ├── README.md │ ├── gpu_perfmon_collector.h │ └── tool.cc ├── gpu_perfmon_set │ ├── CMakeLists.txt │ ├── README.md │ └── main.cc ├── omp_gemm │ ├── CMakeLists.txt │ ├── README.md │ └── main.cc ├── omp_hot_regions │ ├── CMakeLists.txt │ ├── README.md │ ├── omp_region_collector.h │ └── tool.cc ├── ze_debug_info │ ├── CMakeLists.txt │ ├── README.md │ ├── tool.cc │ └── ze_debug_info_collector.h ├── ze_gemm │ ├── CMakeLists.txt │ ├── README.md │ ├── gemm.cl │ ├── gemm.spv │ └── main.cc ├── ze_hot_functions │ ├── CMakeLists.txt │ ├── README.md │ ├── tool.cc │ ├── ze_api_callbacks.h │ └── ze_api_collector.h ├── ze_hot_kernels │ ├── CMakeLists.txt │ ├── README.md │ ├── tool.cc │ └── ze_kernel_collector.h ├── ze_info │ ├── CMakeLists.txt │ ├── README.md │ └── main.cc ├── ze_metric_info │ ├── CMakeLists.txt │ ├── README.md │ └── main.cc ├── ze_metric_query │ ├── CMakeLists.txt │ ├── README.md │ ├── tool.cc │ └── ze_metric_collector.h ├── ze_metric_streamer │ ├── CMakeLists.txt │ ├── README.md │ ├── tool.cc │ └── ze_metric_collector.h └── ze_sysman │ ├── CMakeLists.txt │ ├── README.md │ └── main.cc ├── sdk ├── .clang-format ├── .clang-tidy ├── .editorconfig ├── .gitignore ├── CMakeLists.txt ├── CMakePresets.json ├── PtiConfig.cmake ├── README.md ├── TODO.md ├── VERSION ├── cmake │ ├── Modules │ │ ├── FindDevUtilities.cmake │ │ ├── FindLevelZero.cmake │ │ ├── FindXpti.cmake │ │ ├── Findunified-runtime.cmake │ │ ├── macros.cmake │ │ └── pti_versioninfo.rc.in │ ├── bom_line.bash │ ├── bom_line_win.bat │ ├── bom_macro.cmake │ ├── coverage.cmake │ ├── generate_coverage_report.py │ ├── packaging.cmake │ ├── tags │ │ ├── lin_conda_tags.txt │ │ ├── lin_def_tags.txt │ │ ├── lin_pip_tags.txt │ │ ├── win_conda_tags.txt │ │ ├── win_def_tags.txt │ │ └── win_pip_tags.txt │ └── toolchains │ │ ├── clang_toolchain.cmake │ │ ├── icpx_asan_toolchain.cmake │ │ ├── icpx_fuzz_toolchain.cmake │ │ 
├── icpx_toolchain.cmake │ │ └── icpx_tsan_toolchain.cmake ├── docker │ ├── docker.mk │ ├── redhat-9 │ │ └── bldrun.Dockerfile │ ├── rocky-8 │ │ └── bldrun.Dockerfile │ ├── sles-15 │ │ └── bldrun.Dockerfile │ ├── ubuntu-22-04 │ │ └── bldrun.Dockerfile │ └── ubuntu-24-04 │ │ └── bldrun.Dockerfile ├── docs │ ├── README.md │ ├── doxygen │ │ └── Doxyfile │ ├── requirements.txt │ └── sphinx │ │ ├── Makefile │ │ └── source │ │ ├── _static │ │ ├── custom.css │ │ ├── favicons.png │ │ └── oneAPI-rgb-rev-100.png │ │ ├── build.rst │ │ ├── conf.py │ │ ├── devguide.rst │ │ ├── index.rst │ │ ├── install.rst │ │ ├── intro.rst │ │ ├── knownissues.rst │ │ ├── license.rst │ │ ├── linking.rst │ │ ├── quickstart.rst │ │ ├── samples.rst │ │ ├── systemreqs.rst │ │ ├── toctree.rst │ │ └── whatsnew.rst ├── env │ ├── oneapi-vars.bat │ ├── oneapi-vars.sh │ ├── vars.bat │ └── vars.sh ├── fuzz │ ├── CMakeLists.txt │ ├── README.md │ ├── test_pti_view.cc │ └── ubsan-ignore.txt ├── include │ └── pti │ │ ├── pti.h │ │ ├── pti_driver_levelzero_api_ids.h │ │ ├── pti_metrics.h │ │ ├── pti_runtime_sycl_api_ids.h │ │ ├── pti_version.h.in │ │ └── pti_view.h ├── samples │ ├── dlworkloads │ │ ├── CMakeLists.txt │ │ ├── CMakeLists_bkup.txt │ │ ├── README.md │ │ ├── device_memory.cpp │ │ ├── device_memory.h │ │ ├── main.cpp │ │ ├── model_mixedprogramming.cpp │ │ ├── model_mixedprogramming.h │ │ ├── operation_onednn.cpp │ │ ├── operation_onednn.h │ │ ├── operation_onedpl.cpp │ │ ├── operation_onedpl.h │ │ ├── operation_onemkl.cpp │ │ ├── operation_onemkl.h │ │ ├── operation_syclkernel.cpp │ │ ├── operation_syclkernel.h │ │ ├── queue.cpp │ │ ├── queue.h │ │ ├── tiny_tensor.h │ │ └── utils.h │ ├── dpc_gemm │ │ ├── CMakeLists.txt │ │ └── main.cc │ ├── dpc_gemm_threaded │ │ ├── CMakeLists.txt │ │ └── main.cc │ ├── iso3dfd_dpcpp │ │ ├── CMakeLists.txt │ │ ├── License.txt │ │ ├── include │ │ │ ├── device_selector.hpp │ │ │ └── iso3dfd.h │ │ ├── sample.json │ │ ├── src │ │ │ ├── CMakeLists.txt │ │ │ ├── 
iso3dfd.cpp │ │ │ ├── iso3dfd_kernels.cpp │ │ │ └── utils.cpp │ │ └── third-party-programs.txt │ ├── metrics_iso3dfd_dpcpp │ │ ├── CMakeLists.txt │ │ ├── License.txt │ │ ├── include │ │ │ ├── device_selector.hpp │ │ │ └── iso3dfd.h │ │ ├── src │ │ │ ├── CMakeLists.txt │ │ │ ├── iso3dfd.cpp │ │ │ ├── iso3dfd_kernels.cpp │ │ │ └── utils.cpp │ │ └── third-party-programs.txt │ ├── onemkl_gemm │ │ ├── CMakeLists.txt │ │ └── onemkl_gemm.cc │ ├── samples_utilities │ │ ├── metrics_utils.h │ │ └── samples_utils.h │ └── vector_sq_add │ │ ├── CMakeLists.txt │ │ └── vector_sq_add.cc ├── src │ ├── consumer_thread.h │ ├── default_buffer_callbacks.h │ ├── gen_tracing_callbacks.py │ ├── levelzero │ │ ├── collector_options.h │ │ ├── ze_collector.h │ │ ├── ze_driver_init.cc │ │ ├── ze_driver_init.h │ │ ├── ze_event_cache.h │ │ ├── ze_local_collection_helpers.h │ │ ├── ze_timer_helper.h │ │ └── ze_wrappers.h │ ├── lz_api_tracing_api_loader.h │ ├── metrics_handler.h │ ├── overhead_kinds.h │ ├── pti.cc │ ├── pti_lib_handler.h │ ├── pti_metrics.cc │ ├── pti_version.cc │ ├── pti_view.cc │ ├── pti_view_load.cc │ ├── sycl │ │ └── sycl_collector.h │ ├── trace_metrics.h │ ├── unikernel.h │ ├── utils │ │ ├── demangle.h │ │ ├── enum_conversion_helper.h │ │ ├── internal_helper.h │ │ ├── library_loader.h │ │ ├── platform_config.h.in │ │ ├── platform_strings.h │ │ ├── pti_assert.h │ │ ├── pti_filesystem.h │ │ ├── utils.h │ │ └── ze_utils.h │ ├── view_buffer.h │ ├── view_handler.h │ ├── view_record_info.h │ ├── xpti_adapter.cc │ └── xpti_adapter.h └── test │ ├── CMakeLists.txt │ ├── assert_exception_test.cc │ ├── gemm.cl │ ├── gemm.spv │ ├── init_tests.cc │ ├── local_ze_collection_fixture.cc │ ├── main_classapi_fixture.cc │ ├── main_dpcgemm_fixture.cc │ ├── main_startstop_fixture.cc │ ├── main_urgemm_fixture.cc │ ├── main_vecsqadd_fixture.cc │ ├── main_zegemm_fixture.cc │ ├── mem_ops_test.cc │ ├── metrics_vecsqadd_fixture.cc │ ├── multi_thread_correlation.awk │ ├── multi_threaded_submission.cc │ 
├── no_kernel_overlap.cc │ ├── perf_dpc_gemm_threaded.cc │ ├── perf_test.py │ ├── pti_assert_test.cc │ ├── suppressions │ ├── ASan.supp │ ├── LSan.supp │ ├── TSan.supp │ └── UBSan.supp │ ├── sycl_queue_id.cc │ ├── utils │ ├── sycl_config_info.h │ ├── test_helpers.h │ └── ze_config_info.h │ ├── view_buffer_test.cc │ └── view_gpu_local_test.cc ├── tests ├── run.py ├── samples │ ├── cl_debug_info.py │ ├── cl_gemm.py │ ├── cl_gemm_inst.py │ ├── cl_gemm_itt.py │ ├── cl_gpu_metrics.py │ ├── cl_gpu_query.py │ ├── cl_hot_functions.py │ ├── cl_hot_kernels.py │ ├── dpc_gemm.py │ ├── dpc_info.py │ ├── gpu_perfmon_read.py │ ├── gpu_perfmon_set.py │ ├── omp_gemm.py │ ├── omp_hot_regions.py │ ├── ze_debug_info.py │ ├── ze_gemm.py │ ├── ze_hot_functions.py │ ├── ze_hot_kernels.py │ ├── ze_info.py │ ├── ze_metric_info.py │ ├── ze_metric_query.py │ ├── ze_metric_streamer.py │ └── ze_sysman.py ├── tools │ ├── cl_tracer.py │ ├── gpuinfo.py │ ├── instcount.py │ ├── memaccess.py │ ├── oneprof.py │ ├── onetrace.py │ ├── sysmon.py │ ├── unitrace-build.py │ ├── unitrace-test.py │ └── ze_tracer.py └── utils.py ├── third-party-programs.txt ├── tools ├── cl_tracer │ ├── CMakeLists.txt │ ├── README.md │ ├── cl_api_callbacks.h │ ├── cl_api_collector.h │ ├── cl_ext_callbacks.h │ ├── cl_ext_collector.cc │ ├── cl_ext_collector.h │ ├── cl_kernel_collector.h │ ├── cl_tracer.h │ └── tool.cc ├── gpuinfo │ ├── CMakeLists.txt │ ├── README.md │ └── main.cc ├── instcount │ ├── CMakeLists.txt │ ├── README.md │ ├── instcount.cpp │ ├── instcount.hpp │ └── tool.cc ├── memaccess │ ├── CMakeLists.txt │ ├── README.md │ ├── memaccess.cpp │ ├── memaccess.hpp │ └── tool.cc ├── oneprof │ ├── CMakeLists.txt │ ├── README.md │ ├── finalizer.h │ ├── metric_query_cache.h │ ├── metric_query_collector.h │ ├── metric_storage.h │ ├── metric_streamer_collector.h │ ├── prof_options.h │ ├── prof_utils.h │ ├── profiler.h │ ├── result_storage.h │ └── tool.cc ├── onetrace │ ├── CMakeLists.txt │ ├── README.md │ ├── tool.cc │ └── 
unified_tracer.h ├── sysmon │ ├── CMakeLists.txt │ ├── README.md │ └── main.cc ├── unitrace │ ├── CMakeLists.txt │ ├── README.md │ ├── cmake │ │ └── FindXptifw.cmake │ ├── doc │ │ └── images │ │ │ ├── call-logging.png │ │ │ ├── call-trace.png │ │ │ ├── ccl_logging.png │ │ │ ├── ccl_summary_report.png │ │ │ ├── chrome_itt_logging.png │ │ │ ├── device-logging.png │ │ │ ├── device-no-thread-no-engine.png │ │ │ ├── device-per-engine.png │ │ │ ├── device-per-thread-per-engine.png │ │ │ ├── device-per-thread.png │ │ │ ├── device-timeline-text.png │ │ │ ├── device-timing-with-no-shape.png │ │ │ ├── device-timing.png │ │ │ ├── event_query.png │ │ │ ├── host-device-times.png │ │ │ ├── host-timing.png │ │ │ ├── implicit-per-tile-kernel-logging.png │ │ │ ├── implicit-per-tile-timing.png │ │ │ ├── kernel-info-with-no-shape.png │ │ │ ├── kernel-info.png │ │ │ ├── kernel-logging.png │ │ │ ├── kernel-submissions.png │ │ │ ├── metric-query.png │ │ │ ├── metric-sampling.png │ │ │ ├── mpi-counter-parameter.png │ │ │ ├── mpi-device-initiated.png │ │ │ ├── mpi-imbalance.png │ │ │ ├── mpi-logging.png │ │ │ ├── multipl-ranks-timelines.png │ │ │ ├── perfchart-multi-sets.png │ │ │ ├── perfchart.png │ │ │ ├── perfmetricsbrowser.png │ │ │ ├── perfmetricsbrowser2.png │ │ │ ├── perfmetricstrace.png │ │ │ ├── pytorch.png │ │ │ ├── roofline.png │ │ │ ├── stall-sampling.png │ │ │ ├── stallchart.png │ │ │ ├── stallreport.png │ │ │ ├── stallstatistics.png │ │ │ ├── sycl-logging.png │ │ │ └── throughput.png │ ├── requirements.txt │ ├── scripts │ │ ├── gen_l0_loader.py │ │ ├── gen_tracing_callbacks.py │ │ ├── gen_tracing_common_header.py │ │ ├── get_commit_hash.py │ │ ├── get_itt.py │ │ ├── metrics │ │ │ ├── addrasm.py │ │ │ ├── analyzeperfmetrics.py │ │ │ └── config │ │ │ │ ├── bmg │ │ │ │ ├── ComputeBasic.txt │ │ │ │ └── MemoryProfile.txt │ │ │ │ └── pvc │ │ │ │ ├── ComputeBasic.txt │ │ │ │ ├── GpuOffload.txt │ │ │ │ ├── L1ProfileReads.txt │ │ │ │ ├── L1ProfileSlmBankConflicts.txt │ │ │ │ ├── 
L1ProfileWrites.txt │ │ │ │ └── MemProfile.txt │ │ ├── roofline │ │ │ ├── device_configs │ │ │ │ └── PVC_1tile.csv │ │ │ ├── example │ │ │ │ ├── gemm_fp16_bytes.1799860 │ │ │ │ └── gemm_fp16_flops.1799853 │ │ │ ├── roofline.py │ │ │ └── roofline_libs.py │ │ ├── summary │ │ │ ├── categorize.py │ │ │ ├── schemas │ │ │ │ ├── LLaMA.ini │ │ │ │ └── LLaMA.json │ │ │ └── summary.py │ │ ├── tracemerge │ │ │ └── mergetrace.py │ │ └── uniview.py │ ├── src │ │ ├── chromelogger.h │ │ ├── collector_options.h │ │ ├── itt │ │ │ └── itt_collector.h │ │ ├── levelzero │ │ │ ├── ze_collector.h │ │ │ ├── ze_event_cache.h │ │ │ ├── ze_loader.h │ │ │ └── ze_metrics.h │ │ ├── mpi │ │ │ └── mpi.c │ │ ├── opencl │ │ │ ├── cl_api_callbacks.h │ │ │ ├── cl_collector.h │ │ │ └── cl_intel_ext.h │ │ ├── tracer.cc │ │ ├── tracer.h │ │ ├── unicontrol.h │ │ ├── unievent.h │ │ ├── unikernel.h │ │ ├── unimemory.h │ │ ├── unitimer.h │ │ ├── unitrace.cc │ │ ├── unitrace_ze_utils.h │ │ ├── utils │ │ │ └── library_loader.h │ │ ├── version.h │ │ └── xpti │ │ │ └── xpti_collector.h │ └── test │ │ ├── CMakeLists.txt │ │ ├── cl_gemm │ │ ├── CMakeLists.txt │ │ ├── gold │ │ │ ├── linux │ │ │ │ ├── d.txt │ │ │ │ ├── h.txt │ │ │ │ └── t.txt │ │ │ └── windows │ │ │ │ ├── d.txt │ │ │ │ ├── h.txt │ │ │ │ └── t.txt │ │ └── main.cc │ │ ├── dpc_gemm │ │ ├── CMakeLists.txt │ │ ├── gold │ │ │ ├── linux │ │ │ │ ├── d.txt │ │ │ │ ├── h.txt │ │ │ │ └── t.txt │ │ │ └── windows │ │ │ │ ├── d.txt │ │ │ │ ├── h.txt │ │ │ │ └── t.txt │ │ └── main.cc │ │ ├── graph │ │ ├── CMakeLists.txt │ │ ├── gold │ │ │ ├── linux │ │ │ │ ├── d.txt │ │ │ │ ├── h.txt │ │ │ │ └── t.txt │ │ │ └── windows │ │ │ │ ├── d.txt │ │ │ │ ├── h.txt │ │ │ │ └── t.txt │ │ └── graph.cpp │ │ ├── grf │ │ ├── CMakeLists.txt │ │ ├── gold │ │ │ ├── linux │ │ │ │ ├── d.txt │ │ │ │ ├── h.txt │ │ │ │ └── t.txt │ │ │ └── windows │ │ │ │ ├── d.txt │ │ │ │ ├── h.txt │ │ │ │ └── t.txt │ │ └── grf.cpp │ │ ├── omp_gemm │ │ ├── CMakeLists.txt │ │ ├── gold │ │ │ ├── linux │ 
│ │ │ ├── d.txt │ │ │ │ ├── h.txt │ │ │ │ └── t.txt │ │ │ └── windows │ │ │ │ ├── d.txt │ │ │ │ ├── h.txt │ │ │ │ └── t.txt │ │ └── main.cc │ │ ├── run_test.py │ │ ├── scenarios.txt │ │ ├── test_unitrace.py │ │ ├── unidiff.py │ │ └── ze_gemm │ │ ├── CMakeLists.txt │ │ ├── gemm.spv │ │ ├── gold │ │ ├── linux │ │ │ ├── d.txt │ │ │ ├── h.txt │ │ │ └── t.txt │ │ └── windows │ │ │ ├── d.txt │ │ │ ├── h.txt │ │ │ └── t.txt │ │ └── main.cc ├── utils │ ├── correlator.cc │ ├── correlator.h │ ├── logger.h │ └── trace_options.h └── ze_tracer │ ├── CMakeLists.txt │ ├── README.md │ ├── gen_tracing_callbacks.py │ ├── tool.cc │ ├── ze_api_collector.h │ ├── ze_event_cache.h │ ├── ze_kernel_collector.h │ └── ze_tracer.h └── utils ├── cl_api_tracer.h ├── cl_utils.h ├── demangle.h ├── gen_binary_decoder.h ├── gpu_elf_parser ├── CMakeLists.txt ├── include │ ├── elf_parser.h │ ├── elf_parser.hpp │ ├── elf_parser_def.hpp │ └── elf_parser_mapping.h └── src │ ├── dwarf_state_machine.cpp │ ├── dwarf_state_machine.hpp │ ├── elf_parser.cpp │ ├── section_debug_abbrev.cpp │ ├── section_debug_abbrev.hpp │ ├── section_debug_info.cpp │ ├── section_debug_info.hpp │ ├── section_debug_line.cpp │ └── section_debug_line.hpp ├── gtpin_utils.h ├── gtpin_utils ├── CMakeLists.txt ├── doc │ ├── capsule.puml │ ├── interfaces.png │ ├── interfaces.puml │ ├── results.png │ ├── results.puml │ ├── writer.png │ └── writer.puml ├── include │ ├── capsule.hpp │ ├── control.hpp │ ├── def_gpu.hpp │ ├── profiler.hpp │ ├── profiler_base.hpp │ ├── results.hpp │ ├── tool.hpp │ ├── tool_factory.hpp │ └── writer.hpp └── src │ ├── capsule.cpp │ ├── control.cpp │ ├── macro │ ├── add.cpp │ ├── and.cpp │ ├── atomic_store.cpp │ ├── cbit.cpp │ ├── cmp.cpp │ ├── mov.cpp │ ├── mul.cpp │ ├── not.cpp │ ├── or.cpp │ ├── sel.cpp │ ├── shl.cpp │ ├── shr.cpp │ ├── sub.cpp │ └── xor.cpp │ ├── profiler.cpp │ ├── results.cpp │ ├── tool.cpp │ ├── tool_factory.cpp │ └── writer.cpp ├── leb128.h ├── metric_device.h ├── metric_utils.h ├── 
pti_assert.h ├── shared_library.h ├── trace_guard.cc ├── trace_guard.h ├── utils.h └── ze_utils.h /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # [PTI-LIB] Update formatting. Add EditorConfig file. 2 | # This formats the whole project (sans dlworkloads and iso). It also adds 3 | # automation for detecting formatting problems. Not super relevant in the blame 4 | 7262dc724137b8972cb2b5ec1304f0cf236ddbac 5 | -------------------------------------------------------------------------------- /.github/actions/linuxTest/action.yml: -------------------------------------------------------------------------------- 1 | name: 'Linux tests for PTI' 2 | description: | 3 | Run the tests with a partition that is expected to ALWAYS succeed and 4 | another partition that MAY fail. The latter is designated as Quarantine. 5 | inputs: 6 | wdir: 7 | description: Working directory 8 | required: true 9 | preset: 10 | description: Preset used in the build to be tested. 11 | required: true 12 | 13 | runs: 14 | using: "composite" 15 | steps: 16 | 17 | - name: Tests that MUST succeed. 18 | working-directory: ${{ inputs.wdir }} 19 | shell: bash 20 | run: | 21 | source /opt/intel/oneapi/setvars.sh 22 | ctest --output-on-failure --preset ${{ inputs.preset }} -LE performance 23 | 24 | - name: Quarantined tests # flaky tests 25 | continue-on-error: true 26 | working-directory: ${{ inputs.wdir }} 27 | shell: bash 28 | run: | 29 | source /opt/intel/oneapi/setvars.sh 30 | if ctest --output-on-failure --preset ${{ inputs.preset }} -L performance; then 31 | echo "Test status: 0" 32 | echo "Quarantine tests all pass." 33 | else 34 | echo "Test status: 1" 35 | echo "::warning::Quarantine tests presented failures." 
36 | fi 37 | exit 0 38 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: Linter Checks 4 | 5 | on: 6 | push: 7 | branches: [ "master" ] 8 | paths: 9 | - sdk/** 10 | pull_request: 11 | branches: [ "master" ] 12 | paths: 13 | - sdk/** 14 | 15 | defaults: 16 | run: 17 | shell: bash 18 | 19 | permissions: 20 | contents: read 21 | 22 | jobs: 23 | run-format-check: 24 | 25 | container: 26 | image: ${{ vars.PTI_DOCKER_IMAGE }} 27 | 28 | if: vars.PTI_RUN_TESTS == 1 29 | 30 | runs-on: [self-hosted, Linux, pti] 31 | 32 | steps: 33 | - name: Checkout 34 | uses: actions/checkout@v4 35 | 36 | - name: Python Environment Setup 37 | uses: actions/setup-python@v5 38 | with: 39 | python-version: '3.11' 40 | 41 | - name: Install Dependencies 42 | run: | 43 | python -m pip install --upgrade setuptools wheel pip 44 | pip install clang-format==14 45 | 46 | - name: Check SDK Format 47 | working-directory: sdk 48 | run: | 49 | mkdir build 50 | cd build 51 | # Expedited configure 52 | cmake .. -DPTI_BUILD_TESTING=OFF -DPTI_BUILD_SAMPLES=OFF -DPTI_INSTALL=OFF 53 | make format-chk 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/build 2 | **/__pycache__ 3 | venv/ 4 | .vscode/ 5 | .vs/ 6 | .vim/ 7 | .cache/ 8 | compile_commands.json 9 | CMakeUserPresets.json 10 | error_diff.txt 11 | *.swp 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 25 | 26 | ## How to Contribute 27 | 28 | At this stage we do not accept pull requests. We will change this soon. 29 | 30 | For now - please, submit issues or start a discussion. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (C) 2024 Intel Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. 3 | 4 | ## Reporting a Vulnerability 5 | Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). 
# Snapshot residue preserved from the original line: end of /SECURITY.md,
# then /VERSION (content: 0.49.28), then /build_utils/build_utils.py below.
"""Helpers shared by the get_*.py dependency-fetching scripts.

Provides thin wrappers around git, curl and tar plus a few path utilities.
"""
import os
import sys
import shutil
import subprocess


def _warn_on_failure(action, status):
    """Print a warning to stderr when an external command returned non-zero."""
    if status != 0:
        sys.stderr.write(
            "Warning: {} failed with exit code {}\n".format(action, status))


def clone(url, commit, clone_path):
    """Clone `url` into `clone_path` (unless it already exists) and check out `commit`.

    Args:
        url: Git repository URL passed to `git clone`.
        commit: Revision passed to `git checkout`.
        clone_path: Directory that holds (or will hold) the working tree.

    Failures of the git commands are reported on stderr but do not raise,
    matching the original best-effort behaviour.
    """
    if not os.path.exists(clone_path):
        _warn_on_failure(
            "git clone " + url,
            subprocess.call(["git", "clone", url, clone_path]))
    # The checkout always runs, so an already existing clone is moved to the
    # requested revision as well.
    _warn_on_failure(
        "git checkout " + commit,
        subprocess.call(["git", "checkout", commit],
                        cwd=os.path.abspath(clone_path)))


def copy(src_path, dst_path, file_list):
    """Copy every file named in `file_list` from `src_path` to `dst_path`.

    An existing destination file is replaced.

    Args:
        src_path: Directory containing the source files.
        dst_path: Existing directory that receives the copies.
        file_list: Iterable of file names relative to both directories.

    Raises:
        AssertionError: if a source file does not exist. The source is
            validated *before* the old destination file is removed, so a
            missing source no longer destroys a previously copied file.
    """
    for name in file_list:
        src_file = os.path.join(src_path, name)
        dst_file = os.path.join(dst_path, name)

        # Raised explicitly (instead of `assert`) so the check also runs
        # under `python -O`; the exception type is kept for compatibility.
        if not os.path.isfile(src_file):
            raise AssertionError("source file not found: " + src_file)

        if os.path.isfile(dst_file):
            os.remove(dst_file)

        shutil.copy(src_file, dst_file)


def download(url, download_path):
    """Download `url` with curl into `download_path` unless already present.

    Args:
        url: Address of the file; its last '/'-separated component is used
            as the local file name.
        download_path: Target directory, created (with parents) if missing.

    Returns:
        Path of the local file (whether it was just downloaded or already
        existed).
    """
    os.makedirs(download_path, exist_ok=True)

    file_name = os.path.join(download_path, url.split('/')[-1])

    if not os.path.isfile(file_name):
        # Passing an argument list (instead of a space-joined string) keeps
        # paths and URLs containing spaces intact. The shell is still used
        # on Windows only, as before; subprocess quotes the list for it.
        command = ["curl", url, "--output", file_name]
        subprocess.call(command, shell=(sys.platform == 'win32'))

    return file_name


def unpack(arch_file, target_path):
    """Extract archive `arch_file` into `target_path` using `tar -xf`.

    Args:
        arch_file: Path of the archive to extract.
        target_path: Output directory, created (with parents) if missing.
    """
    os.makedirs(target_path, exist_ok=True)
    subprocess.call(["tar", "-xf", arch_file, "-C", target_path])


def get_root(build_path):
    """Return the directory three levels above `build_path`.

    Presumably `build_path` is `<root>/samples/<sample>/build` (going by the
    original local names) -- confirm against callers.
    """
    root_path = build_path
    for _ in range(3):
        root_path = os.path.split(root_path)[0]
    return root_path
/build_utils/convert_dll_to_lib.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import subprocess 4 | import shutil 5 | 6 | def main(): 7 | if len(sys.argv) < 3: 8 | print("Usage: python convert_dll_to_lib.py <.dll file path>") 9 | return 10 | 11 | if os.environ.get('OS','') != 'Windows_NT': 12 | print("This script is only for Windows") 13 | else: 14 | lib_path = sys.argv[1] 15 | dll_file = sys.argv[2] 16 | 17 | assert os.path.exists(shutil.which("dumpbin")) 18 | assert os.path.exists(shutil.which("lib")) 19 | assert os.path.exists(dll_file) 20 | assert dll_file.find(".dll") != -1 21 | 22 | if not os.path.exists(lib_path): 23 | os.mkdir(lib_path) 24 | 25 | lib_name = os.path.basename(dll_file) 26 | lib_name = os.path.splitext(lib_name)[0] 27 | 28 | def_file = os.path.join(lib_path, lib_name + ".def") 29 | lib_file = os.path.join(lib_path, lib_name + ".lib") 30 | 31 | cmd_commands = [ 32 | "echo EXPORTS >> " + def_file, 33 | "for /f " + '"' + "skip=19 tokens=4" + '"' + " %A in ('dumpbin /exports " + dll_file + "') do echo %A >> " + def_file, 34 | "lib /def:" + def_file + " /out:" + lib_file + " /machine:x64"] 35 | 36 | for command in cmd_commands: 37 | subprocess.call(command, shell=True) 38 | 39 | if __name__ == "__main__": 40 | main() -------------------------------------------------------------------------------- /build_utils/get_cl_headers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | import build_utils 5 | 6 | url = "https://github.com/KhronosGroup/OpenCL-Headers.git" 7 | commit = "dcd5bede6859d26833cd85f0d6bbcee7382dc9b3" 8 | 9 | def main(): 10 | if len(sys.argv) < 3: 11 | print("Usage: python get_ocl_headers.py ") 12 | return 13 | 14 | dst_path = sys.argv[1] 15 | if (not os.path.exists(dst_path)): 16 | os.mkdir(dst_path) 17 | dst_path = os.path.join(dst_path, "CL") 18 | if (not os.path.exists(dst_path)): 19 | 
os.mkdir(dst_path) 20 | 21 | clone_path = sys.argv[2] 22 | clone_path = os.path.join(clone_path, "OpenCL-Headers") 23 | build_utils.clone(url, commit, clone_path) 24 | 25 | src_path = os.path.join(clone_path, "CL") 26 | build_utils.copy(src_path, dst_path, ["cl.h", "cl_gl.h", "cl_version.h", "cl_platform.h"]) 27 | 28 | if __name__ == "__main__": 29 | main() -------------------------------------------------------------------------------- /build_utils/get_gmm_headers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | import build_utils 5 | 6 | url = "https://github.com/intel/gmmlib.git" 7 | commit = "9290546f493656d80a4aa05b63dfeaf9f8527b0a" 8 | 9 | def main(): 10 | if len(sys.argv) < 3: 11 | print("Usage: python get_gmm_headers.py ") 12 | return 13 | 14 | dst_path = sys.argv[1] 15 | if (not os.path.exists(dst_path)): 16 | os.mkdir(dst_path) 17 | dst_path = os.path.join(dst_path, "igdgmm") 18 | if (not os.path.exists(dst_path)): 19 | os.mkdir(dst_path) 20 | dst_path = os.path.join(dst_path, "inc") 21 | if (not os.path.exists(dst_path)): 22 | os.mkdir(dst_path) 23 | dst_path = os.path.join(dst_path, "common") 24 | if (not os.path.exists(dst_path)): 25 | os.mkdir(dst_path) 26 | 27 | clone_path = sys.argv[2] 28 | clone_path = os.path.join(clone_path, "gmmlib") 29 | build_utils.clone(url, commit, clone_path) 30 | 31 | src_path = os.path.join(clone_path, "Source") 32 | src_path = os.path.join(src_path, "inc") 33 | src_path = os.path.join(src_path, "common") 34 | 35 | build_utils.copy(src_path, dst_path, ["igfxfmid.h"]) 36 | 37 | if __name__ == "__main__": 38 | main() -------------------------------------------------------------------------------- /build_utils/get_gtpin_libs_legacy.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | import build_utils 5 | 6 | def main(): 7 | if len(sys.argv) < 3: 8 | print("Usage: python 
get_gtpin_libs_legacy.py ") 9 | return 10 | 11 | dst_path = sys.argv[1] 12 | if (not os.path.exists(dst_path)): 13 | os.mkdir(dst_path) 14 | dst_path = os.path.join(dst_path, "GTPIN") 15 | if (not os.path.exists(dst_path)): 16 | os.mkdir(dst_path) 17 | 18 | build_path = sys.argv[2] 19 | if sys.platform == 'win32': 20 | gtpin_package = "external-gtpin-2.19-win.zip" 21 | download_link = "https://downloadmirror.intel.com/686382/" 22 | else: 23 | gtpin_package = "external-gtpin-2.19-linux.tar.xz" 24 | download_link = "https://downloadmirror.intel.com/686383/" 25 | build_utils.download(download_link + gtpin_package, build_path) 26 | arch_file = os.path.join(build_path, gtpin_package) 27 | build_utils.unpack(arch_file, build_path) 28 | 29 | src_path = os.path.join(build_path, "Profilers") 30 | src_path = os.path.join(src_path, "Lib") 31 | src_path = os.path.join(src_path, "intel64") 32 | 33 | gtpin_libs = ["gtpin.lib"]\ 34 | if sys.platform == 'win32' else\ 35 | ["libgcc_s.so.1", 36 | "libged.so", 37 | "libgtpin.so", 38 | "libgtpin_core.so", 39 | "libiga_wrapper.so", 40 | "libstdc++.so.6"] 41 | 42 | build_utils.copy(src_path, dst_path, gtpin_libs) 43 | 44 | if sys.platform == 'win32': 45 | gtpin_dlls = [ 46 | "gtpin.dll", 47 | "ged.dll", 48 | "gtpin_core.dll", 49 | "iga_wrapper.dll"] 50 | 51 | build_utils.copy(src_path, build_path, gtpin_dlls) 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /build_utils/get_iga_headers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | import build_utils 5 | 6 | url = "https://github.com/intel/intel-graphics-compiler.git" 7 | commit = "d5bef0c991c41e03a567187eb78fe35b6c116847" 8 | 9 | def main(): 10 | if len(sys.argv) < 3: 11 | print("Usage: python get_iga_headers.py ") 12 | return 13 | 14 | dst_path = sys.argv[1] 15 | if (not os.path.exists(dst_path)): 16 | os.mkdir(dst_path) 17 | 
dst_path = os.path.join(dst_path, "iga") 18 | if (not os.path.exists(dst_path)): 19 | os.mkdir(dst_path) 20 | 21 | clone_path = sys.argv[2] 22 | clone_path = os.path.join(clone_path, "intel-graphics-compiler") 23 | build_utils.clone(url, commit, clone_path) 24 | 25 | src_path = os.path.join(clone_path, "visa") 26 | src_path = os.path.join(src_path, "iga") 27 | src_path = os.path.join(src_path, "IGALibrary") 28 | src_path = os.path.join(src_path, "api") 29 | 30 | build_utils.copy(src_path, dst_path, 31 | ["iga.h", 32 | "iga_types_ext.hpp", 33 | "iga_types_swsb.hpp", 34 | "iga_bxml_ops.hpp", 35 | "iga_bxml_enums.hpp", 36 | "kv.h", 37 | "kv.hpp"]) 38 | 39 | if __name__ == "__main__": 40 | main() -------------------------------------------------------------------------------- /build_utils/get_igc_headers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | import build_utils 5 | 6 | url = "https://github.com/intel/intel-graphics-compiler.git" 7 | commit = "d5bef0c991c41e03a567187eb78fe35b6c116847" 8 | 9 | def main(): 10 | if len(sys.argv) < 3: 11 | print("Usage: python get_igc_headers.py ") 12 | return 13 | 14 | dst_path = sys.argv[1] 15 | if (not os.path.exists(dst_path)): 16 | os.mkdir(dst_path) 17 | dst_path = os.path.join(dst_path, "igc") 18 | if (not os.path.exists(dst_path)): 19 | os.mkdir(dst_path) 20 | dst_path = os.path.join(dst_path, "ocl_igc_shared") 21 | if (not os.path.exists(dst_path)): 22 | os.mkdir(dst_path) 23 | dst_path = os.path.join(dst_path, "executable_format") 24 | if (not os.path.exists(dst_path)): 25 | os.mkdir(dst_path) 26 | 27 | clone_path = sys.argv[2] 28 | clone_path = os.path.join(clone_path, "intel-graphics-compiler") 29 | build_utils.clone(url, commit, clone_path) 30 | 31 | src_path = os.path.join(clone_path, "IGC") 32 | src_path = os.path.join(src_path, "AdaptorOCL") 33 | src_path = os.path.join(src_path, "ocl_igc_shared") 34 | src_path = os.path.join(src_path, 
"executable_format") 35 | 36 | build_utils.copy(src_path, dst_path, ["program_debug_data.h", "patch_list.h"]) 37 | 38 | if __name__ == "__main__": 39 | main() -------------------------------------------------------------------------------- /build_utils/get_itt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | import build_utils 5 | 6 | url = "https://github.com/intel/ittapi.git" 7 | commit = "83d6ca4bb7df67dfacac7618bd7ef5f12735c8a7" 8 | 9 | def main(): 10 | if len(sys.argv) < 3: 11 | print("Usage: python get_itt.py ") 12 | return 13 | 14 | dst_path = sys.argv[1] 15 | if (not os.path.exists(dst_path)): 16 | os.mkdir(dst_path) 17 | dst_path = os.path.join(dst_path, "ITT") 18 | if (not os.path.exists(dst_path)): 19 | os.mkdir(dst_path) 20 | 21 | clone_path = sys.argv[2] 22 | clone_path = os.path.join(clone_path, "ittapi") 23 | build_utils.clone(url, commit, clone_path) 24 | 25 | src_path = os.path.join(clone_path, "src") 26 | src_path = os.path.join(src_path, "ittnotify") 27 | build_utils.copy(src_path, dst_path, ["disable_warnings.h", "ittnotify_config.h", 28 | "ittnotify_static.c", "ittnotify_static.h", "ittnotify_types.h"]) 29 | 30 | src_path = os.path.join(clone_path, "include") 31 | build_utils.copy(src_path, dst_path, ["ittnotify.h"]) 32 | 33 | dst_path = os.path.join(dst_path, "legacy") 34 | if (not os.path.exists(dst_path)): 35 | os.mkdir(dst_path) 36 | 37 | src_path = os.path.join(src_path, "legacy") 38 | build_utils.copy(src_path, dst_path, ["ittnotify.h"]) 39 | 40 | if __name__ == "__main__": 41 | main() -------------------------------------------------------------------------------- /build_utils/get_md_headers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | import build_utils 5 | 6 | url = "https://github.com/intel/metrics-discovery.git" 7 | commit = "4ff8df044cbd9b270f2cf9561d1a60f826679f62" 8 | 9 | def main(): 
10 | if len(sys.argv) < 3: 11 | print("Usage: python get_md_headers.py ") 12 | return 13 | 14 | dst_path = sys.argv[1] 15 | if (not os.path.exists(dst_path)): 16 | os.mkdir(dst_path) 17 | 18 | clone_path = sys.argv[2] 19 | clone_path = os.path.join(clone_path, "metrics-discovery") 20 | build_utils.clone(url, commit, clone_path) 21 | 22 | src_path = os.path.join(clone_path, "inc") 23 | src_path = os.path.join(src_path, "common") 24 | src_path = os.path.join(src_path, "instrumentation") 25 | src_path = os.path.join(src_path, "api") 26 | 27 | build_utils.copy(src_path, dst_path, ["metrics_discovery_api.h"]) 28 | 29 | if __name__ == "__main__": 30 | main() 31 | -------------------------------------------------------------------------------- /build_utils/get_ze_headers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | import build_utils 5 | 6 | url = "https://github.com/oneapi-src/level-zero.git" 7 | default_commit = "a4afcb39ee265e595d3f0aa57b25b5e845fb494c" 8 | 9 | def main(): 10 | if len(sys.argv) < 3: 11 | print("Usage: python get_ze_headers.py []") 12 | return 13 | 14 | dst_path = sys.argv[1] 15 | if (not os.path.exists(dst_path)): 16 | os.mkdir(dst_path) 17 | dst_path = os.path.join(dst_path, "level_zero") 18 | if (not os.path.exists(dst_path)): 19 | os.mkdir(dst_path) 20 | 21 | clone_path = sys.argv[2] 22 | clone_path = os.path.join(clone_path, "level-zero") 23 | 24 | if len(sys.argv) > 3: 25 | commit = sys.argv[3] 26 | else: 27 | commit = default_commit 28 | 29 | build_utils.clone(url, commit, clone_path) 30 | 31 | src_path = os.path.join(clone_path, "include") 32 | build_utils.copy(src_path, dst_path,\ 33 | ["ze_api.h", "zes_api.h", "zet_api.h"]) 34 | 35 | if (not os.path.exists(os.path.join(dst_path, "layers"))): 36 | os.mkdir(os.path.join(dst_path, "layers")) 37 | src_path = os.path.join(clone_path, "include", "layers") 38 | build_utils.copy(src_path, os.path.join(dst_path, "layers"),\ 39 
| ["zel_tracing_api.h", "zel_tracing_register_cb.h"]) 40 | 41 | if __name__ == "__main__": 42 | main() 43 | -------------------------------------------------------------------------------- /loader/init.cc: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #include "tool.h" 8 | #include "utils.h" 9 | 10 | #if defined(_WIN32) 11 | 12 | void AtExit() { 13 | DisableProfiling(); 14 | } 15 | 16 | extern "C" PTI_EXPORT 17 | DWORD Init(void*) { 18 | atexit(AtExit); 19 | EnableProfiling(); 20 | return 0; 21 | } 22 | 23 | #else 24 | 25 | static bool IsEnabled() { 26 | std::string value = utils::GetEnv("PTI_ENABLE"); 27 | if (value == "1") { 28 | return true; 29 | } 30 | return false; 31 | } 32 | 33 | void __attribute__((constructor)) Load() { 34 | if (IsEnabled()) { 35 | EnableProfiling(); 36 | } 37 | } 38 | 39 | void __attribute__((destructor)) Unload() { 40 | DisableProfiling(); 41 | } 42 | 43 | #endif -------------------------------------------------------------------------------- /loader/loader.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef PTI_SAMPLES_LOADER_LOADER_H_ 8 | #define PTI_SAMPLES_LOADER_LOADER_H_ 9 | 10 | #if defined(_WIN32) 11 | #include 12 | extern "C" DWORD Init(void*); 13 | #endif 14 | 15 | extern "C" void Usage(); 16 | extern "C" int ParseArgs(int argc, char* argv[]); 17 | extern "C" void SetToolEnv(); 18 | 19 | #endif // PTI_SAMPLES_LOADER_LOADER_H_ -------------------------------------------------------------------------------- 
/loader/tool.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef PTI_SAMPLES_LOADER_TOOL_H_ 8 | #define PTI_SAMPLES_LOADER_TOOL_H_ 9 | 10 | void EnableProfiling(); 11 | void DisableProfiling(); 12 | 13 | #endif // PTI_SAMPLES_LOADER_TOOL_H_ -------------------------------------------------------------------------------- /samples/cl_debug_info/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_OpenCL_Debug_Info CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(clt_debug_info SHARED "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) 12 | target_include_directories(clt_debug_info 13 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 14 | if(CMAKE_INCLUDE_PATH) 15 | target_include_directories(clt_debug_info 16 | PUBLIC "${CMAKE_INCLUDE_PATH}") 17 | endif() 18 | 19 | add_subdirectory("${PTI_CMAKE_MACRO_DIR}/../utils/gpu_elf_parser" debug_info_parser) 20 | FindPtiElfParserHeaders(clt_debug_info) 21 | target_link_libraries(clt_debug_info debug_info_parser) 22 | 23 | FindOpenCLLibrary(clt_debug_info) 24 | FindOpenCLHeaders(clt_debug_info) 25 | 26 | GetOpenCLTracingHeaders(clt_debug_info) 27 | 28 | FindIGALibrary(clt_debug_info) 29 | GetIGAHeaders(clt_debug_info) 30 | 31 | GetIGCHeaders(clt_debug_info) 32 | GetGmmHeaders(clt_debug_info) 33 | 34 | # Loader 35 | 36 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_debug_info") 37 | add_executable(cl_debug_info "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 38 | target_include_directories(cl_debug_info 39 
| PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 40 | if(UNIX) 41 | target_link_libraries(cl_debug_info 42 | dl) 43 | endif() 44 | -------------------------------------------------------------------------------- /samples/cl_debug_info/tool.cc: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #include "cl_debug_info_collector.h" 8 | 9 | static ClDebugInfoCollector* collector = nullptr; 10 | 11 | // External Tool Interface //////////////////////////////////////////////////// 12 | 13 | extern "C" PTI_EXPORT void Usage() { 14 | std::cout << "Usage: ./cl_debug_info[.exe] " << std::endl; 15 | } 16 | 17 | extern "C" PTI_EXPORT int ParseArgs(int argc, char* argv[]) { return 1; } 18 | 19 | extern "C" PTI_EXPORT void SetToolEnv() {} 20 | 21 | // Internal Tool Functionality //////////////////////////////////////////////// 22 | 23 | static void PrintResults() { 24 | PTI_ASSERT(collector != nullptr); 25 | 26 | const KernelDebugInfoMap& debug_info_map = collector->GetKernelDebugInfoMap(); 27 | if (debug_info_map.size() == 0) { 28 | return; 29 | } 30 | 31 | std::cerr << std::endl; 32 | for (auto pair : debug_info_map) { 33 | ClDebugInfoCollector::PrintKernelDebugInfo(pair.first, pair.second); 34 | } 35 | } 36 | 37 | // Internal Tool Interface //////////////////////////////////////////////////// 38 | 39 | void EnableProfiling() { 40 | cl_device_id device = utils::cl::GetIntelDevice(CL_DEVICE_TYPE_GPU); 41 | if (device == nullptr) { 42 | std::cerr << "[WARNING] Unable to find target GPU device for tracing" << std::endl; 43 | return; 44 | } 45 | 46 | collector = ClDebugInfoCollector::Create(device); 47 | } 48 | 49 | void DisableProfiling() { 50 | if (collector != nullptr) { 51 | collector->DisableTracing(); 52 | PrintResults(); 
53 | delete collector; 54 | collector = nullptr; // reset the file-static pointer so a repeated call cannot double-delete 55 | } 56 | } -------------------------------------------------------------------------------- /samples/cl_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_OpenCL_GEMM CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | add_executable(cl_gemm main.cc) 10 | target_include_directories(cl_gemm 11 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 12 | if(CMAKE_INCLUDE_PATH) 13 | target_include_directories(cl_gemm 14 | PUBLIC "${CMAKE_INCLUDE_PATH}") 15 | endif() 16 | 17 | FindOpenCLLibrary(cl_gemm) 18 | FindOpenCLHeaders(cl_gemm) -------------------------------------------------------------------------------- /samples/cl_gemm_inst/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_OpenCL_GEMM_Instrumented CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | add_executable(cl_gemm_inst main.cc) 10 | target_include_directories(cl_gemm_inst 11 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 12 | if(CMAKE_INCLUDE_PATH) 13 | target_include_directories(cl_gemm_inst 14 | PUBLIC "${CMAKE_INCLUDE_PATH}") 15 | endif() 16 | 17 | FindOpenCLLibrary(cl_gemm_inst) 18 | FindOpenCLHeaders(cl_gemm_inst) -------------------------------------------------------------------------------- /samples/cl_gemm_itt/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_OpenCL_GEMM_ITT CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | 
add_executable(cl_gemm_itt main.cc ittnotify.cc) 10 | target_include_directories(cl_gemm_itt 11 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 12 | if(CMAKE_INCLUDE_PATH) 13 | target_include_directories(cl_gemm_itt 14 | PUBLIC "${CMAKE_INCLUDE_PATH}") 15 | endif() 16 | 17 | FindOpenCLLibrary(cl_gemm_itt) 18 | FindOpenCLHeaders(cl_gemm_itt) 19 | 20 | GetITT(cl_gemm_itt) 21 | 22 | if(UNIX) 23 | target_link_libraries(cl_gemm_itt 24 | dl) 25 | endif() -------------------------------------------------------------------------------- /samples/cl_gemm_itt/ittnotify.cc: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #define UNICODE 8 | #include -------------------------------------------------------------------------------- /samples/cl_gpu_metrics/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_OpenCL_GPU_Metrics CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPTI_KERNEL_INTERVALS=1") 12 | add_library(clt_gpu_metrics SHARED 13 | "${PROJECT_SOURCE_DIR}/../../loader/init.cc" 14 | tool.cc) 15 | target_include_directories(clt_gpu_metrics 16 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils" 17 | PRIVATE "${PROJECT_SOURCE_DIR}/../cl_hot_kernels") 18 | if(CMAKE_INCLUDE_PATH) 19 | target_include_directories(clt_gpu_metrics 20 | PUBLIC "${CMAKE_INCLUDE_PATH}") 21 | endif() 22 | if(UNIX) 23 | target_link_libraries(clt_gpu_metrics 24 | pthread) 25 | endif() 26 | 27 | FindOpenCLLibrary(clt_gpu_metrics) 28 | FindOpenCLHeaders(clt_gpu_metrics) 29 | 
30 | GetOpenCLTracingHeaders(clt_gpu_metrics) 31 | 32 | GetMDHeaders(clt_gpu_metrics) 33 | CheckForMDLibrary(clt_gpu_metrics) 34 | 35 | # Loader 36 | 37 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_gpu_metrics") 38 | add_executable(cl_gpu_metrics "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 39 | target_include_directories(cl_gpu_metrics 40 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 41 | if(UNIX) 42 | target_link_libraries(cl_gpu_metrics 43 | dl) 44 | endif() -------------------------------------------------------------------------------- /samples/cl_gpu_query/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_OpenCL_GPU_Query CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(clt_gpu_query SHARED 12 | "${PROJECT_SOURCE_DIR}/../../loader/init.cc" 13 | "${PROJECT_SOURCE_DIR}/../../utils/trace_guard.cc" 14 | tool.cc) 15 | target_include_directories(clt_gpu_query 16 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 17 | if(CMAKE_INCLUDE_PATH) 18 | target_include_directories(clt_gpu_query 19 | PUBLIC "${CMAKE_INCLUDE_PATH}") 20 | endif() 21 | 22 | FindOpenCLLibrary(clt_gpu_query) 23 | FindOpenCLHeaders(clt_gpu_query) 24 | 25 | GetOpenCLTracingHeaders(clt_gpu_query) 26 | 27 | GetMDHeaders(clt_gpu_query) 28 | CheckForMDLibrary(clt_gpu_query) 29 | 30 | # Loader 31 | 32 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_gpu_query") 33 | add_executable(cl_gpu_query "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 34 | target_include_directories(cl_gpu_query 35 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 36 | if(UNIX) 37 | target_link_libraries(cl_gpu_query 38 | dl) 39 | endif() -------------------------------------------------------------------------------- /samples/cl_hot_functions/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_OpenCL_Hot_Functions CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(clt_hot_functions SHARED 12 | "${PROJECT_SOURCE_DIR}/../../loader/init.cc" 13 | tool.cc) 14 | target_include_directories(clt_hot_functions 15 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 16 | if(CMAKE_INCLUDE_PATH) 17 | target_include_directories(clt_hot_functions 18 | PUBLIC "${CMAKE_INCLUDE_PATH}") 19 | endif() 20 | 21 | FindOpenCLLibrary(clt_hot_functions) 22 | FindOpenCLHeaders(clt_hot_functions) 23 | 24 | GetOpenCLTracingHeaders(clt_hot_functions) 25 | 26 | # Loader 27 | 28 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_hot_functions") 29 | add_executable(cl_hot_functions "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 30 | target_include_directories(cl_hot_functions 31 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 32 | if(UNIX) 33 | target_link_libraries(cl_hot_functions 34 | dl) 35 | endif() -------------------------------------------------------------------------------- /samples/cl_hot_kernels/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_OpenCL_Hot_Kernels CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(clt_hot_kernels SHARED 12 | "${PROJECT_SOURCE_DIR}/../../loader/init.cc" 13 | tool.cc) 14 | target_include_directories(clt_hot_kernels 15 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 16 | if(CMAKE_INCLUDE_PATH) 17 | target_include_directories(clt_hot_kernels 18 | PUBLIC "${CMAKE_INCLUDE_PATH}") 19 | endif() 20 | 21 | 
FindOpenCLLibrary(clt_hot_kernels) 22 | FindOpenCLHeaders(clt_hot_kernels) 23 | 24 | GetOpenCLTracingHeaders(clt_hot_kernels) 25 | 26 | # Loader 27 | 28 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_hot_kernels") 29 | add_executable(cl_hot_kernels "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 30 | target_include_directories(cl_hot_kernels 31 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 32 | if(UNIX) 33 | target_link_libraries(cl_hot_kernels 34 | dl) 35 | endif() -------------------------------------------------------------------------------- /samples/dpc_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | if(WIN32) 6 | set(CMAKE_CXX_COMPILER "dpcpp-cl.exe") 7 | else() 8 | set(CMAKE_CXX_COMPILER "icpx") 9 | endif() 10 | 11 | project(PTI_Samples_DPC_GEMM CXX) 12 | SetCompilerFlags() 13 | SetBuildType() 14 | 15 | add_executable(dpc_gemm main.cc) 16 | 17 | target_compile_options(dpc_gemm PUBLIC -fsycl -gline-tables-only) 18 | 19 | # target_link_options CMake >= 3.13 20 | set_target_properties(dpc_gemm PROPERTIES LINK_FLAGS "-fsycl -gline-tables-only") 21 | 22 | target_include_directories(dpc_gemm 23 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 24 | if(CMAKE_INCLUDE_PATH) 25 | target_include_directories(dpc_gemm 26 | PUBLIC "${CMAKE_INCLUDE_PATH}") 27 | endif() 28 | -------------------------------------------------------------------------------- /samples/dpc_info/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | if(WIN32) 6 | set(CMAKE_CXX_COMPILER "dpcpp-cl.exe") 7 | set(CMAKE_GENERATOR_TOOLSET "Intel(R) oneAPI DPC++ Compiler") 8 | else() 9 | set(CMAKE_CXX_COMPILER 
"icpx") 10 | endif() 11 | 12 | project(PTI_Samples_DPC_Info CXX) 13 | SetCompilerFlags() 14 | SetBuildType() 15 | 16 | add_executable(dpc_info main.cc) 17 | 18 | if(NOT WIN32) 19 | target_compile_options(dpc_info PUBLIC -fsycl) 20 | set_target_properties(dpc_info PROPERTIES LINK_FLAGS "-fsycl") 21 | endif() 22 | 23 | if(CMAKE_INCLUDE_PATH) 24 | target_include_directories(dpc_info 25 | PUBLIC "${CMAKE_INCLUDE_PATH}") 26 | endif() 27 | -------------------------------------------------------------------------------- /samples/gpu_perfmon_read/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_GPU_PerfMon_Read CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(gput_perfmon_read SHARED "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) 12 | target_include_directories(gput_perfmon_read 13 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 14 | if(CMAKE_INCLUDE_PATH) 15 | target_include_directories(gput_perfmon_read 16 | PUBLIC "${CMAKE_INCLUDE_PATH}") 17 | endif() 18 | 19 | FindIGALibrary(gput_perfmon_read) 20 | GetIGAHeaders(gput_perfmon_read) 21 | 22 | FindGTPinLibrary_legacy(gput_perfmon_read) 23 | GetGTPinHeaders_legacy(gput_perfmon_read) 24 | 25 | # Loader 26 | 27 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=gput_perfmon_read") 28 | add_executable(gpu_perfmon_read "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 29 | target_include_directories(gpu_perfmon_read 30 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 31 | if(UNIX) 32 | target_link_libraries(gpu_perfmon_read 33 | dl) 34 | endif() -------------------------------------------------------------------------------- /samples/gpu_perfmon_read/tool.cc: -------------------------------------------------------------------------------- 1 | 
//============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #include "gpu_perfmon_collector.h" 8 | 9 | static GpuPerfMonCollector* collector = nullptr; 10 | 11 | // External Tool Interface //////////////////////////////////////////////////// 12 | 13 | extern "C" PTI_EXPORT 14 | void Usage() { 15 | std::cout << 16 | "Usage: ./gpu_perfmon_read[.exe] " << 17 | std::endl; 18 | } 19 | 20 | extern "C" PTI_EXPORT 21 | int ParseArgs(int argc, char* argv[]) { 22 | return 1; 23 | } 24 | 25 | extern "C" PTI_EXPORT 26 | void SetToolEnv() { 27 | utils::SetEnv("ZET_ENABLE_API_TRACING_EXP", "1"); 28 | utils::SetEnv("ZET_ENABLE_PROGRAM_INSTRUMENTATION", "1"); 29 | } 30 | 31 | // Internal Tool Functionality //////////////////////////////////////////////// 32 | 33 | static void PrintResults() { 34 | PTI_ASSERT(collector != nullptr); 35 | 36 | const KernelDataMap& kernel_data_map = collector->GetKernelDataMap(); 37 | if (kernel_data_map.size() == 0) { 38 | std::cerr << "[WARNING] No kernels were collected" << std::endl; 39 | return; 40 | } 41 | 42 | std::cerr << std::endl; 43 | GpuPerfMonCollector::PrintResults(kernel_data_map); 44 | } 45 | 46 | // Internal Tool Interface //////////////////////////////////////////////////// 47 | 48 | void EnableProfiling() { 49 | PTI_ASSERT(collector == nullptr); 50 | collector = GpuPerfMonCollector::Create(); 51 | } 52 | 53 | void DisableProfiling() { 54 | if (collector != nullptr) { 55 | PrintResults(); 56 | delete collector; 57 | collector = nullptr; // keeps the EnableProfiling() precondition (collector == nullptr) true and prevents a double delete 58 | } 59 | } -------------------------------------------------------------------------------- /samples/gpu_perfmon_set/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | 
project(PTI_Samples_GPU_PerfMon_Set CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | if(NOT UNIX) 10 | message(FATAL_ERROR "Linux only is supported") 11 | endif() 12 | 13 | add_executable(gpu_perfmon_set main.cc) 14 | target_include_directories(gpu_perfmon_set 15 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 16 | if(CMAKE_INCLUDE_PATH) 17 | target_include_directories(gpu_perfmon_set 18 | PUBLIC "${CMAKE_INCLUDE_PATH}") 19 | endif() 20 | 21 | FindDRMLibrary(gpu_perfmon_set) 22 | CheckDRMHeaders(gpu_perfmon_set) -------------------------------------------------------------------------------- /samples/gpu_perfmon_set/README.md: -------------------------------------------------------------------------------- 1 | # GPU PerfMon Set 2 | ## Overview 3 | This sample utility allows one to tune the Intel(R) Processor Graphics execution unit (EU) performance monitoring register to collect hardware events of a given type. 4 | 5 | The utility has a single parameter that contains the value one would like to set for event collection. Actual values are described in the Programmer's Reference Manual for Intel(R) Processor Graphics, [Volume 14: Observability](https://01.org/sites/default/files/documentation/intel-gfx-prm-osrc-kbl-vol14-observability.pdf), e.g.: 6 | ```sh 7 | ./gpu_perfmon_set 5 # EU Stall Collection 8 | ``` 9 | On start, the utility configures the driver, and the GPU collects the desired events until the user presses the ENTER key. The expected output is the following: 10 | ``` 11 | Configuration with the given GUID is already added 12 | GPU PefMon configuration is completed 13 | Press ENTER to deconfigure the driver... 14 | ``` 15 | To read the collected values for the event, one should use binary instrumentation techniques or inline assembly. 
16 | 17 | ## Supported OS 18 | - Linux 19 | 20 | ## Prerequisites 21 | - [CMake](https://cmake.org/) (version 3.12 and above) 22 | - [Git](https://git-scm.com/) (version 1.8 and above) 23 | - [Python](https://www.python.org/) (version 2.7 and above) 24 | - [libdrm](https://gitlab.freedesktop.org/mesa/drm) 25 | 26 | ## Build and Run 27 | ### Linux 28 | Run the following commands to build the sample: 29 | ```sh 30 | cd /samples/gpu_perfmon_set 31 | mkdir build 32 | cd build 33 | cmake -DCMAKE_BUILD_TYPE=Release .. 34 | make 35 | ``` 36 | Use this command line to run the utility: 37 | ```sh 38 | ./gpu_perfmon_set 39 | ``` -------------------------------------------------------------------------------- /samples/omp_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | if(WIN32) 6 | set(CMAKE_CXX_COMPILER "icl.exe") 7 | else() 8 | set(CMAKE_CXX_COMPILER "icpx") 9 | endif() 10 | 11 | project(PTI_Samples_OpenMP_GEMM CXX) 12 | SetCompilerFlags() 13 | SetBuildType() 14 | 15 | if(WIN32) 16 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qnextgen /Qopenmp") 17 | else() 18 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fiopenmp -fopenmp-targets=spir64") 19 | endif() 20 | 21 | add_executable(omp_gemm main.cc) 22 | target_include_directories(omp_gemm 23 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 24 | if(CMAKE_INCLUDE_PATH) 25 | target_include_directories(omp_gemm 26 | PUBLIC "${CMAKE_INCLUDE_PATH}") 27 | endif() -------------------------------------------------------------------------------- /samples/omp_hot_regions/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | if(WIN32) 6 | 
set(CMAKE_CXX_COMPILER "icl.exe") 7 | else() 8 | set(CMAKE_CXX_COMPILER "icpx") 9 | endif() 10 | 11 | project(PTI_Samples_OpenMP_Hot_Regions CXX) 12 | SetCompilerFlags() 13 | SetBuildType() 14 | 15 | CheckForOMPTHeaders() 16 | 17 | add_library(omp_hot_regions SHARED tool.cc) 18 | target_include_directories(omp_hot_regions 19 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 20 | if(CMAKE_INCLUDE_PATH) 21 | target_include_directories(omp_hot_regions 22 | PUBLIC "${CMAKE_INCLUDE_PATH}") 23 | endif() -------------------------------------------------------------------------------- /samples/ze_debug_info/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_L0_Debug_Info CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(zet_debug_info SHARED "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) 12 | target_include_directories(zet_debug_info 13 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 14 | if(CMAKE_INCLUDE_PATH) 15 | target_include_directories(zet_debug_info 16 | PUBLIC "${CMAKE_INCLUDE_PATH}") 17 | endif() 18 | 19 | add_subdirectory("${PTI_CMAKE_MACRO_DIR}/../utils/gpu_elf_parser" debug_info_parser) 20 | FindPtiElfParserHeaders(zet_debug_info) 21 | target_link_libraries(zet_debug_info debug_info_parser) 22 | 23 | FindL0Library(zet_debug_info) 24 | FindL0Headers(zet_debug_info) 25 | 26 | FindIGALibrary(zet_debug_info) 27 | GetIGAHeaders(zet_debug_info) 28 | 29 | GetIGCHeaders(zet_debug_info) 30 | GetGmmHeaders(zet_debug_info) 31 | 32 | # Loader 33 | 34 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_debug_info") 35 | add_executable(ze_debug_info "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 36 | target_include_directories(ze_debug_info 37 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 38 | if(UNIX) 39 | 
target_link_libraries(ze_debug_info 40 | dl) 41 | endif() 42 | -------------------------------------------------------------------------------- /samples/ze_debug_info/tool.cc: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #include "ze_debug_info_collector.h" 8 | 9 | static ZeDebugInfoCollector* collector = nullptr; 10 | 11 | // External Tool Interface //////////////////////////////////////////////////// 12 | 13 | extern "C" PTI_EXPORT void Usage() { 14 | std::cout << "Usage: ./ze_debug_info[.exe] " << std::endl; 15 | } 16 | 17 | extern "C" PTI_EXPORT int ParseArgs(int argc, char* argv[]) { return 1; } 18 | 19 | extern "C" PTI_EXPORT void SetToolEnv() { utils::SetEnv("ZE_ENABLE_TRACING_LAYER", "1"); } 20 | 21 | // Internal Tool Functionality //////////////////////////////////////////////// 22 | 23 | static void PrintResults() { 24 | PTI_ASSERT(collector != nullptr); 25 | 26 | const KernelDebugInfoMap& debug_info_map = collector->GetKernelDebugInfoMap(); 27 | if (debug_info_map.size() == 0) { 28 | return; 29 | } 30 | 31 | std::cerr << std::endl; 32 | for (auto pair : debug_info_map) { 33 | ZeDebugInfoCollector::PrintKernelDebugInfo(pair.first, pair.second); 34 | } 35 | } 36 | 37 | // Internal Tool Interface //////////////////////////////////////////////////// 38 | 39 | void EnableProfiling() { 40 | ze_result_t status = ZE_RESULT_SUCCESS; 41 | status = zeInit(ZE_INIT_FLAG_GPU_ONLY); 42 | PTI_ASSERT(status == ZE_RESULT_SUCCESS); 43 | collector = ZeDebugInfoCollector::Create(); 44 | } 45 | 46 | void DisableProfiling() { 47 | if (collector != nullptr) { 48 | collector->DisableTracing(); 49 | PrintResults(); 50 | delete collector; 51 | collector = nullptr; // reset the file-static pointer so a repeated call cannot double-delete 52 | } 53 | } 
-------------------------------------------------------------------------------- /samples/ze_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_L0_GEMM CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | add_executable(ze_gemm main.cc) 10 | target_include_directories(ze_gemm 11 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 12 | if(CMAKE_INCLUDE_PATH) 13 | target_include_directories(ze_gemm 14 | PUBLIC "${CMAKE_INCLUDE_PATH}") 15 | endif() 16 | 17 | add_custom_command(TARGET ze_gemm PRE_BUILD 18 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 19 | ${PROJECT_SOURCE_DIR}/gemm.spv 20 | ${CMAKE_BINARY_DIR}/gemm.spv) 21 | add_custom_command(TARGET ze_gemm PRE_BUILD 22 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 23 | ${PROJECT_SOURCE_DIR}/gemm.cl 24 | ${CMAKE_BINARY_DIR}/gemm.cl) 25 | 26 | FindL0Library(ze_gemm) 27 | FindL0Headers(ze_gemm) 28 | 29 | if(UNIX) 30 | target_link_libraries(ze_gemm 31 | dl) 32 | endif() -------------------------------------------------------------------------------- /samples/ze_gemm/gemm.cl: -------------------------------------------------------------------------------- 1 | __kernel void GEMM(__global float* a, __global float* b, 2 | __global float* c, int size) { 3 | int j = get_global_id(0); 4 | int i = get_global_id(1); 5 | float sum = 0.0f; 6 | for (int k = 0; k < size; ++k) { 7 | sum += a[i * size + k] * b[k * size + j]; 8 | } 9 | c[i * size + j] = sum; 10 | } -------------------------------------------------------------------------------- /samples/ze_gemm/gemm.spv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/samples/ze_gemm/gemm.spv 
-------------------------------------------------------------------------------- /samples/ze_hot_functions/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_L0_Hot_Functions CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(zet_hot_functions SHARED 12 | "${PROJECT_SOURCE_DIR}/../../loader/init.cc" 13 | tool.cc) 14 | target_include_directories(zet_hot_functions 15 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 16 | if(CMAKE_INCLUDE_PATH) 17 | target_include_directories(zet_hot_functions 18 | PUBLIC "${CMAKE_INCLUDE_PATH}") 19 | endif() 20 | 21 | FindL0Library(zet_hot_functions) 22 | FindL0Headers(zet_hot_functions) 23 | 24 | # Loader 25 | 26 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_hot_functions") 27 | add_executable(ze_hot_functions "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 28 | target_include_directories(ze_hot_functions 29 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 30 | if(UNIX) 31 | target_link_libraries(ze_hot_functions 32 | dl) 33 | endif() -------------------------------------------------------------------------------- /samples/ze_hot_kernels/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_L0_Hot_Kernels CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(zet_hot_kernels SHARED 12 | "${PROJECT_SOURCE_DIR}/../../loader/init.cc" 13 | tool.cc) 14 | target_include_directories(zet_hot_kernels 15 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 16 | if(CMAKE_INCLUDE_PATH) 17 | target_include_directories(zet_hot_kernels 18 | PUBLIC 
"${CMAKE_INCLUDE_PATH}") 19 | endif() 20 | 21 | FindL0Library(zet_hot_kernels) 22 | FindL0Headers(zet_hot_kernels) 23 | 24 | # Loader 25 | 26 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_hot_kernels") 27 | add_executable(ze_hot_kernels "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 28 | target_include_directories(ze_hot_kernels 29 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 30 | if(UNIX) 31 | target_link_libraries(ze_hot_kernels 32 | dl) 33 | endif() -------------------------------------------------------------------------------- /samples/ze_info/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_L0_Info CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | add_executable(ze_info main.cc) 10 | target_include_directories(ze_info 11 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 12 | if(CMAKE_INCLUDE_PATH) 13 | target_include_directories(ze_info 14 | PUBLIC "${CMAKE_INCLUDE_PATH}") 15 | endif() 16 | 17 | FindL0Library(ze_info) 18 | FindL0Headers(ze_info) 19 | -------------------------------------------------------------------------------- /samples/ze_metric_info/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_L0_Metric_Info CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | add_executable(ze_metric_info main.cc) 10 | target_include_directories(ze_metric_info 11 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 12 | if(CMAKE_INCLUDE_PATH) 13 | target_include_directories(ze_metric_info 14 | PUBLIC "${CMAKE_INCLUDE_PATH}") 15 | endif() 16 | 17 | FindL0Library(ze_metric_info) 18 | FindL0Headers(ze_metric_info) 19 | 20 | 
CheckForMDLibrary(ze_metric_info) 21 | CheckForMetricsLibrary() 22 | 23 | if(UNIX) 24 | target_link_libraries(ze_metric_info 25 | dl) 26 | endif() -------------------------------------------------------------------------------- /samples/ze_metric_query/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_L0_Metric_Query CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(zet_metric_query SHARED "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) 12 | target_include_directories(zet_metric_query 13 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 14 | if(CMAKE_INCLUDE_PATH) 15 | target_include_directories(zet_metric_query 16 | PUBLIC "${CMAKE_INCLUDE_PATH}") 17 | endif() 18 | 19 | FindL0Library(zet_metric_query) 20 | FindL0Headers(zet_metric_query) 21 | 22 | CheckForMDLibrary(zet_metric_query) 23 | CheckForMetricsLibrary() 24 | 25 | # Loader 26 | 27 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_metric_query") 28 | add_executable(ze_metric_query "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 29 | target_include_directories(ze_metric_query 30 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 31 | if(UNIX) 32 | target_link_libraries(ze_metric_query 33 | dl) 34 | endif() -------------------------------------------------------------------------------- /samples/ze_metric_streamer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_L0_Metric_Tracer CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(zet_metric_streamer SHARED 12 | 
"${PROJECT_SOURCE_DIR}/../../loader/init.cc" 13 | tool.cc) 14 | target_include_directories(zet_metric_streamer 15 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils" 16 | PRIVATE "${PROJECT_SOURCE_DIR}/../ze_hot_kernels") 17 | if(CMAKE_INCLUDE_PATH) 18 | target_include_directories(zet_metric_streamer 19 | PUBLIC "${CMAKE_INCLUDE_PATH}") 20 | endif() 21 | 22 | if(UNIX) 23 | target_link_libraries(zet_metric_streamer 24 | pthread) 25 | endif() 26 | 27 | FindL0Library(zet_metric_streamer) 28 | FindL0Headers(zet_metric_streamer) 29 | 30 | CheckForMDLibrary(zet_metric_streamer) 31 | CheckForMetricsLibrary() 32 | 33 | # Loader 34 | 35 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_metric_streamer") 36 | add_executable(ze_metric_streamer "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 37 | target_include_directories(ze_metric_streamer 38 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 39 | if(UNIX) 40 | target_link_libraries(ze_metric_streamer 41 | dl) 42 | endif() -------------------------------------------------------------------------------- /samples/ze_sysman/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_L0_Sysmon CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | add_executable(ze_sysman main.cc) 10 | target_include_directories(ze_sysman 11 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 12 | if(CMAKE_INCLUDE_PATH) 13 | target_include_directories(ze_sysman 14 | PUBLIC "${CMAKE_INCLUDE_PATH}") 15 | endif() 16 | 17 | FindL0Library(ze_sysman) 18 | FindL0Headers(ze_sysman) -------------------------------------------------------------------------------- /samples/ze_sysman/README.md: -------------------------------------------------------------------------------- 1 | # Level Zero System Management 2 | ## Overview 3 | This sample application provides 
basic static and dynamic information for a GPU device. 4 | 5 | Output should look like the following: 6 | ``` 7 | Device: Intel(R) Iris(R) Plus Graphics 655 [0x3ea5] 8 | -- Subdevice Count: 0 9 | -- Driver Version: A7F72C54A5788663395C411 10 | -- PCI Bus: 0000:00:02.0 11 | -- Frequency Domains: 1 12 | ---- [0] Clock EU Freq Range (MHz): 300 - 1200 (changeable) 13 | ---- [0] Current Clock EU Freq (MHz): 300 14 | -- Temperature Sensors: 5 15 | ---- [2] Core Temperature (C): 34 16 | ``` 17 | 18 | ## Supported OS 19 | - Linux 20 | - Windows (*under development*) 21 | 22 | ## Prerequisites 23 | - [CMake](https://cmake.org/) (version 3.12 and above) 24 | - [Git](https://git-scm.com/) (version 1.8 and above) 25 | - [Python](https://www.python.org/) (version 2.7 and above) 26 | - [oneAPI Level Zero loader](https://github.com/oneapi-src/level-zero) 27 | - [Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver](https://github.com/intel/compute-runtime) 28 | 29 | ## Build and Run 30 | ### Linux 31 | Run the following commands to build the sample: 32 | ```sh 33 | cd <pti>/samples/ze_sysman 34 | mkdir build 35 | cd build 36 | cmake -DCMAKE_BUILD_TYPE=Release .. 37 | make 38 | ``` 39 | Use this command line to run the utility: 40 | ```sh 41 | ./ze_sysman 42 | ``` 43 | ### Windows 44 | Use Microsoft* Visual Studio x64 command prompt to run the following commands and build the sample: 45 | ```sh 46 | cd <pti>\samples\ze_sysman 47 | mkdir build 48 | cd build 49 | cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_LIBRARY_PATH=<level_zero_loader>\lib -DCMAKE_INCLUDE_PATH=<level_zero_loader>\include .. 
50 | nmake 51 | ``` 52 | Use this command line to run the application: 53 | ```sh 54 | ze_sysman.exe 55 | ``` -------------------------------------------------------------------------------- /sdk/.clang-tidy: -------------------------------------------------------------------------------- 1 | Checks: '-*,google*,cppcoreguidelines*,readability*' 2 | CheckOptions: 3 | - key: readability-identifier-naming.GlobalConstantCase 4 | value: CamelCase 5 | - key: readability-identifier-naming.GlobalConstantPrefix 6 | value: k 7 | - key: readability-identifier-naming.ClassCase 8 | value: CamelCase 9 | - key: readability-identifier-naming.EnumCase 10 | value: CamelCase 11 | - key: readability-identifier-naming.EnumConstantCase 12 | value: CamelCase 13 | - key: readability-identifier-naming.EnumConstantPrefix 14 | value: k 15 | - key: readability-identifier-naming.FunctionCase 16 | value: CamelCase 17 | - key: readability-identifier-naming.ClassMemberCase 18 | value: lower_case 19 | - key: readability-identifier-naming.ClassMemberSuffix 20 | value: _ 21 | - key: readability-identifier-naming.VariableCase 22 | value: lower_case 23 | - key: readability-identifier-naming.IgnoreMainLikeFunctions 24 | value: 1 25 | - key: readability-implicit-bool-conversion.AllowPointerConditions 26 | value: 1 27 | - key: readability-implicit-bool-conversion.AllowIntegerConditions 28 | value: 1 29 | -------------------------------------------------------------------------------- /sdk/.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | trim_trailing_whitespace = true 7 | 8 | [*.{c,cc,h}] 9 | indent_style = space 10 | indent_size = 2 11 | max_line_length = 100 12 | 13 | [{CMakeLists.txt,*.cmake}] 14 | indent_style = space 15 | indent_size = 2 16 | max_line_length = 80 17 | 18 | [*.py] 19 | indent_style = space 20 | indent_size = 4 21 | profile = black 22 | 
-------------------------------------------------------------------------------- /sdk/.gitignore: -------------------------------------------------------------------------------- 1 | build*/ 2 | *~ 3 | venv/ 4 | .vscode/ 5 | .vs/ 6 | .vim/ 7 | .cache/ 8 | compile_commands.json 9 | CMakeUserPresets.json 10 | error_diff.txt 11 | wbr 12 | -------------------------------------------------------------------------------- /sdk/PtiConfig.cmake: -------------------------------------------------------------------------------- 1 | # https://cmake.org/cmake/help/latest/guide/importing-exporting/index.html 2 | include("${CMAKE_CURRENT_LIST_DIR}/PtiTargets.cmake") 3 | 4 | include(CMakeFindDependencyMacro) 5 | find_dependency(Threads) 6 | -------------------------------------------------------------------------------- /sdk/TODO.md: -------------------------------------------------------------------------------- 1 | # TODO list for Profiling Tools Interfaces SDK 2 | 3 | 1. Write the documentation 4 | 2. Clean and optimize View records 5 | - Compact them, e.g., remove redundant fields, compact other fields. 6 | - If we decide to keep them, make the record fields representing context, device, etc. back-end agnostic 7 | - Remove (?) `_process_id` field 8 | 3. Clarify and properly define `pti_view_memcpy_type` and `pti_view_memory_type` 9 | 4. Change the type of `_pci_address` in all records to a numerical type. 10 | 5. Give uuid a structured type -- it is currently a uint8_t array of PTI_MAX_DEVICE_UUID_SIZE. 
11 | 12 | -------------------------------------------------------------------------------- /sdk/VERSION: -------------------------------------------------------------------------------- 1 | 0.12.3 2 | -------------------------------------------------------------------------------- /sdk/cmake/Modules/pti_versioninfo.rc.in: -------------------------------------------------------------------------------- 1 | #define VER_COMPANY_NAME_STR "Intel Corporation\0" 2 | 3 | #define VER_PRODUCT_DESCRIPTION_STR "@PROJECT_HOMEPAGE_URL@\0" 4 | 5 | #define VER_INTERNAL_NAME_STR "@PROJECT_NAME@\0" 6 | 7 | #define VER_LEGAL_COPYRIGHT_STR "@PTI_COPYRIGHT@\0" 8 | 9 | #define VER_PRODUCT_NAME_STR "Profiling Tools Interface\0" 10 | 11 | #define VER_FILEVERSION @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@ 12 | #define VER_FILEVERSION_STR "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@\0" 13 | 14 | 15 | #define VER_PRODUCTVERSION @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@ 16 | #define VER_PRODUCTVERSION_STR "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@\0" 17 | 18 | 19 | 1 VERSIONINFO 20 | FILEVERSION VER_FILEVERSION 21 | PRODUCTVERSION VER_PRODUCTVERSION 22 | BEGIN 23 | BLOCK "StringFileInfo" 24 | BEGIN 25 | BLOCK "040904b0" 26 | BEGIN 27 | VALUE "CompanyName", VER_COMPANY_NAME_STR 28 | VALUE "FileDescription", VER_PRODUCT_DESCRIPTION_STR 29 | VALUE "FileVersion", VER_FILEVERSION_STR 30 | VALUE "InternalName", VER_INTERNAL_NAME_STR 31 | VALUE "LegalCopyright", VER_LEGAL_COPYRIGHT_STR 32 | VALUE "ProductName", VER_PRODUCT_NAME_STR 33 | VALUE "ProductVersion", VER_PRODUCTVERSION_STR 34 | END 35 | END 36 | BLOCK "VarFileInfo" 37 | BEGIN 38 | VALUE "Translation", 0x409, 1252 39 | END 40 | END 41 | -------------------------------------------------------------------------------- /sdk/cmake/bom_line.bash: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | # Append one bill-of-materials (BOM) entry describing TGT_FILE to the file TGT_BOM. 3 | # Arguments: 4 | # $1 BOM_DIR - directory that is created if it does not exist yet 5 | # $2 TGT_BOM - BOM file the entry is appended to 6 | # $3 INST_DIR - prefix written into the INS field ("NULL" means empty) 7 | # $4 BRK_TOKEN - path component; everything up to and including it is stripped from TGT_FILE 8 | # $5 TGT_FILE - file (or symlink) to describe 9 | BOM_DIR=$1 10 | echo BOM_DIR $BOM_DIR 11 | TGT_BOM=$2 12 | echo TGT_BOM $TGT_BOM 13 | INST_DIR=$3 14 | echo INST_DIR $INST_DIR 15 | BRK_TOKEN=$4 16 | echo BRK_TOKEN $BRK_TOKEN 17 | TGT_FILE=$5 18 | echo TGT_FILE $TGT_FILE 19 | 20 | [ "$INST_DIR" = "NULL" ] && INST_DIR="" 21 | 22 | 23 | # -p creates missing parent directories and succeeds if the directory already exists. 24 | mkdir -p "${BOM_DIR}" 25 | echo ALL $@ 26 | 27 | STG1=${TGT_FILE##*/${BRK_TOKEN}/} 28 | STRIPPED_NAME=${STG1#$BRK_TOKEN/} 29 | echo STG1= ${STG1} STRIPPED_NAME ${STRIPPED_NAME} 30 | 31 | # Expansions are quoted so that paths containing spaces or glob characters survive intact. 32 | if [[ -L "$TGT_FILE" ]]; then 33 | echo ":INS${INST_DIR}/${STRIPPED_NAME}:CKSUM:ONE::INT:755:$(readlink "$TGT_FILE")" >> "${TGT_BOM}" 34 | else 35 | echo "DEL/${STRIPPED_NAME}:INS${INST_DIR}/${STRIPPED_NAME}:CKSUM:ONE::INT:$(stat -c '%a' "${TGT_FILE}")" >> "${TGT_BOM}" 36 | fi 37 | -------------------------------------------------------------------------------- /sdk/cmake/bom_line_win.bat: -------------------------------------------------------------------------------- 1 | bash cmake\bom_line.bash %* 2 | -------------------------------------------------------------------------------- /sdk/cmake/bom_macro.cmake: -------------------------------------------------------------------------------- 1 | macro(bomline bomDir class dirPrefix tgtMatch) 2 | install(CODE " 3 | file( 4 | GLOB_RECURSE CMAKE_FILES_LIST 5 | ${tgtMatch} 6 | ) 7 | message(STATUS \"BEFORE CMAKE\" ${class}) 8 | foreach(FILE \${CMAKE_FILES_LIST}) 9 | message(STATUS \"Installed file1: \${FILE}\") 10 | if (UNIX) 11 | execute_process(COMMAND cmake/bom_line.bash ${bomDir} ${class} ${dirPrefix} \${CMAKE_INSTALL_PREFIX} \${FILE}) 12 | else() 13 | execute_process(COMMAND [[cmake\\bom_line_win.bat]] ${bomDir} ${class} ${dirPrefix} \${CMAKE_INSTALL_PREFIX} \${FILE}) 14 | endif() 15 | endforeach() 16 | message(STATUS \"OUTSIDE CMAKE\" ${class}) 17 | " 18 | COMPONENT Pti_Bom) 19 | endmacro() 20 | 21 | 22 | 
/sdk/cmake/tags/lin_conda_tags.txt: -------------------------------------------------------------------------------- 1 | path_to_components = 2 | -------------------------------------------------------------------------------- /sdk/cmake/tags/lin_def_tags.txt: -------------------------------------------------------------------------------- 1 | path_to_components = 2 | -------------------------------------------------------------------------------- /sdk/cmake/tags/lin_pip_tags.txt: -------------------------------------------------------------------------------- 1 | path_to_components = 2 | -------------------------------------------------------------------------------- /sdk/cmake/tags/win_conda_tags.txt: -------------------------------------------------------------------------------- 1 | path_to_components = 2 | -------------------------------------------------------------------------------- /sdk/cmake/tags/win_def_tags.txt: -------------------------------------------------------------------------------- 1 | path_to_components = 2 | -------------------------------------------------------------------------------- /sdk/cmake/tags/win_pip_tags.txt: -------------------------------------------------------------------------------- 1 | path_to_components = /Library 2 | -------------------------------------------------------------------------------- /sdk/cmake/toolchains/clang_toolchain.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_C_COMPILER "clang") 2 | set(CMAKE_CXX_COMPILER "clang++") 3 | -------------------------------------------------------------------------------- /sdk/cmake/toolchains/icpx_asan_toolchain.cmake: -------------------------------------------------------------------------------- 1 | if (UNIX) 2 | set(CMAKE_C_COMPILER icx) 3 | set(CMAKE_CXX_COMPILER icpx) 4 | endif() 5 | 6 | if (WIN32) 7 | set(CMAKE_C_COMPILER icx) 8 | set(CMAKE_CXX_COMPILER icx) 9 | endif() 10 | 11 | set(CMAKE_CXX_FLAGS_DEBUG_INIT 
"-fsanitize=address,undefined -fno-omit-frame-pointer -fno-optimize-sibling-calls") 12 | set(CMAKE_C_FLAGS_DEBUG_INIT "-fsanitize=address,undefined") 13 | -------------------------------------------------------------------------------- /sdk/cmake/toolchains/icpx_fuzz_toolchain.cmake: -------------------------------------------------------------------------------- 1 | if (UNIX) 2 | set(CMAKE_C_COMPILER icx) 3 | set(CMAKE_CXX_COMPILER icpx) 4 | endif() 5 | 6 | if (WIN32) 7 | set(CMAKE_C_COMPILER icx) 8 | set(CMAKE_CXX_COMPILER icx) 9 | endif() 10 | 11 | set(CMAKE_CXX_FLAGS_DEBUG_INIT "-fsanitize=address,undefined -fno-omit-frame-pointer -fno-optimize-sibling-calls\ 12 | -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/fuzz/ubsan-ignore.txt") 13 | set(CMAKE_C_FLAGS_DEBUG_INIT "-fsanitize=address,undefined\ 14 | -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/fuzz/ubsan-ignore.txt") 15 | -------------------------------------------------------------------------------- /sdk/cmake/toolchains/icpx_toolchain.cmake: -------------------------------------------------------------------------------- 1 | if (UNIX) 2 | set(CMAKE_C_COMPILER icx) 3 | set(CMAKE_CXX_COMPILER icpx) 4 | endif() 5 | 6 | if (WIN32) 7 | set(CMAKE_C_COMPILER icx) 8 | set(CMAKE_CXX_COMPILER icx) 9 | endif() 10 | -------------------------------------------------------------------------------- /sdk/cmake/toolchains/icpx_tsan_toolchain.cmake: -------------------------------------------------------------------------------- 1 | if (UNIX) 2 | set(CMAKE_C_COMPILER icx) 3 | set(CMAKE_CXX_COMPILER icpx) 4 | endif() 5 | 6 | if (WIN32) 7 | set(CMAKE_C_COMPILER icx) 8 | set(CMAKE_CXX_COMPILER icx) 9 | endif() 10 | 11 | set(CMAKE_CXX_FLAGS_DEBUG_INIT "-fsanitize=thread -fno-omit-frame-pointer -fsanitize-recover=all") 12 | set(CMAKE_C_FLAGS_DEBUG_INIT "-fsanitize=thread -fno-omit-frame-pointer -fsanitize-recover=all") 13 | -------------------------------------------------------------------------------- 
/sdk/docker/sles-15/bldrun.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM registry.suse.com/suse/sle15:15.6.47.20.19 2 | 3 | SHELL ["/bin/bash", "-o", "pipefail", "-c"] 4 | 5 | WORKDIR /tmp 6 | 7 | USER root 8 | 9 | #hadolint ignore=DL3041 10 | RUN zypper refresh && \ 11 | zypper --non-interactive install -y \ 12 | gawk \ 13 | wget \ 14 | cmake \ 15 | gcc \ 16 | gcc-c++ \ 17 | ninja \ 18 | sudo \ 19 | wget \ 20 | awk \ 21 | libprocps8 \ 22 | libsystemd0 \ 23 | procps \ 24 | which \ 25 | git \ 26 | vim \ 27 | python312 28 | 29 | RUN zypper addrepo https://yum.repos.intel.com/oneapi oneAPI && \ 30 | rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB && \ 31 | zypper addrepo -f -r https://repositories.intel.com/gpu/sles/15sp6/unified/intel-gpu-15sp6.repo && \ 32 | rpm --import https://repositories.intel.com/gpu/intel-graphics.key 33 | 34 | # 35 | # Install the essential packages from oneAPI to build pti 36 | # instead of intel-oneapi-base-toolkit-2025.1.0 37 | # 38 | RUN zypper refresh && \ 39 | zypper up -y && \ 40 | zypper --non-interactive install -y \ 41 | intel-level-zero-gpu level-zero intel-gsc intel-opencl intel-ocloc \ 42 | intel-media-driver libigfxcmrt7 libvpl2 libvpl-tools libmfxgen1 \ 43 | libigdfcl-devel intel-igc-cm libigfxcmrt-devel level-zero-devel \ 44 | intel-metrics-discovery intel-metrics-discovery-devel \ 45 | intel-metrics-library intel-metrics-library-devel \ 46 | intel-dpcpp-cpp-compiler-2025.1 \ 47 | intel-oneapi-mkl-devel-2025.1 \ 48 | intel-oneapi-dnnl-devel-2025.1 \ 49 | intel-oneapi-ccl-devel-2021.15 50 | 51 | RUN update-alternatives --install /usr/local/bin/python python /usr/bin/python3.12 10 52 | 53 | -------------------------------------------------------------------------------- /sdk/docs/README.md: -------------------------------------------------------------------------------- 1 | # How to build PTI Library documentation 2 | 3 | Our 
documentation is written in restructured text markup (.rst) and built using [Sphinx](http://www.sphinx-doc.org/en/master/). 4 | 5 | This document explains how to build PTI Library documentation locally. 6 | 7 | ## Prerequisites 8 | ``` 9 | apt install doxygen 10 | pip install -r requirements.txt 11 | ``` 12 | 13 | ## Build documentation 14 | 15 | Do the following to generate HTML output of the documentation: 16 | 17 | 1. Clone PTI repository: 18 | 19 | ``` 20 | git clone https://github.com/intel/pti-gpu 21 | ``` 22 | 23 | 2. Go to the `sdk/docs/sphinx` folder: 24 | 25 | ``` 26 | cd sdk/docs/sphinx 27 | ``` 28 | 29 | 3. Run in the command line: 30 | 31 | ``` 32 | make html 33 | ``` 34 | 35 | 36 | That's it! Your built documentation is located in the ``build/html`` folder. -------------------------------------------------------------------------------- /sdk/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | breathe 2 | sphinx 3 | sphinx-book-theme 4 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | pre { 2 | white-space: pre-wrap !important; 3 | } 4 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/_static/favicons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/sdk/docs/sphinx/source/_static/favicons.png -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/_static/oneAPI-rgb-rev-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/sdk/docs/sphinx/source/_static/oneAPI-rgb-rev-100.png -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/build.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Build 3 | ======= 4 | 5 | Build the pti library, tests, and samples: 6 | 7 | .. code-block:: bash 8 | 9 | source /setvars.sh 10 | cd sdk 11 | mkdir build 12 | cd build 13 | cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/icpx_toolchain.cmake .. 14 | make -j 15 | 16 | -------------- 17 | Installation 18 | -------------- 19 | 20 | Install manually-built library: 21 | 22 | .. code-block:: bash 23 | 24 | mkdir build 25 | cd build 26 | cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/icpx_toolchain.cmake -DBUILD_TESTING=OFF .. 27 | make -j 28 | cmake --install . 
--config Release --prefix "../out" 29 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/index.rst: -------------------------------------------------------------------------------- 1 | .. ptilib documentation master file, created by 2 | sphinx-quickstart on Mon Aug 3 11:39:30 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to the PTI Library version |release| documentation! 7 | ============================================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 4 11 | 12 | .. include:: toctree.rst 13 | 14 | Indices and tables 15 | ================== 16 | 17 | * :ref:`genindex` 18 | * :ref:`search` 19 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/install.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Installation 3 | ============== 4 | 5 | .. code-block:: bash 6 | 7 | mkdir build 8 | cd build 9 | cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/icpx_toolchain.cmake -DBUILD_TESTING=OFF .. 10 | make -j 11 | cmake --install . --config Release --prefix "../out" 12 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/intro.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Introduction 3 | ##################### 4 | 5 | This is the PTI SDK library. 6 | 7 | PTI SDK will be a library for developing profiling tools for applications built on top of oneAPI and running on Intel GPUs. 8 | 9 | Today's SDK provides ``pti_view`` library with an API to trace various tasks of application running on Intel GPU. While the library implementation uses low-level tracing APIs of SYCL run-time and Level-Zero, its own API is high-level. 
10 | 11 | PTI library is being built on the experience of PTI-GPU tools and samples and reuses that code with some modification. 12 | 13 | As for the project organization here - SDK folder is self-contained and independent from the rest of repository. 14 | 15 | One of the objectives is to extend functionality of the PTI library and with time to transform today's PTI-GPU project to an SDK. 16 | 17 | This project is in active development. We decided to open it at this early stage to benefit from feedback and criticism of interested parties and early adopters. 18 | 19 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/knownissues.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Known Issues 3 | ============== 4 | 5 | Significant overhead is currently expected for workloads that frequently submit work to the GPU. Optimization efforts will continue both in the library as well as in the underlying layers of the software stack. 6 | 7 | For Local collection mode, Rolling drivers are currently required. LTS drivers from 2024 and earlier do not support it, and instead require Level Zero tracing to be globally enabled. 8 | 9 | Kernel name demangling is not supported on Windows. 
10 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/license.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | License 3 | ========= 4 | 5 | MIT License 6 | 7 | Copyright (C) Intel Corporation 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/linking.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Linking 3 | ========= 4 | 5 | Use CMake ``find_package`` 6 | 7 | .. code-block:: cmake 8 | 9 | # set Pti_DIR if you install in a nonstandard location. 
10 | set(Pti_DIR /lib/cmake/pti) 11 | find_package(Pti X.Y.Z) 12 | target_link_libraries(stuff PUBLIC Pti::pti_view) 13 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Quick Start 3 | ============= 4 | 5 | Test 6 | ------ 7 | Run the included test suite: 8 | 9 | .. code-block:: bash 10 | 11 | make test 12 | 13 | Usage 14 | ------- 15 | 16 | Use the ``samples/`` as a guide to developing with this library. 17 | 18 | Note: 19 | 20 | * Before ``ptiViewEnable()`` is called, please define callbacks and register them with ``ptiViewSetCallbacks()``. 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/samples.rst: -------------------------------------------------------------------------------- 1 | =========================== 2 | Code Samples and Examples 3 | =========================== 4 | 5 | From ``build`` directory: 6 | 7 | .. code-block:: bash 8 | 9 | ./samples/vector_sq_add/vec_sqadd 10 | ./samples/dpc_gemm/dpc_gemm 11 | ./samples/onemkl_gemm/onemkl_gemm 12 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/toctree.rst: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :caption: About 3 | :maxdepth: 2 4 | 5 | intro 6 | whatsnew 7 | systemreqs 8 | knownissues 9 | license 10 | 11 | .. toctree:: 12 | :caption: Get Started 13 | :maxdepth: 3 14 | 15 | build 16 | install 17 | linking 18 | quickstart 19 | samples 20 | 21 | .. 
toctree:: 22 | :caption: Developer Guide 23 | :maxdepth: 2 24 | 25 | devguide 26 | 27 | -------------------------------------------------------------------------------- /sdk/docs/sphinx/source/whatsnew.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | What's New 3 | ============== 4 | 5 | Version 0.9.0 6 | --------------- 7 | 8 | * Added function call(s) providing the timestamp and allowing the user to provide 9 | their own timestamp via a callback. 10 | * Windows 11 support added. 11 | * Various bug fixes and improvements. 12 | 13 | Version 0.8.0 14 | --------------- 15 | 16 | * Added the ability to link against an older, unsupported L0 loader and gracefully report that it is unsupported. 17 | * Various bug fixes and improvements. 18 | 19 | Version 0.7.0 20 | --------------- 21 | 22 | Implements the new functionality of Local collection. It enables starting and stopping collection anytime and anywhere in an application when run on a system with an installed Level-Zero runtime supporting the 1.9.0 specification and higher. 23 | 24 | Local collection functionality is transparent and controlled via ``ptiViewEnable`` and ``ptiViewDisable`` calls, where the first ``ptiViewEnable`` (or several of them) called at any place starts the Local collection and the last ``ptiViewDisable`` (or several of them, paired with preceding ``ptiViewEnable`` calls) stops the Local collection. 25 | Outside of Local collection regions of interest, PTI SDK maintains zero overhead by not issuing any calls or collecting any data. 26 | 27 | On systems with a Level-Zero version lower than 1.9.0, **PTI SDK** still operates as it did before version 0.7.0: tracing runtime calls and causing overhead outside of ``ptiViewEnable`` - ``ptiViewDisable`` regions, but reporting data only for ``ptiViewEnable`` - ``ptiViewDisable`` regions. 
@echo off
REM
REM Copyright (c) 2024 Intel Corporation
REM
REM Licensed under the Apache License, Version 2.0 (the "License");
REM you may not use this file except in compliance with the License.
REM You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM

REM Component environment script for PTI.
REM Exit codes: 255 when SETVARS_CALL is not defined, 254 when ONEAPI_ROOT
REM is not defined, 0 on success.

REM Refuse to run unless the caller defined SETVARS_CALL.
if not defined SETVARS_CALL (
    echo:
    echo :: ERROR: This script must be executed by setvars.bat.
    echo: Try '[install-dir]\setvars.bat --help' for help.
    echo:
    exit /b 255
)

REM Every path below is derived from ONEAPI_ROOT, so it must be set.
if not defined ONEAPI_ROOT (
    echo:
    echo :: ERROR: This script requires that the ONEAPI_ROOT env variable is set.
    echo: Try '[install-dir]\setvars.bat --help' for help.
    echo:
    exit /b 254
)

REM Prepend the PTI CMake package directory and header directory to the
REM search paths, keeping whatever the caller already had.
set "PTI_ROOT=%ONEAPI_ROOT%"
set "CMAKE_PREFIX_PATH=%PTI_ROOT%\lib\cmake\pti;%CMAKE_PREFIX_PATH%"
set "CPATH=%PTI_ROOT%\include;%CPATH%"

REM Pti_DIR points at the same package directory added to CMAKE_PREFIX_PATH.
set "Pti_DIR=%PTI_ROOT%\lib\cmake\pti"

exit /B 0
# Fuzzing harness for the PTI view library: one executable built from
# test_pti_view.cc and registered with CTest under the "fuzz" label.
add_executable(pti_view_fuzz test_pti_view.cc)

target_link_libraries(pti_view_fuzz PUBLIC Pti::pti_view)

# The same sanitizer flags are passed to both the compile and the link step.
target_compile_options(pti_view_fuzz PUBLIC -fsanitize=fuzzer,address)
target_link_options(pti_view_fuzz PUBLIC -fsanitize=fuzzer,address)

target_include_directories(pti_view_fuzz PRIVATE
                           "${PROJECT_SOURCE_DIR}/src/utils")

# Use CTest as a test runner
add_test(NAME fuzz-pti-view-lib COMMAND pti_view_fuzz)

# Environment needed when running under the sanitizer; background in:
# https://github.com/intel/compute-runtime/issues/376#issuecomment-786029828
set_tests_properties(
  fuzz-pti-view-lib PROPERTIES LABELS "fuzz" ENVIRONMENT
                               "NEOReadDebugKeys=1;DisableDeepBind=1")
///////////////////////////////////////////////////////////////////////////////
/// @ Filters out function from undefined behaviour sanitizer while fuzzing
#if defined(__clang__) || defined(__GNUC__)
#define ATTRIBUTE_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined")))
#else
#define ATTRIBUTE_NO_SANITIZE_UNDEFINED
#endif

namespace {
/**
 * @brief Builds a value of type T from the first sizeof(T) bytes of a buffer.
 *
 * The bytes are copied with std::memcpy, so the buffer does not need to be
 * aligned for T and the resulting value may be any bit pattern (which is the
 * point when fuzzing, hence the no-sanitize attribute).
 *
 * @tparam T          trivially copyable destination type
 * @param byte_array  buffer holding at least sizeof(T) readable bytes; the
 *                    caller is responsible for checking the size beforehand
 * @return            a T whose object representation equals those bytes
 */
template <typename T>
ATTRIBUTE_NO_SANITIZE_UNDEFINED inline T ConvertByteArray(const unsigned char* byte_array) {
  static_assert(std::is_trivially_copyable<T>::value,
                "Must convert from byte array to trivially copyable type or "
                "risk undefined behavior.");

  T converted_value;
  // Copy from the buffer itself. Passing &byte_array would copy the bytes of
  // the local pointer variable (its address value), ignoring the input and
  // reading out of bounds whenever sizeof(T) > sizeof(byte_array).
  std::memcpy(&converted_value, byte_array, sizeof(T));
  return converted_value;
}
}  // namespace
/**
 * @brief Version of Intel(R) PTI split into three numeric components.
 *
 * This is the type returned by value from ptiVersion().
 */
typedef struct pti_version {
  uint32_t _major;  /**< major version component */
  uint32_t _minor;  /**< minor version component */
  uint32_t _patch;  /**< patch version component */
} pti_version;
ptiVersion(); 46 | 47 | #if defined(__cplusplus) 48 | } 49 | #endif 50 | 51 | #endif // INCLUDE_PTI_VERSION_H_ 52 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/CMakeLists_bkup.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(dlworkload_demo_with_syclgraph_capture) 3 | 4 | option(USE_HOST_MEMORY "Use USE_HOST_MEMORY" OFF) 5 | if(USE_HOST_MEMORY) 6 | add_definitions(-DUSE_HOST_MEMORY) 7 | endif() 8 | 9 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0") 10 | 11 | add_definitions(-Wall) 12 | set (CMAKE_CXX_COMPILER "dpcpp") 13 | FILE(GLOB SOURCES "*.cpp") 14 | 15 | add_executable(dlworkload ${SOURCES}) 16 | target_include_directories(dlworkload PUBLIC "${CMAKE_INCLUDE_PATH}") 17 | 18 | target_link_libraries(dlworkload -ldnnl -lmkl_sycl -lmkl_intel_ilp64 -lmkl_core -lmkl_tbb_thread) 19 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/README.md: -------------------------------------------------------------------------------- 1 | # personal.guoyejun.syclgraph_capture_dlworkloads -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/device_memory.cpp: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #include "device_memory.h" 7 | 8 | void DeviceMemoryManager::deinit() 9 | { 10 | for(auto& info : memInfos) { 11 | sycl::free(info.data, *q); 12 | } 13 | } 14 | 15 | float * DeviceMemoryManager::alloc(size_t count) 16 | { 17 | if (count == 0) { 18 | return nullptr; 19 | } 20 | 21 | if (q == nullptr) { 22 | return nullptr; 23 | } 24 | 25 | for(auto& info : 
memInfos) { 26 | if (info.count >= count && !info.used) { 27 | info.used = true; 28 | #ifdef USE_HOST_MEMORY 29 | memset(info.data, 0xAB, info.count*sizeof(float)); 30 | #endif 31 | return info.data; 32 | } 33 | } 34 | 35 | #ifdef USE_HOST_MEMORY 36 | float *p = sycl::malloc_host(count, *q); 37 | memset(p, 0xCD, count*sizeof(float)); 38 | #else 39 | float *p = sycl::malloc_device(count, *q); 40 | #endif 41 | DeviceMemoryInfo info; 42 | info.data = p; 43 | info.count = count; 44 | info.used = true; 45 | memInfos.push_back(info); 46 | 47 | return p; 48 | } 49 | 50 | void DeviceMemoryManager::free(float *data) 51 | { 52 | for(auto& info : memInfos) { 53 | if (info.data == data) { 54 | info.used = false; 55 | return; 56 | } 57 | } 58 | 59 | assert(!"should not reach here"); 60 | } 61 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/device_memory.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef DEVICE_MEMORY_H_ 7 | #define DEVICE_MEMORY_H_ 8 | 9 | #include 10 | #include 11 | 12 | // In IPEX/ITEX, device memory are allocated and reused, and released at last. 13 | // here is a very simple mock for this behavior. 
14 | 15 | struct DeviceMemoryInfo { 16 | float *data = nullptr; // float is enough for the demo 17 | size_t count = 0; 18 | bool used = 0; 19 | }; 20 | 21 | class DeviceMemoryManager { 22 | public: 23 | DeviceMemoryManager() {} 24 | void init(sycl::queue *q) { this->q = q;} 25 | void deinit(); 26 | float * alloc(size_t count); 27 | void free(float *data); 28 | private: 29 | std::vector memInfos = {}; 30 | sycl::queue *q = nullptr; 31 | }; 32 | 33 | inline auto& GlobalDeviceMemoryManager() { 34 | static DeviceMemoryManager g_devMemMgr = {}; 35 | return g_devMemMgr; 36 | } 37 | 38 | //extern DeviceMemoryManager g_devMemMgr; 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/model_mixedprogramming.cpp: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #include "model_mixedprogramming.h" 7 | #include "operation_syclkernel.h" 8 | #include "operation_onednn.h" 9 | #include "operation_onemkl.h" 10 | #include "operation_onedpl.h" 11 | 12 | TinyTensor run_model_mixedprogramming(TinyTensor inp, sycl::queue *q) 13 | { 14 | // the first operation is written with sycl kernel for scale down 15 | TinyTensor outp = run_syclkernel_operation_scaledown(inp, q); 16 | GlobalDeviceMemoryManager().free(inp.data); 17 | 18 | inp = outp; 19 | outp = run_onednn_operation_conv2d(inp, q); 20 | GlobalDeviceMemoryManager().free(inp.data); 21 | 22 | // next operation uses oneMKL 23 | inp = outp; 24 | outp = run_onemkl_operation_fft(inp, q); 25 | GlobalDeviceMemoryManager().free(inp.data); 26 | 27 | // next operation uses oneDPL 28 | inp = outp; 29 | outp = run_onedpl_operation_cos(inp, q); 30 | GlobalDeviceMemoryManager().free(inp.data); 31 | 32 | return outp; 33 | } 34 
| -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/model_mixedprogramming.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef MODEL_MIXEDPROGRAMMING_H_ 7 | #define MODEL_MIXEDPROGRAMMING_H_ 8 | 9 | #include 10 | #include "tiny_tensor.h" 11 | 12 | TinyTensor run_model_mixedprogramming(TinyTensor inp, sycl::queue *q); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/operation_onednn.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef OPERATION_ONEDNN_H_ 7 | #define OPERATION_ONEDNN_H_ 8 | 9 | #include 10 | #include "tiny_tensor.h" 11 | 12 | void onednn_prepare_weights(int oc, int ic, int ks, sycl::queue *q); 13 | TinyTensor run_onednn_operation_conv2d(const TinyTensor& inp, sycl::queue *q); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/operation_onedpl.cpp: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #include "operation_onedpl.h" 7 | #include 8 | 9 | // onedpl is only used by IPEX as code piece in kernel, while ITEX uses eigen 10 | TinyTensor run_onedpl_operation_cos(const 
TinyTensor& inp, sycl::queue *q) 11 | { 12 | TinyTensor outp(inp.N, inp.C, inp.H, inp.W); 13 | 14 | float *src = inp.data; 15 | float *dst = outp.data; 16 | 17 | q->submit([&](sycl::handler &h) { 18 | h.parallel_for(outp.count(), [=](sycl::item<1> item) { 19 | int idx = item.get_id(0); 20 | dst[idx] = oneapi::dpl::cos(src[idx]); 21 | }); 22 | }); 23 | 24 | return outp; 25 | } 26 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/operation_onedpl.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef OPERATION_ONEDPL_H_ 7 | #define OPERATION_ONEDPL_H_ 8 | 9 | #include 10 | #include "tiny_tensor.h" 11 | 12 | TinyTensor run_onedpl_operation_cos(const TinyTensor& inp, sycl::queue *q); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/operation_onemkl.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef OPERATION_ONEMKL_H_ 7 | #define OPERATION_ONEMKL_H_ 8 | 9 | #include 10 | #include "tiny_tensor.h" 11 | 12 | TinyTensor run_onemkl_operation_fft(const TinyTensor& inp, sycl::queue *q); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/operation_syclkernel.cpp: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 
| // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #include 7 | #include "operation_syclkernel.h" 8 | 9 | TinyTensor run_syclkernel_operation_scaledown(const TinyTensor& inp, sycl::queue *q) 10 | { 11 | TinyTensor outp(inp.N, inp.C, inp.H / 2, inp.W / 2); 12 | 13 | float *src = inp.data; 14 | float *dst = outp.data; 15 | 16 | q->submit([&](sycl::handler &h) { 17 | h.parallel_for(outp.count(), [=](sycl::item<1> item) { 18 | int idx = item.get_id(0); 19 | dst[idx] = src[idx*4]; 20 | }); 21 | }); 22 | 23 | return outp; 24 | } 25 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/operation_syclkernel.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef OPERATION_SYCLKERNEL_H_ 7 | #define OPERATION_SYCLKERNEL_H_ 8 | 9 | #include 10 | #include "tiny_tensor.h" 11 | 12 | TinyTensor run_syclkernel_operation_scaledown(const TinyTensor& inp, sycl::queue *q); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/queue.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef QUEUE_H_ 7 | #define QUEUE_H_ 8 | 9 | #include 10 | #include 11 | 12 | std::unique_ptr CreateQueue(); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/tiny_tensor.h: 
-------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef TINY_TENSOR_H_ 7 | #define TINY_TENSOR_H_ 8 | 9 | #include 10 | #include 11 | #include "device_memory.h" 12 | 13 | class TinyTensor { 14 | public: 15 | TinyTensor(int n, int c, int h, int w) { 16 | this->N = n; 17 | this->C = c; 18 | this->H = h; 19 | this->W = w; 20 | this->data = GlobalDeviceMemoryManager().alloc(count()); 21 | } 22 | size_t count() const { 23 | return N*C*H*W; 24 | } 25 | void print(bool showdata) const { 26 | std::cout << "NCHW: (" << N << ", " << C << ", " << H << ", " << W << ")" << std::endl; 27 | if (!showdata) { 28 | return; 29 | } 30 | #ifdef USE_HOST_MEMORY 31 | for (int ni = 0; ni < N; ++ni) { 32 | for (int ci = 0; ci < C; ++ci) { 33 | std::cout << "n: " << ni << ", c: " << ci << std::endl; 34 | for (int hi = 0; hi < H; ++hi) { 35 | for (int wi = 0; wi < W; ++wi) { 36 | std::cout << data[wi + hi * W + ci * H * W + ni * C * H * W] << ", "; 37 | } 38 | std::cout << std::endl << std::endl; 39 | } 40 | } 41 | } 42 | #endif 43 | } 44 | // NCHW for the demo 45 | int N; 46 | int C; 47 | int H; 48 | int W; 49 | float *data; 50 | }; 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /sdk/samples/dlworkloads/utils.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef UTILS_H_ 7 | #define UTILS_H_ 8 | 9 | #include 10 | 11 | class RandomFloatGen { 12 | public: 13 | inline static constexpr float kUpperBound = 10.0; 14 | inline static 
constexpr float kLowerBound = -1 * kUpperBound; 15 | 16 | RandomFloatGen() : mt_engine_(rand_num_dev_()) { 17 | } 18 | 19 | inline float Get() { 20 | return distribution_(mt_engine_); 21 | } 22 | 23 | private: 24 | std::random_device rand_num_dev_{}; // defaults to /dev/urandom libstdc++ 25 | std::uniform_real_distribution distribution_{kLowerBound, kUpperBound}; 26 | std::mt19937 mt_engine_; 27 | }; 28 | 29 | // Don't want to change the sample too much, so just use std C++ random number 30 | // generator because C's rand() is flagged by static code analysis. 31 | // (https://en.cppreference.com/w/cpp/numeric/random) 32 | inline auto& RandomFloatGenInstance() { 33 | static RandomFloatGen rand_float_gen = {}; 34 | return rand_float_gen; 35 | } 36 | 37 | // generate rand float [-10.0, 10.0] 38 | inline auto random_float() { 39 | return RandomFloatGenInstance().Get(); 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /sdk/samples/dpc_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | project(PTI_Samples_DPC_GEMM CXX) 4 | 5 | if(NOT CMAKE_BUILD_TYPE) 6 | set(CMAKE_BUILD_TYPE "Release") 7 | endif() 8 | 9 | # TODO: find_package(IntelSYCL)? 
# Builds the multi-threaded DPC++ GEMM sample. Works both as part of the SDK
# build (targets already exist) and stand-alone against an installed PTI.
cmake_minimum_required(VERSION 3.14)

project(PTI_Samples_DPC_GEMM_THREADED CXX)

if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release")
endif()

include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-fsycl" has_sycl)

# Skip the sample (with a warning) rather than fail the whole configure when
# the compiler cannot build SYCL code.
if(NOT has_sycl)
  message(WARNING "${PROJECT_NAME} requires a sycl compatible compiler")
  return()
endif()

add_executable(dpc_gemm_threaded main.cc)

target_compile_options(dpc_gemm_threaded PRIVATE -fsycl -gline-tables-only)
target_link_options(dpc_gemm_threaded PRIVATE -fsycl -gline-tables-only)
target_include_directories(dpc_gemm_threaded PRIVATE
                           "${PROJECT_SOURCE_DIR}/../samples_utilities")

if (NOT TARGET Threads::Threads)
  find_package(Threads REQUIRED)
endif()

# Allow building in-source and out-of-source
if (NOT TARGET Pti::pti_view)
  find_package(Pti REQUIRED)
endif()

target_link_libraries(dpc_gemm_threaded PUBLIC Threads::Threads Pti::pti_view)
# Top-level CMakeLists for the ISO3DFD sample.
#
# cmake_minimum_required() comes first so that version and policy settings are
# established before any other command (including the include() below) runs.
cmake_minimum_required(VERSION 3.14)

# Make the SDK's helper modules (macros, Find*.cmake) visible.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}
                      "${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/Modules/")
include(macros)

project (ISO3DFD)

if (NOT CMAKE_BUILD_TYPE)
  message (STATUS "Default CMAKE_BUILD_TYPE not set using Release with Debug Info")
  set (CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE
       STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel"
       FORCE)
endif()

include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-fsycl" has_sycl)

# The sample is optional: warn and skip instead of failing the configure step
# when a prerequisite is missing.
if(NOT has_sycl)
  message(WARNING "${PROJECT_NAME} requires a sycl compatible compiler.")
  return()
endif()

if (NOT TARGET DevUtilities::utils)
  find_package(DevUtilities)
endif()

if (NOT TARGET DevUtilities::utils)
  message(WARNING "${PROJECT_NAME} requires dev-utilities installed.")
  return()
endif()

if (NOT TARGET Pti::pti_view)
  find_package(Pti REQUIRED)
endif()

add_subdirectory (src)
Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /sdk/samples/iso3dfd_dpcpp/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "guid": "3A7DA713-6083-4CA3-B66E-A3DF21744EB4", 3 | "name": "ISO3DFD", 4 | "categories": ["Toolkit/oneAPI Direct Programming/C++SYCL/Structured Grids"], 5 | "description": "The ISO3DFD Sample illustrates SYCL using Finite Difference Stencil Kernel for solving 3D Acoustic Isotropic Wave Equation", 6 | "toolchain": [ "dpcpp" ], 7 | "targetDevice": [ "CPU", "GPU" ], 8 | "languages": [ { "cpp": {} } ], 9 | "os": [ "linux", "windows" ], 10 | "builder": [ "ide", "cmake" ], 11 | "targetDevice": [ "CPU" ], 12 | "ciTests": { 13 | "linux": [{ 14 | "steps": [ 15 | "mkdir build", 16 | "cd build", 17 | "cmake ..", 18 | "make", 19 | "make run" 20 | ] 21 | }], 22 | "windows": [{ 23 | "steps": [ 24 | "MSBuild iso3dfd.sln /t:Rebuild /p:Configuration=\"Release\"", 25 | "cd x64/Release", 26 | "iso3dfd.exe 256 256 256 32 8 64 10 gpu" 27 | ] 28 | }] 29 | 30 | }, 31 | "expertise": "Concepts and Functionality" 32 | } 33 | -------------------------------------------------------------------------------- /sdk/samples/iso3dfd_dpcpp/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | option(SHARED_KERNEL "Use SLM Kernel Version - Only for GPU" OFF) 2 | 3 | # Set default build type to RelWithDebInfo if not specified 4 | if(NOT CMAKE_BUILD_TYPE) 5 | message( 6 
# Top-level CMakeLists for the metrics variant of the ISO3DFD sample.
#
# cmake_minimum_required() comes first so that version and policy settings are
# established before any other command (including the include() below) runs.
cmake_minimum_required(VERSION 3.14)

# Make the SDK's helper modules (macros, Find*.cmake) visible.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}
                      "${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/Modules/")
include(macros)

project (METRICSISO3DFD)

if (NOT CMAKE_BUILD_TYPE)
  message (STATUS "Default CMAKE_BUILD_TYPE not set using Release with Debug Info")
  set (CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE
       STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel"
       FORCE)
endif()

include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-fsycl" has_sycl)

# The sample is optional: warn and skip instead of failing the configure step
# when a prerequisite is missing.
if(NOT has_sycl)
  message(WARNING "${PROJECT_NAME} requires a sycl compatible compiler.")
  return()
endif()

if (NOT TARGET DevUtilities::utils)
  find_package(DevUtilities)
endif()

if (NOT TARGET DevUtilities::utils)
  message(WARNING "${PROJECT_NAME} requires dev-utilities installed.")
  return()
endif()

if (NOT TARGET Pti::pti_view)
  find_package(Pti REQUIRED)
endif()

if (NOT TARGET Pti::pti_metrics)
  find_package(Pti REQUIRED)
endif()

if (NOT TARGET LevelZero::level-zero)
  find_package(LevelZero REQUIRED)
endif()

add_subdirectory (src)
8 | -------------------------------------------------------------------------------- /sdk/samples/metrics_iso3dfd_dpcpp/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | option(SHARED_KERNEL "Use SLM Kernel Version - Only for GPU" OFF) 2 | 3 | # Set default build type to RelWithDebInfo if not specified 4 | if(NOT CMAKE_BUILD_TYPE) 5 | message( 6 | STATUS "Default CMAKE_BUILD_TYPE not set using Release with Debug Info") 7 | set(CMAKE_BUILD_TYPE 8 | "RelWithDebInfo" 9 | CACHE 10 | STRING 11 | "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel" 12 | FORCE) 13 | endif() 14 | 15 | add_executable(metrics_iso3dfd iso3dfd.cpp iso3dfd_kernels.cpp utils.cpp) 16 | target_compile_options(metrics_iso3dfd PUBLIC -O3 -fsycl) 17 | target_link_options(metrics_iso3dfd PUBLIC -O3 -fsycl) 18 | target_include_directories( 19 | metrics_iso3dfd PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../samples_utilities 20 | ${PROJECT_SOURCE_DIR}/../../src/utils 21 | ${CMAKE_CURRENT_SOURCE_DIR}/../include) 22 | target_link_libraries(metrics_iso3dfd PUBLIC spdlog::spdlog Pti::pti_view Pti::pti_metrics LevelZero::level-zero DevUtilities::utils) 23 | 24 | if(SHARED_KERNEL) 25 | target_compile_definitions(metrics_iso3dfd PUBLIC USED_SHARED) 26 | endif() 27 | 28 | if(WIN32) 29 | add_custom_target(runmetricsiso metrics_iso3dfd 256 256 256 32 8 64 10 gpu) 30 | add_custom_target(runmetricsiso_cpu metrics_iso3dfd 256 256 256 256 1 1 10 cpu) 31 | else() 32 | add_custom_target(runmetricsiso metrics_iso3dfd 256 256 256 32 8 64 10 gpu) 33 | add_custom_target(runmetricsiso_cpu metrics_iso3dfd 256 256 256 256 1 1 10 cpu) 34 | endif() 35 | -------------------------------------------------------------------------------- /sdk/samples/onemkl_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | project(PTI_Samples_ONEMKL_GEMM CXX) 4 | 5 | 
if(NOT CMAKE_BUILD_TYPE) 6 | set(CMAKE_BUILD_TYPE "Release") 7 | endif() 8 | 9 | include(CheckCXXCompilerFlag) 10 | check_cxx_compiler_flag("-fsycl" has_sycl) 11 | 12 | if(NOT has_sycl) 13 | message(WARNING "${PROJECT_NAME} requres a sycl compatible compiler") 14 | return() 15 | endif() 16 | 17 | if (NOT TARGET MKL::MKL) 18 | find_package(MKL CONFIG PATHS $ENV{MKLROOT} NO_DEFAULT_PATH) 19 | message(STATUS "Imported oneMKL targets: ${MKL_IMPORTED_TARGETS}") 20 | endif() 21 | 22 | if (NOT TARGET MKL::MKL) 23 | message(WARNING "${PROJECT_NAME} requires oneMKL installed") 24 | return() 25 | endif() 26 | 27 | add_executable(onemkl_gemm_exe onemkl_gemm.cc) 28 | 29 | if (NOT TARGET Pti::pti_view) 30 | find_package(Pti REQUIRED) 31 | endif() 32 | 33 | target_link_libraries(onemkl_gemm_exe PUBLIC MKL::MKL MKL::MKL_SYCL Pti::pti_view) 34 | target_include_directories(onemkl_gemm_exe 35 | PRIVATE "${PROJECT_SOURCE_DIR}/../../src/utils" 36 | "${PROJECT_SOURCE_DIR}/../samples_utilities") 37 | -------------------------------------------------------------------------------- /sdk/samples/vector_sq_add/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | project(PTI_Samples_KERNEL_V CXX) 4 | 5 | if(NOT CMAKE_BUILD_TYPE) 6 | set(CMAKE_BUILD_TYPE "Release") 7 | endif() 8 | 9 | include(CheckCXXCompilerFlag) 10 | check_cxx_compiler_flag("-fsycl" has_sycl) 11 | 12 | if(NOT has_sycl) 13 | message(WARNING "${PROJECT_NAME} requres a sycl compatible compiler") 14 | return() 15 | endif() 16 | 17 | add_executable(vec_sqadd vector_sq_add.cc) 18 | 19 | target_link_options(vec_sqadd PUBLIC -fsycl -gline-tables-only) 20 | target_compile_options(vec_sqadd PUBLIC -fsycl -gline-tables-only) 21 | target_include_directories(vec_sqadd 22 | PRIVATE "${PROJECT_SOURCE_DIR}/../samples_utilities") 23 | 24 | if (NOT TARGET Pti::pti_view) 25 | find_package(Pti REQUIRED) 26 | endif() 27 | 28 | 
target_link_libraries(vec_sqadd PUBLIC Pti::pti_view) 29 | 30 | -------------------------------------------------------------------------------- /sdk/src/default_buffer_callbacks.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef SRC_DEFAULT_BUFFER_CALLBACKS_H_ 7 | #define SRC_DEFAULT_BUFFER_CALLBACKS_H_ 8 | 9 | #include 10 | #include 11 | 12 | #include "view_record_info.h" 13 | 14 | namespace pti { 15 | namespace view { 16 | namespace defaults { 17 | constexpr std::size_t kBufferAlignment = 8; 18 | constexpr std::size_t kDefaultSizeOfBuffer = 1'000 * SizeOfLargestViewRecord(); 19 | 20 | void DefaultBufferAllocation(unsigned char** buf, std::size_t* buf_size) { 21 | *buf_size = kDefaultSizeOfBuffer; 22 | auto* ptr = ::operator new(*buf_size); 23 | ptr = std::align(kBufferAlignment, sizeof(unsigned char), ptr, *buf_size); 24 | *buf = static_cast(ptr); 25 | if (!*buf) { 26 | std::cerr << "Unable to allocate memory for default buffer" << '\n'; 27 | std::abort(); 28 | } 29 | } 30 | 31 | void DefaultRecordParser(unsigned char* const buf, [[maybe_unused]] std::size_t buf_size, 32 | std::size_t valid_buf_size) { 33 | if (valid_buf_size) { 34 | ::operator delete(buf); 35 | } 36 | } 37 | } // namespace defaults 38 | } // namespace view 39 | } // namespace pti 40 | #endif // SRC_DEFAULT_BUFFER_CALLBACKS_H 41 | -------------------------------------------------------------------------------- /sdk/src/levelzero/collector_options.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // 
============================================================= 6 | 7 | #ifndef SDK_SRC_COLLECTOR_OPTIONS_ 8 | #define SDK_SRC_COLLECTOR_OPTIONS_ 9 | 10 | #include 11 | 12 | // Structure holds flags that convey to collector that the associated viewKind is enabled by user. 13 | // Enabled flags trigger callbacks to view_handler to issue buffer record. 14 | struct ViewsBufferable { 15 | std::atomic synch_enabled = false; // is synchronization viewkind enabled by user. 16 | std::atomic api_calls_enabled = false; // are driver api calls enabled by user. 17 | 18 | ViewsBufferable() : synch_enabled(false), api_calls_enabled(false) {} 19 | 20 | // to copy atomics 21 | ViewsBufferable(ViewsBufferable& other) 22 | : synch_enabled(other.synch_enabled.load()), 23 | api_calls_enabled(other.api_calls_enabled.load()) {} 24 | 25 | ViewsBufferable& operator=(const ViewsBufferable&) = delete; // Not used. 26 | ViewsBufferable(ViewsBufferable&&) = delete; 27 | ViewsBufferable& operator=(const ViewsBufferable&&) = delete; 28 | virtual ~ViewsBufferable() = default; 29 | }; 30 | 31 | struct CollectorOptions { 32 | // collector present in the process but does nothing, ready to start 33 | bool disabled_mode = true; 34 | // collector shifts to this mode only when introspection apis available and only when user forces 35 | // to be so. 
36 | bool hybrid_mode = false; 37 | 38 | bool kernel_tracing = false; 39 | bool api_tracing = false; 40 | 41 | bool demangle = true; 42 | 43 | ViewsBufferable lz_enabled_views; 44 | }; 45 | 46 | #endif // SDK_SRC_COLLECTOR_OPTIONS_ 47 | -------------------------------------------------------------------------------- /sdk/src/levelzero/ze_driver_init.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef LEVEL_ZERO_ZE_DRIVER_INIT_H_ 8 | #define LEVEL_ZERO_ZE_DRIVER_INIT_H_ 9 | 10 | #include 11 | 12 | #include 13 | 14 | class ZeDriverInit { 15 | public: 16 | ZeDriverInit(); 17 | 18 | bool Success() const; 19 | 20 | const std::vector& Drivers() const; 21 | 22 | std::vector& Drivers(); 23 | 24 | private: 25 | bool InitDrivers(); 26 | void CollectLegacyDrivers(); 27 | void InitSysmanDrivers(); 28 | 29 | bool init_success_ = false; 30 | std::vector drivers_; 31 | }; 32 | 33 | #endif // LEVEL_ZERO_ZE_DRIVER_INIT_H_ 34 | -------------------------------------------------------------------------------- /sdk/src/levelzero/ze_timer_helper.h: -------------------------------------------------------------------------------- 1 | // 2 | //============================================================== 3 | // Copyright (C) Intel Corporation 4 | // 5 | // SPDX-License-Identifier: MIT 6 | // ============================================================= 7 | 8 | #ifndef PTI_TOOLS_ZE_TIMER_HELPER_H_ 9 | #define PTI_TOOLS_ZE_TIMER_HELPER_H_ 10 | 11 | #include 12 | 13 | #include "pti_assert.h" 14 | 15 | struct CPUGPUTimeInterpolationHelper { 16 | constexpr static uint64_t kSyncDeltaDefault = 10'000; // 10 us 17 | ze_device_handle_t device_; 18 | uint32_t gpu_freq_; 19 | uint64_t gpu_timer_mask_; 20 | uint64_t cpu_timestamp_; 21 | 
uint64_t gpu_timestamp_; 22 | uint64_t delta_ = kSyncDeltaDefault; 23 | uint64_t coeff_; 24 | CPUGPUTimeInterpolationHelper(ze_device_handle_t device, uint32_t gpu_freq, 25 | uint64_t gpu_timer_mask, uint64_t sync_delta) 26 | : device_(device), 27 | gpu_freq_(gpu_freq), 28 | gpu_timer_mask_(gpu_timer_mask), 29 | cpu_timestamp_(0), 30 | gpu_timestamp_(0) { 31 | PTI_ASSERT(device_ != nullptr); 32 | PTI_ASSERT(gpu_freq != 0ULL); 33 | PTI_ASSERT(gpu_timer_mask != 0ULL); 34 | if (sync_delta != 0ULL) { 35 | delta_ = sync_delta; 36 | } 37 | coeff_ = 1'000'000'000 / gpu_freq_; 38 | } 39 | }; 40 | 41 | #endif // PTI_TOOLS_ZE_TIMER_HELPER_H_ 42 | -------------------------------------------------------------------------------- /sdk/src/pti_version.cc: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #include "pti/pti_version.h" 7 | 8 | constexpr auto kPtiVersion = pti_version{PTI_VERSION_MAJOR, PTI_VERSION_MINOR, PTI_VERSION_PATCH}; 9 | constexpr const char* const kPtiVersionString = PTI_VERSION_STRING; 10 | 11 | const char* ptiVersionString() { return kPtiVersionString; } 12 | 13 | pti_version ptiVersion() { return kPtiVersion; } 14 | -------------------------------------------------------------------------------- /sdk/src/utils/demangle.h: -------------------------------------------------------------------------------- 1 | // 2 | //-- copied here from root of this project -- directory to facilitate independent 3 | // ptisdk build 4 | // 5 | #ifndef PTI_UTILS_DEMANGLE_H_ 6 | #define PTI_UTILS_DEMANGLE_H_ 7 | 8 | #if __has_include() 9 | #define HAVE_CXXABI 1 10 | #include 11 | 12 | #include 13 | #else 14 | #define HAVE_CXXABI 0 15 | #endif 16 | #include 17 | 18 | #include "pti_assert.h" 19 | 20 | namespace utils { 21 | 22 | 
static inline std::string Demangle(const char* name) { 23 | PTI_ASSERT(name != nullptr); 24 | 25 | #if HAVE_CXXABI 26 | int status = 0; 27 | char* demangled = abi::__cxa_demangle(name, nullptr, 0, &status); 28 | if (status != 0) { 29 | return name; 30 | } 31 | 32 | constexpr const char* const prefix_to_skip = "typeinfo name for "; 33 | const size_t prefix_to_skip_len = strlen(prefix_to_skip); 34 | const size_t shift = 35 | (std::strncmp(demangled, prefix_to_skip, prefix_to_skip_len) == 0) ? prefix_to_skip_len : 0; 36 | 37 | std::string result(demangled + shift); 38 | free(demangled); 39 | return result; 40 | #else 41 | return name; 42 | #endif 43 | } 44 | 45 | } // namespace utils 46 | 47 | #undef HAVE_CXXABI 48 | 49 | #endif // PTI_UTILS_DEMANGLE_H_ 50 | -------------------------------------------------------------------------------- /sdk/src/utils/platform_config.h.in: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef UTILS_PLATFORM_CONFIG_H_ 8 | #define UTILS_PLATFORM_CONFIG_H_ 9 | 10 | #cmakedefine PTI_EXPERIMENTAL_FILESYSTEM 11 | 12 | #endif // UTILS_PLATFORM_CONFIG_H_ 13 | 14 | -------------------------------------------------------------------------------- /sdk/src/utils/platform_strings.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef UTILS_PLATFORM_STRINGS_H_ 7 | #define UTILS_PLATFORM_STRINGS_H_ 8 | 9 | #include "utils.h" 10 | 11 | #if !defined(PTI_XPTI_FRAMEWORK_DISPATCHER_NAME) 12 | #if defined(_WIN32) 13 | #define 
PTI_XPTI_FRAMEWORK_DISPATCHER_NAME_STRING "xptifw.dll" 14 | #else 15 | #define PTI_XPTI_FRAMEWORK_DISPATCHER_NAME_STRING "libxptifw.so" 16 | #endif 17 | #else 18 | #define PTI_XPTI_FRAMEWORK_DISPATCHER_NAME_STRING TOSTRING(PTI_XPTI_FRAMEWORK_DISPATCHER_NAME) 19 | #endif 20 | 21 | #if !defined(PTI_VIEW_CORE_LIB_NAME) 22 | #if defined(_WIN32) 23 | #define PTI_VIEW_CORE_LIB_NAME_STRING "pti.dll" 24 | #else 25 | #define PTI_VIEW_CORE_LIB_NAME_STRING "libpti.so" 26 | #endif 27 | #else 28 | #define PTI_VIEW_CORE_LIB_NAME_STRING TOSTRING(PTI_VIEW_CORE_LIB_NAME) 29 | #endif 30 | 31 | namespace pti { 32 | namespace strings { 33 | inline static constexpr const char* const kXptiLibName = PTI_XPTI_FRAMEWORK_DISPATCHER_NAME_STRING; 34 | inline static constexpr const char* const kPtiViewLib = PTI_VIEW_CORE_LIB_NAME_STRING; 35 | } // namespace strings 36 | } // namespace pti 37 | #endif // UTILS_PLATFORM_STRINGS_H_ 38 | -------------------------------------------------------------------------------- /sdk/src/utils/pti_assert.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef PTI_UTILS_PTI_ASSERT_H_ 8 | #define PTI_UTILS_PTI_ASSERT_H_ 9 | 10 | #include 11 | #include 12 | 13 | #if defined(__gnu_linux__) 14 | #define PTI_FUNCTION_NAME __PRETTY_FUNCTION__ 15 | #elif defined(_WIN32) 16 | #define PTI_FUNCTION_NAME __FUNCSIG__ 17 | #else 18 | #define PTI_FUNCTION_NAME __FUNCTION__ 19 | #endif 20 | 21 | #define PTI_ASSERT(X) \ 22 | do { \ 23 | if (!(X)) { \ 24 | std::fprintf(stderr, "Condition " #X " Failed on %s at " __FILE__ ":%d\n", \ 25 | PTI_FUNCTION_NAME, __LINE__); \ 26 | std::abort(); \ 27 | } \ 28 | } while (0) 29 | 30 | #endif // PTI_UTILS_PTI_ASSERT_H_ 31 | 
-------------------------------------------------------------------------------- /sdk/src/utils/pti_filesystem.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef UTILS_PTI_FILESYSTEM_H_ 8 | #define UTILS_PTI_FILESYSTEM_H_ 9 | 10 | #include "platform_config.h" 11 | 12 | #if defined(PTI_EXPERIMENTAL_FILESYSTEM) 13 | #include 14 | #else 15 | #include 16 | #endif 17 | 18 | namespace pti { 19 | namespace utils { 20 | #if defined(PTI_EXPERIMENTAL_FILESYSTEM) 21 | namespace filesystem = std::experimental::filesystem; 22 | #else 23 | namespace filesystem = std::filesystem; 24 | #endif 25 | } // namespace utils 26 | } // namespace pti 27 | 28 | #endif // UTILS_PTI_FILESYSTEM_H_ 29 | -------------------------------------------------------------------------------- /sdk/src/xpti_adapter.cc: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #include "pti_lib_handler.h" 7 | #include "utils/platform_strings.h" 8 | #include "utils/utils.h" 9 | 10 | class GlobalSyclInitializer { 11 | public: 12 | inline static bool Initialize() { 13 | utils::SetEnv("XPTI_SUBSCRIBERS", utils::GetPathToSharedObject(Initialize).c_str()); 14 | utils::SetEnv("XPTI_FRAMEWORK_DISPATCHER", pti::strings::kXptiLibName); 15 | utils::SetEnv("XPTI_TRACE_ENABLE", "1"); 16 | return true; 17 | } 18 | 19 | inline static bool result_ = Initialize(); 20 | }; 21 | 22 | void xptiTraceInit(unsigned int major_version, unsigned int minor_version, const char* version_str, 23 | const char* stream_name) { 24 | 
if (!pti::PtiLibHandler::Instance().xptiTraceInit_) { 25 | return; 26 | } 27 | pti::PtiLibHandler::Instance().xptiTraceInit_(major_version, minor_version, version_str, 28 | stream_name); 29 | } 30 | 31 | void xptiTraceFinish(const char* /*stream_name*/) {} 32 | -------------------------------------------------------------------------------- /sdk/src/xpti_adapter.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | #ifndef SRC_XPTI_ADAPTER_H_ 7 | #define SRC_XPTI_ADAPTER_H_ 8 | 9 | #if defined(_WIN32) 10 | #define PTI_XPTI_HOOK_VISIBILITY __declspec(dllexport) 11 | #else 12 | #define PTI_XPTI_HOOK_VISIBILITY __attribute__((visibility("default"))) 13 | #endif 14 | 15 | extern "C" { 16 | PTI_XPTI_HOOK_VISIBILITY void xptiTraceInit(unsigned int major_version, unsigned int minor_version, 17 | const char* version_str, const char* stream_name); 18 | 19 | PTI_XPTI_HOOK_VISIBILITY void xptiTraceFinish(const char* stream_name); // NOLINT 20 | } 21 | 22 | #endif // SRC_XPTI_ADAPTER_H_ 23 | -------------------------------------------------------------------------------- /sdk/test/assert_exception_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | class AssertExceptionFixtureTest : public ::testing::Test { 7 | protected: 8 | void SetUp() override {} 9 | 10 | void TearDown() override {} 11 | }; 12 | 13 | TEST_F(AssertExceptionFixtureTest, AssertApplyTimeShiftUFValid) { 14 | Instance().SetState(pti_result::PTI_SUCCESS); 15 | uint64_t aTS = ApplyTimeShift(10, -10); 16 | ASSERT_EQ(aTS, 0ULL); 17 | } 18 | 19 | TEST_F(AssertExceptionFixtureTest, AssertApplyTimeShiftUnderflow) { 20 | Instance().SetState(pti_result::PTI_SUCCESS); 21 | uint64_t aTS 
= ApplyTimeShift(0, -1); 22 | ASSERT_EQ(aTS, 0ULL); 23 | } 24 | 25 | TEST_F(AssertExceptionFixtureTest, AssertApplyTimeShiftOFValid) { 26 | Instance().SetState(pti_result::PTI_SUCCESS); 27 | uint64_t aTS = ApplyTimeShift(UINT64_MAX - 1, 1); 28 | ASSERT_EQ(aTS, UINT64_MAX); 29 | } 30 | 31 | TEST_F(AssertExceptionFixtureTest, AssertApplyTimeShiftOverflow) { 32 | Instance().SetState(pti_result::PTI_SUCCESS); 33 | uint64_t aTS = ApplyTimeShift(UINT64_MAX, 1); 34 | ASSERT_EQ(aTS, 0ULL); 35 | } 36 | 37 | TEST_F(AssertExceptionFixtureTest, ExceptionGetViewNameAndCallbackThrow) { 38 | Instance().SetState(pti_result::PTI_SUCCESS); 39 | EXPECT_THROW({ GetViewNameAndCallback(pti_view_kind::PTI_VIEW_EXTERNAL_CORRELATION); }, 40 | std::out_of_range); 41 | } 42 | -------------------------------------------------------------------------------- /sdk/test/gemm.cl: -------------------------------------------------------------------------------- 1 | __kernel void GEMM(__global float* a, __global float* b, 2 | __global float* c, int size) { 3 | int j = get_global_id(0); 4 | int i = get_global_id(1); 5 | float sum = 0.0f; 6 | for (int k = 0; k < size; ++k) { 7 | sum += a[i * size + k] * b[k * size + j]; 8 | } 9 | c[i * size + j] = sum; 10 | } -------------------------------------------------------------------------------- /sdk/test/gemm.spv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/sdk/test/gemm.spv -------------------------------------------------------------------------------- /sdk/test/pti_assert_test.cc: -------------------------------------------------------------------------------- 1 | #include "pti_assert.h" 2 | 3 | #include 4 | #include 5 | 6 | TEST(PtiAssertTest, CheckCorrectAssertion) { 7 | const int value_1 = 0; 8 | const int value_2 = 0; 9 | EXPECT_EQ(value_1, value_2); 10 | PTI_ASSERT(value_1 == value_2); 11 | } 12 | 13 | TEST(PtiAssertTest, 
CheckIncorrectAssertion) { 14 | const int value_1 = 0; 15 | const int value_2 = 1; 16 | EXPECT_NE(value_1, value_2); 17 | // clang-format off 18 | EXPECT_DEATH_IF_SUPPORTED(PTI_ASSERT(value_1 == value_2), ::testing::HasSubstr(__FILE__ ":")); 19 | // clang-format on 20 | PTI_ASSERT(value_1 != value_2); 21 | } 22 | -------------------------------------------------------------------------------- /sdk/test/suppressions/ASan.supp: -------------------------------------------------------------------------------- 1 | # File to suppress results from AddressSanitizer 2 | 3 | # Usage: 4 | # * ASAN_OPTIONS=suppressions=/ASan.supp ./your-program 5 | 6 | # Additions should be rare; this file is meant to suppress errors from libpti.so 7 | # dependencies. 8 | 9 | # This does not seem to work. However, in spirit, we want it. 10 | interceptor_via_lib:libigc.so 11 | -------------------------------------------------------------------------------- /sdk/test/suppressions/LSan.supp: -------------------------------------------------------------------------------- 1 | # File to suppress results from LeakSanitizer 2 | 3 | # Usage: 4 | # * LSAN_OPTIONS=suppressions=/LSan.supp ./your-program 5 | 6 | # Additions should be rare; this file is meant to suppress errors from libpti.so 7 | # dependencies. 8 | 9 | # libigc.so has a memory leak in strdup somewhere. 10 | leak:libigc.so 11 | 12 | # Strange leak reported - nothing from the app is on the stack; possibly a compiler LSan issue. 13 | # Note that this suppression might hide other leaks. 14 | # Temporary workaround. 15 | leak:stdlib_new_delete.cpp 16 | 17 | # libmkl_sycl.so in zero_pool.cpp. 
18 | leak:oneapi::mkl::gpu::calloc_device 19 | 20 | # libmkl_sycl.so (unknown file) 21 | leak:oneapi::mkl::gpu::cache_program 22 | 23 | leak:libmkl_sycl.so 24 | 25 | leak:libze_intel_gpu.so.1* 26 | 27 | leak:libigdmd.so.1* 28 | 29 | leak:libdrm.so.2* 30 | -------------------------------------------------------------------------------- /sdk/test/suppressions/TSan.supp: -------------------------------------------------------------------------------- 1 | # File to suppress results from ThreadSanitizer 2 | 3 | # Usage: 4 | # * TSAN_OPTIONS=suppressions=/TSan.supp ./your-program 5 | 6 | # Additions should be rare; this file is meant to suppress errors from libpti.so 7 | # dependencies. 8 | 9 | # TODO: Fixed in compiler >= 2024.0.0? Definitely an issue on == 2023.2.1 10 | race:libxpti* 11 | race:libsycl.so.6* 12 | 13 | # compute-runtime bug 14 | # I believe I saw recent fixes for this in the repository. They may not have 15 | # been released yet or added to our test machines. 16 | mutex:*ze_intel_gpu.so* 17 | deadlock:*ze_intel_gpu.so* 18 | 19 | # Need to file a bug, might be related to above. 20 | called_from_lib:libigdgmm.so* 21 | 22 | # TODO: Fixed in compiler >= 2024.0.0? Definitely an issue on == 2023.2.1 23 | deadlock:libigdrcl* 24 | -------------------------------------------------------------------------------- /sdk/test/suppressions/UBSan.supp: -------------------------------------------------------------------------------- 1 | # File to suppress results from UndefinedBehaviorSanitizer 2 | 3 | # Usage: 4 | # * UBSAN_OPTIONS=suppressions=/UBSan.supp ./your-program 5 | 6 | # ISO sample has some undefined overflow behavior. However, we did not write 7 | # this code. 
8 | pointer-overflow:iso3dfd 9 | pointer-overflow:metrics_iso3dfd 10 | -------------------------------------------------------------------------------- /sdk/test/utils/sycl_config_info.h: -------------------------------------------------------------------------------- 1 | #ifndef TEST_UTILS_SYCL_CONFIG_INFO_H_ 2 | #define TEST_UTILS_SYCL_CONFIG_INFO_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include "ze_config_info.h" 10 | 11 | namespace pti::test::utils { 12 | [[nodiscard]] inline bool IsIntegratedGraphics(const sycl::device& device) { 13 | if (!device.is_gpu()) { 14 | return false; 15 | } 16 | 17 | // Ideally, we want get_info or 18 | // to do this via sycl but that seems to be deprecated with no replacement. 19 | bool result = false; 20 | if (device.get_backend() == sycl::backend::ext_oneapi_level_zero) { 21 | auto* device_handle = sycl::get_native(device); 22 | result = level_zero::CheckIntegratedGraphics(device_handle); 23 | } 24 | 25 | // TODO: add supported backends as they come and move implementation to .cc 26 | 27 | return result; 28 | } 29 | 30 | } // namespace pti::test::utils 31 | 32 | #endif // TEST_UTILS_SYCL_CONFIG_INFO_H_ 33 | -------------------------------------------------------------------------------- /sdk/test/view_gpu_local_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "pti/pti_view.h" 6 | #include "utils/test_helpers.h" 7 | 8 | class ViewGPULocalFixtureTest : public ::testing::Test { 9 | protected: 10 | void SetUp() override {} 11 | void TearDown() override {} 12 | }; 13 | 14 | // At the moment, this at least executes the API. 15 | // TODO: more tests to be added; 16 | // they will respect the version of L0 installed on the system 17 | TEST_F(ViewGPULocalFixtureTest, CheckGPULocalViewAvailable) { 18 | pti_result result = ptiViewGPULocalAvailable(); 19 | EXPECT_TRUE(result == pti_result::PTI_SUCCESS || 20 | result == 
pti_result::PTI_ERROR_L0_LOCAL_PROFILING_NOT_SUPPORTED); 21 | 22 | std::cout << "result: " << result << std::endl; 23 | 24 | // just exercising two times and verifying the answer is the same 25 | pti_result result2 = ptiViewGPULocalAvailable(); 26 | EXPECT_TRUE(result2 == result); 27 | } 28 | -------------------------------------------------------------------------------- /tests/samples/cl_gemm.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | import utils 6 | 7 | if sys.platform == 'win32': 8 | cmake_generator = "NMake Makefiles" 9 | file_extention = ".exe" 10 | file_name_prefix = "" 11 | make = ["nmake"] 12 | else: 13 | cmake_generator = "Unix Makefiles" 14 | file_extention = "" 15 | file_name_prefix = "./" 16 | make = ["make"] 17 | 18 | 19 | def config(path): 20 | cmake = ["cmake", "-G", cmake_generator,\ 21 | "-DCMAKE_BUILD_TYPE=" + utils.get_build_flag(), ".."] 22 | stdout, stderr = utils.run_process(cmake, path) 23 | if stderr and stderr.find("CMake Error") != -1: 24 | return stderr 25 | return None 26 | 27 | def build(path): 28 | stdout, stderr = utils.run_process(make, path) 29 | if stderr and stderr.lower().find("error") != -1: 30 | return stderr 31 | return None 32 | 33 | def run(path, option): 34 | command = [file_name_prefix + "cl_gemm" + file_extention,\ 35 | option, "1024", "1"] 36 | stdout, stderr = utils.run_process(command, path) 37 | if stderr: 38 | return stderr 39 | if not stdout: 40 | return "stdout is empty" 41 | if stdout.find(" CORRECT") == -1: 42 | return stdout 43 | return None 44 | 45 | def main(option): 46 | path = utils.get_sample_build_path("cl_gemm") 47 | log = config(path) 48 | if log: 49 | return log 50 | log = build(path) 51 | if log: 52 | return log 53 | log = run(path, option) 54 | if log: 55 | return log 56 | 57 | if __name__ == "__main__": 58 | option = "gpu" 59 | if len(sys.argv) > 1 and sys.argv[1] == "cpu": 60 | option = "cpu" 61 | 
log = main(option) 62 | if log: 63 | print(log) 64 | -------------------------------------------------------------------------------- /tests/samples/cl_gemm_itt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | import utils 6 | 7 | if sys.platform == 'win32': 8 | cmake_generator = "NMake Makefiles" 9 | file_extention = ".exe" 10 | file_name_prefix = "" 11 | make = ["nmake"] 12 | else: 13 | cmake_generator = "Unix Makefiles" 14 | file_extention = "" 15 | file_name_prefix = "./" 16 | make = ["make"] 17 | 18 | 19 | def config(path): 20 | cmake = ["cmake", "-G", cmake_generator,\ 21 | "-DCMAKE_BUILD_TYPE=" + utils.get_build_flag(), ".."] 22 | stdout, stderr = utils.run_process(cmake, path) 23 | if stderr and stderr.find("CMake Error") != -1: 24 | return stderr 25 | return None 26 | 27 | def build(path): 28 | stdout, stderr = utils.run_process(make, path) 29 | if stderr and stderr.lower().find("error") != -1: 30 | return stderr 31 | return None 32 | 33 | def run(path, option): 34 | command = [file_name_prefix + "cl_gemm_itt" + file_extention,\ 35 | option, "1024", "1"] 36 | stdout, stderr = utils.run_process(command, path) 37 | if stderr: 38 | return stderr 39 | if not stdout: 40 | return "stdout is empty" 41 | if stdout.find(" CORRECT") == -1: 42 | return stdout 43 | return None 44 | 45 | def main(option): 46 | path = utils.get_sample_build_path("cl_gemm_itt") 47 | log = config(path) 48 | if log: 49 | return log 50 | log = build(path) 51 | if log: 52 | return log 53 | log = run(path, option) 54 | if log: 55 | return log 56 | 57 | if __name__ == "__main__": 58 | option = "gpu" 59 | if len(sys.argv) > 1 and sys.argv[1] == "cpu": 60 | option = "cpu" 61 | log = main(option) 62 | if log: 63 | print(log) -------------------------------------------------------------------------------- /tests/samples/gpu_perfmon_set.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | import utils 6 | 7 | def config(path): 8 | cmake = ["cmake",\ 9 | "-DCMAKE_BUILD_TYPE=" + utils.get_build_flag(), ".."] 10 | stdout, stderr = utils.run_process(cmake, path) 11 | if stderr and stderr.find("CMake Error") != -1: 12 | return stderr 13 | return None 14 | 15 | def build(path): 16 | stdout, stderr = utils.run_process(["make"], path) 17 | if stderr and stderr.lower().find("error") != -1: 18 | return stderr 19 | return None 20 | 21 | def run(path): 22 | command = ["./gpu_perfmon_set", "4", "-t"] 23 | stdout, stderr = utils.run_process(command, path) 24 | if stderr: 25 | return stderr 26 | if not stdout: 27 | return "stdout is empty" 28 | if stdout.find("GPU PefMon configuration is completed") == -1: 29 | return stdout 30 | return None 31 | 32 | def main(option): 33 | path = utils.get_sample_build_path("gpu_perfmon_set") 34 | log = config(path) 35 | if log: 36 | return log 37 | log = build(path) 38 | if log: 39 | return log 40 | log = run(path) 41 | if log: 42 | return log 43 | 44 | if __name__ == "__main__": 45 | log = main(None) 46 | if log: 47 | print(log) -------------------------------------------------------------------------------- /tests/samples/ze_gemm.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | import utils 6 | 7 | def config(path): 8 | if (sys.platform != 'win32'): 9 | cmake = ["cmake", "-DCMAKE_BUILD_TYPE=" + utils.get_build_flag(), ".."] 10 | else: 11 | cmake = ["cmake", "-G", "NMake Makefiles", "-DCMAKE_BUILD_TYPE=" + utils.get_build_flag(), ".."] 12 | stdout, stderr = utils.run_process(cmake, path) 13 | if stderr and stderr.find("CMake Error") != -1: 14 | return stderr 15 | return None 16 | 17 | def build(path): 18 | if (sys.platform != 'win32'): 19 | stdout, stderr = utils.run_process(["make"], path) 20 | 
else: 21 | stdout, stderr = utils.run_process(["nmake"], path) 22 | if stderr and stderr.lower().find("error") != -1: 23 | return stderr 24 | return None 25 | 26 | def run(path): 27 | if (sys.platform != 'win32'): 28 | command = ["./ze_gemm", "1024", "1"] 29 | else: 30 | command = ["ze_gemm", "1024", "1"] 31 | 32 | stdout, stderr = utils.run_process(command, path) 33 | if stderr: 34 | return stderr 35 | if not stdout: 36 | return "stdout is empty" 37 | if stdout.find(" CORRECT") == -1: 38 | return stdout 39 | return None 40 | 41 | def main(option): 42 | path = utils.get_sample_build_path("ze_gemm") 43 | log = config(path) 44 | if log: 45 | return log 46 | log = build(path) 47 | if log: 48 | return log 49 | log = run(path) 50 | if log: 51 | return log 52 | 53 | if __name__ == "__main__": 54 | log = main(None) 55 | if log: 56 | print(log) 57 | -------------------------------------------------------------------------------- /tests/samples/ze_sysman.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | import utils 6 | 7 | def config(path): 8 | cmake = ["cmake",\ 9 | "-DCMAKE_BUILD_TYPE=" + utils.get_build_flag(), ".."] 10 | stdout, stderr = utils.run_process(cmake, path) 11 | if stderr and stderr.find("CMake Error") != -1: 12 | return stderr 13 | return None 14 | 15 | def build(path): 16 | stdout, stderr = utils.run_process(["make"], path) 17 | if stderr and stderr.lower().find("error") != -1: 18 | return stderr 19 | return None 20 | 21 | def parse(output): 22 | count = 0 23 | lines = output.split("\n") 24 | for line in lines: 25 | if line.find("Device:") == 0: 26 | count +=1 27 | if line.find("-- Subdevice Count:") == 0: 28 | count +=1 29 | if count != 2: 30 | return False 31 | return True 32 | 33 | def run(path): 34 | command = ["./ze_sysman"] 35 | stdout, stderr = utils.run_process(command, path) 36 | if stderr: 37 | return stderr 38 | if not stdout: 39 | return "stdout is 
empty" 40 | if not parse(stdout): 41 | return stdout 42 | return None 43 | 44 | def main(option): 45 | path = utils.get_sample_build_path("ze_sysman") 46 | log = config(path) 47 | if log: 48 | return log 49 | log = build(path) 50 | if log: 51 | return log 52 | log = run(path) 53 | if log: 54 | return log 55 | 56 | if __name__ == "__main__": 57 | log = main(None) 58 | if log: 59 | print(log) -------------------------------------------------------------------------------- /tests/tools/unitrace-build.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | import subprocess 4 | import sys 5 | 6 | import utils 7 | 8 | def config(path): 9 | if (sys.platform != 'win32'): 10 | cmake = ["cmake",\ 11 | "-DBUILD_WITH_MPI=0", "-DCMAKE_BUILD_TYPE=" + utils.get_build_flag(), ".."] 12 | else: 13 | cmake = ["cmake",\ 14 | "-G", "NMake Makefiles", "-DBUILD_WITH_MPI=0", "-DCMAKE_BUILD_TYPE=" + utils.get_build_flag(), ".."] 15 | stdout, stderr = utils.run_process(cmake, path) 16 | 17 | if stderr and stderr.find("CMake Error") != -1: 18 | print("======================== CMake ===============================") 19 | print(stdout) 20 | print(stderr) 21 | print("==============================================================") 22 | 23 | return stderr 24 | 25 | return None 26 | 27 | def build(path): 28 | if (sys.platform != 'win32'): 29 | stdout, stderr = utils.run_process(["make"], path) 30 | else: 31 | stdout, stderr = utils.run_process(["nmake"], path) 32 | 33 | if stderr and stderr.lower().find("error") != -1: 34 | print("======================== Build ===============================") 35 | print(stdout) 36 | print(stderr) 37 | print("==============================================================") 38 | 39 | return stderr 40 | 41 | return None 42 | 43 | def main(tooloption): 44 | path = utils.get_tool_build_path("unitrace") 45 | 46 | log = config(path) 47 | if log is not None: 48 | return log 49 | 50 | log = build(path) 
51 | return log 52 | 53 | if __name__ == "__main__": 54 | if len(sys.argv) > 1: 55 | log = main(sys.argv[1]) 56 | if log is not None: 57 | print(log) 58 | -------------------------------------------------------------------------------- /tools/cl_tracer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_CL_Tracer CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(clt_tracer SHARED 12 | "${PROJECT_SOURCE_DIR}/../../loader/init.cc" 13 | "${PROJECT_SOURCE_DIR}/../utils/correlator.cc" 14 | "${PROJECT_SOURCE_DIR}/../../utils/trace_guard.cc" 15 | cl_ext_collector.cc 16 | tool.cc) 17 | target_include_directories(clt_tracer 18 | PRIVATE "${PROJECT_SOURCE_DIR}" 19 | PRIVATE "${PROJECT_SOURCE_DIR}/../utils" 20 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 21 | if(CMAKE_INCLUDE_PATH) 22 | target_include_directories(clt_tracer 23 | PUBLIC "${CMAKE_INCLUDE_PATH}") 24 | endif() 25 | 26 | FindOpenCLLibrary(clt_tracer) 27 | FindOpenCLHeaders(clt_tracer) 28 | 29 | GetOpenCLTracingHeaders(clt_tracer) 30 | 31 | # Loader 32 | 33 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_tracer") 34 | add_executable(cl_tracer "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 35 | target_include_directories(cl_tracer 36 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 37 | if(UNIX) 38 | target_link_libraries(cl_tracer 39 | dl) 40 | endif() 41 | 42 | # Installation 43 | 44 | install(TARGETS cl_tracer clt_tracer DESTINATION bin) -------------------------------------------------------------------------------- /tools/gpuinfo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION 
${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_GPU_Info CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | add_executable(gpuinfo main.cc) 10 | target_include_directories(gpuinfo 11 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 12 | if(CMAKE_INCLUDE_PATH) 13 | target_include_directories(gpuinfo 14 | PUBLIC "${CMAKE_INCLUDE_PATH}") 15 | endif() 16 | 17 | if(UNIX) 18 | target_link_libraries(gpuinfo 19 | dl) 20 | endif() 21 | 22 | GetMDHeaders(gpuinfo) 23 | CheckForMDLibrary(gpuinfo) 24 | 25 | install(TARGETS gpuinfo DESTINATION bin) -------------------------------------------------------------------------------- /tools/instcount/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | 6 | project(PTI_Samples_GPU_Instruction_Count CXX) 7 | SetCompilerFlags() 8 | SetBuildType() 9 | 10 | # Tool Library 11 | 12 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 13 | 14 | set(CMAKE_CXX_FLAGS_DEBUG "-g") 15 | 16 | add_library(instcount_tool SHARED 17 | "${PROJECT_SOURCE_DIR}/tool.cc" 18 | "${PTI_CMAKE_MACRO_DIR}/../loader/init.cc" 19 | "${PROJECT_SOURCE_DIR}/instcount.cpp" 20 | ) 21 | target_include_directories(instcount_tool 22 | PRIVATE "${PTI_CMAKE_MACRO_DIR}/../utils") 23 | if(CMAKE_INCLUDE_PATH) 24 | target_include_directories(instcount_tool 25 | PUBLIC "${CMAKE_INCLUDE_PATH}") 26 | endif() 27 | 28 | add_subdirectory("${PTI_CMAKE_MACRO_DIR}/../utils/gtpin_utils" gtpin_tool_utils) 29 | FindGTPinToolUtilsHeaders(instcount_tool) 30 | target_link_libraries(instcount_tool gtpin_tool_utils) 31 | 32 | FindGTPinLibrary(instcount_tool) 33 | FindGTPinHeaders(instcount_tool) 34 | FindGTPinUtils(instcount_tool) 35 | 36 | # Loader 37 | 38 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=instcount_tool") 39 | add_executable(instcount "${PTI_CMAKE_MACRO_DIR}/../loader/loader.cc") 40 | 
target_include_directories(instcount 41 | PRIVATE "${PTI_CMAKE_MACRO_DIR}/../utils") 42 | if(UNIX) 43 | target_link_libraries(instcount 44 | dl) 45 | endif() 46 | 47 | # Installation 48 | 49 | install(TARGETS instcount instcount_tool DESTINATION bin) 50 | -------------------------------------------------------------------------------- /tools/memaccess/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | 6 | project(PTI_Tools_GPU_Memory_Access CXX) 7 | SetCompilerFlags() 8 | SetBuildType() 9 | 10 | # Tool Library 11 | 12 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 13 | 14 | set(CMAKE_CXX_FLAGS_DEBUG "-g") 15 | 16 | add_library(memaccess_tool SHARED 17 | "${PROJECT_SOURCE_DIR}/tool.cc" 18 | "${PTI_CMAKE_MACRO_DIR}/../loader/init.cc" 19 | "${PROJECT_SOURCE_DIR}/memaccess.cpp" 20 | ) 21 | target_include_directories(memaccess_tool 22 | PRIVATE "${PTI_CMAKE_MACRO_DIR}/../utils") 23 | if(CMAKE_INCLUDE_PATH) 24 | target_include_directories(memaccess_tool 25 | PUBLIC "${CMAKE_INCLUDE_PATH}") 26 | endif() 27 | 28 | add_subdirectory("${PTI_CMAKE_MACRO_DIR}/../utils/gtpin_utils" gtpin_tool_utils) 29 | FindGTPinToolUtilsHeaders(memaccess_tool) 30 | target_link_libraries(memaccess_tool gtpin_tool_utils) 31 | 32 | FindGTPinLibrary(memaccess_tool) 33 | FindGTPinHeaders(memaccess_tool) 34 | FindGTPinUtils(memaccess_tool) 35 | 36 | # Loader 37 | 38 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=memaccess_tool") 39 | add_executable(memaccess "${PTI_CMAKE_MACRO_DIR}/../loader/loader.cc") 40 | target_include_directories(memaccess 41 | PRIVATE "${PTI_CMAKE_MACRO_DIR}/../utils") 42 | if(UNIX) 43 | target_link_libraries(memaccess 44 | dl) 45 | endif() 46 | 47 | # Installation 48 | 49 | install(TARGETS memaccess memaccess_tool DESTINATION bin) 50 |
-------------------------------------------------------------------------------- /tools/onetrace/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Tools_OneTrace CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(onetrace_tool SHARED 12 | "${PROJECT_SOURCE_DIR}/../../loader/init.cc" 13 | "${PROJECT_SOURCE_DIR}/../cl_tracer/cl_ext_collector.cc" 14 | "${PROJECT_SOURCE_DIR}/../utils/correlator.cc" 15 | "${PROJECT_SOURCE_DIR}/../../utils/trace_guard.cc" 16 | tool.cc) 17 | target_include_directories(onetrace_tool 18 | PRIVATE "${PROJECT_SOURCE_DIR}" 19 | PRIVATE "${PROJECT_SOURCE_DIR}/../utils" 20 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils" 21 | PRIVATE "${PROJECT_SOURCE_DIR}/../cl_tracer" 22 | PRIVATE "${PROJECT_SOURCE_DIR}/../ze_tracer") 23 | target_compile_definitions(onetrace_tool PUBLIC PTI_LEVEL_ZERO=1) 24 | if(CMAKE_INCLUDE_PATH) 25 | target_include_directories(onetrace_tool 26 | PUBLIC "${CMAKE_INCLUDE_PATH}") 27 | endif() 28 | 29 | FindOpenCLLibrary(onetrace_tool) 30 | FindOpenCLHeaders(onetrace_tool) 31 | 32 | GetOpenCLTracingHeaders(onetrace_tool) 33 | 34 | FindL0Library(onetrace_tool) 35 | FindL0Headers(onetrace_tool) 36 | 37 | FindL0HeadersPath(onetrace_tool "${PROJECT_SOURCE_DIR}/../ze_tracer/gen_tracing_callbacks.py") 38 | 39 | # Loader 40 | 41 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=onetrace_tool") 42 | add_executable(onetrace "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 43 | target_include_directories(onetrace 44 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 45 | if(UNIX) 46 | target_link_libraries(onetrace 47 | dl) 48 | endif() 49 | 50 | # Installation 51 | 52 | install(TARGETS onetrace onetrace_tool DESTINATION bin) 
-------------------------------------------------------------------------------- /tools/sysmon/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Tools_System_Monitor CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | if(NOT UNIX) 10 | message(FATAL_ERROR "Linux only is supported") 11 | endif() 12 | 13 | add_executable(sysmon main.cc) 14 | target_include_directories(sysmon 15 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 16 | if(CMAKE_INCLUDE_PATH) 17 | target_include_directories(sysmon 18 | PUBLIC "${CMAKE_INCLUDE_PATH}") 19 | endif() 20 | 21 | FindL0Library(sysmon) 22 | FindL0Headers(sysmon) 23 | 24 | # Installation 25 | 26 | install(TARGETS sysmon DESTINATION bin) -------------------------------------------------------------------------------- /tools/unitrace/cmake/FindXptifw.cmake: -------------------------------------------------------------------------------- 1 | include(FindPackageHandleStandardArgs) 2 | find_library(Xptifw_LIBRARY 3 | NAMES xptifw 4 | HINTS 5 | ENV LD_LIBRARY_PATH 6 | ENV LIBRARY_PATH 7 | ) 8 | find_path(Xptifw_INCLUDE_DIR 9 | NAMES xpti/xpti_trace_framework.h 10 | PATH_SUFFIXES 11 | include 12 | ../include 13 | ) 14 | 15 | find_package_handle_standard_args(Xptifw REQUIRED_VARS Xptifw_LIBRARY Xptifw_INCLUDE_DIR) 16 | 17 | if (Xptifw_FOUND) 18 | mark_as_advanced(Xptifw_LIBRARY) 19 | mark_as_advanced(Xptifw_INCLUDE_DIR) 20 | endif() 21 | 22 | if (Xptifw_FOUND AND NOT TARGET Xptifw::Xptifw) 23 | add_library(Xptifw::Xptifw SHARED IMPORTED) 24 | set_target_properties( 25 | Xptifw::Xptifw 26 | PROPERTIES IMPORTED_LOCATION "${Xptifw_LIBRARY}" 27 | INTERFACE_COMPILE_DEFINITIONS 28 | "XPTI_API_EXPORTS;XPTI_CALLBACK_API_EXPORTS" 29 | INTERFACE_INCLUDE_DIRECTORIES "${Xptifw_INCLUDE_DIR}") 30 | endif() 31 | 
-------------------------------------------------------------------------------- /tools/unitrace/doc/images/call-logging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/call-logging.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/call-trace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/call-trace.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/ccl_logging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/ccl_logging.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/ccl_summary_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/ccl_summary_report.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/chrome_itt_logging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/chrome_itt_logging.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/device-logging.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/device-logging.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/device-no-thread-no-engine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/device-no-thread-no-engine.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/device-per-engine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/device-per-engine.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/device-per-thread-per-engine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/device-per-thread-per-engine.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/device-per-thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/device-per-thread.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/device-timeline-text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/device-timeline-text.png 
-------------------------------------------------------------------------------- /tools/unitrace/doc/images/device-timing-with-no-shape.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/device-timing-with-no-shape.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/device-timing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/device-timing.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/event_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/event_query.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/host-device-times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/host-device-times.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/host-timing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/host-timing.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/implicit-per-tile-kernel-logging.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/implicit-per-tile-kernel-logging.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/implicit-per-tile-timing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/implicit-per-tile-timing.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/kernel-info-with-no-shape.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/kernel-info-with-no-shape.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/kernel-info.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/kernel-info.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/kernel-logging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/kernel-logging.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/kernel-submissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/kernel-submissions.png -------------------------------------------------------------------------------- 
/tools/unitrace/doc/images/metric-query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/metric-query.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/metric-sampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/metric-sampling.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/mpi-counter-parameter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/mpi-counter-parameter.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/mpi-device-initiated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/mpi-device-initiated.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/mpi-imbalance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/mpi-imbalance.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/mpi-logging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/mpi-logging.png 
-------------------------------------------------------------------------------- /tools/unitrace/doc/images/multipl-ranks-timelines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/multipl-ranks-timelines.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/perfchart-multi-sets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/perfchart-multi-sets.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/perfchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/perfchart.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/perfmetricsbrowser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/perfmetricsbrowser.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/perfmetricsbrowser2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/perfmetricsbrowser2.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/perfmetricstrace.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/perfmetricstrace.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/pytorch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/pytorch.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/roofline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/roofline.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/stall-sampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/stall-sampling.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/stallchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/stallchart.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/stallreport.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/stallreport.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/stallstatistics.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/stallstatistics.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/sycl-logging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/sycl-logging.png -------------------------------------------------------------------------------- /tools/unitrace/doc/images/throughput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/doc/images/throughput.png -------------------------------------------------------------------------------- /tools/unitrace/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas >= 2.2.1 2 | matplotlib >= 3.8.0 3 | -------------------------------------------------------------------------------- /tools/unitrace/scripts/get_commit_hash.py: -------------------------------------------------------------------------------- 1 | #============================================================== 2 | # Copyright (C) Intel Corporation 3 | # 4 | # SPDX-License-Identifier: MIT 5 | # ============================================================= 6 | 7 | import os 8 | import subprocess 9 | import sys 10 | import re 11 | 12 | def main(): 13 | if len(sys.argv) < 3: 14 | print("Usage: python get_commit_hash.py ") 15 | return 16 | 17 | dir_path = sys.argv[1] 18 | if (not os.path.exists(dir_path)): 19 | os.mkdir(dir_path) 20 | 21 | file_path = os.path.join(dir_path, sys.argv[2]) 22 | if (os.path.isfile(file_path)): 23 | os.remove(file_path) 24 | 25 | of = open(file_path, "wt") 26 | 27 | 
of.write("#ifndef PTI_TOOLS_UNITRACE_COMMIT_HASH_H_\n") 28 | of.write("#define PTI_TOOLS_UNITRACE_COMMIT_HASH_H_\n\n") 29 | 30 | result = subprocess.run(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE, universal_newlines=True) 31 | of.write("#define COMMIT_HASH \"") 32 | of.write(result.stdout[0:len(result.stdout) - 1]) 33 | of.write("\"\n\n") 34 | 35 | of.write("#endif /* PTI_TOOLS_UNITRACE_COMMIT_HASH_ */\n") 36 | of.close() 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /tools/unitrace/scripts/metrics/addrasm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | #============================================================== 3 | # Copyright (C) Intel Corporation 4 | # 5 | # SPDX-License-Identifier: MIT 6 | # ============================================================= 7 | 8 | 9 | import os 10 | import argparse 11 | 12 | def ParseArguments(): 13 | argparser = argparse.ArgumentParser(description = "Add instruction pointers in GPU assembly") 14 | argparser.add_argument('-o', '--output', required = True, help = "output .asm file with instruction pointers") 15 | argparser.add_argument('input', help = '.asm file withour instruction pointers') 16 | 17 | return argparser.parse_args() 18 | 19 | def main(args): 20 | if (os.path.isfile(args.input) == False): 21 | print("File " + args.input + " does not exist or cannot be opened") 22 | return 23 | 24 | if (os.stat(args.input).st_size ==0): 25 | print("File " + args.input + " is empty") 26 | return 27 | 28 | ip = 0 29 | with open(args.input, "r") as inf: 30 | with open(args.output, "w") as outf: 31 | for row in inf: 32 | if ((row.startswith("//") == False) and ("//" in row)): 33 | outf.write("/* [" + str('{:08X}'.format(ip)) + "] */ " + row) 34 | if ("Compacted" in row): 35 | ip = ip + 0x8 36 | else: 37 | ip = ip + 0x10 38 | else: 39 | outf.write(row) 40 | 41 | if 
__name__=="__main__": 42 | main(ParseArguments()) 43 | -------------------------------------------------------------------------------- /tools/unitrace/scripts/metrics/config/bmg/ComputeBasic.txt: -------------------------------------------------------------------------------- 1 | -m "XVE_STALL[%],XVE_INST_EXECUTED_ALU0_ALL_UTILIZATION[%],XVE_INST_EXECUTED_ALU1_ALL_UTILIZATION[%],XVE_INST_EXECUTED_ALU2_ALL_UTILIZATION[%]" -y "XVE Stall and ALU Utilizations (%)" 2 | -m "XVE_ACTIVE[%],XVE_STALL[%],XVE_THREADS_OCCUPANCY_ALL[%]" -y "Active, Stall and Occupancy (%)" 3 | -m "XVE_INST_EXECUTED_ALU0_ALL[events],XVE_INST_EXECUTED_ALU1_ALL[events],XVE_INST_EXECUTED_SEND_ALL[events]" -y "ALU Instruction Events Executed" 4 | -m "ICACHE_HIT[events],ICACHE_MISS[events]" -y "Instruction Cache Hit/Miss (events)" 5 | -m "GPU_MEMORY_L3_READ[events],GPU_MEMORY_L3_WRITE[events]" -y "GPU Memory Read/Write Caused by L3 Miss (events)" 6 | -m "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -y "GPU Memory Read/Write (bytes)" 7 | -b "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" 8 | -m "L3_HIT[events],L3_MISS[events]" -y "L3 Cache Hit/Miss (events)" 9 | -m "LOAD_STORE_CACHE_ACCESS[events],LOAD_STORE_CACHE_HIT[events]" -y "LSC Access/Hit (events)" 10 | -m "LOAD_STORE_CACHE_BYTE_READ[bytes],LOAD_STORE_CACHE_BYTE_WRITE[bytes]" -y "LSC Read/Write Excluding SLM (bytes)" 11 | -b "LOAD_STORE_CACHE_BYTE_READ[bytes],LOAD_STORE_CACHE_BYTE_WRITE[bytes]" 12 | -m "SLM_BYTE_READ[bytes],SLM_BYTE_WRITE[bytes]" -y "SLM Read/Write (bytes)" 13 | -b "SLM_BYTE_READ[bytes],SLM_BYTE_WRITE[bytes]" 14 | -m "TLB_MISS[events]" -y "TLB Miss (events)" 15 | -m "COMPRESSOR_INPUT[events],COMPRESSOR_OUTPUT[events]" -y "256B Write at Compressor Input/Output (events)" 16 | -m "AvgGpuCoreFrequencyMHz[MHz]" -y "Frequency (MHz)" 17 | -------------------------------------------------------------------------------- /tools/unitrace/scripts/metrics/config/bmg/MemoryProfile.txt: 
-------------------------------------------------------------------------------- 1 | -m "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -y "GPU Memory Read/Write (bytes)" 2 | -b "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" 3 | -m "L3_HIT[events],L3_MISS[events]" -y "L3 Cache Hit/Miss (events)" 4 | -m "L3_READ[events],L3_WRITE[events]" -y "L3 64-byte Read/Write (events)" 5 | -m "HOST_TO_GPUMEM_TRANSACTION_READ[events],HOST_TO_GPUMEM_TRANSACTION_WRITE[events]" -y "Host 64B Read/Write GPU Memory (events)" 6 | -b "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" 7 | -m "SLM_ACCESS_COUNT[events],SLM_BANK_CONFLICT_COUNT[events]" -y "SLM Access and Bank Conflict (events)" 8 | -m "SLM_BYTE_READ[bytes],SLM_BYTE_WRITE[bytes]" -y "SLM Read/Write (bytes)" 9 | -b "SLM_BYTE_READ[bytes],SLM_BYTE_WRITE[bytes]" 10 | -m "XVE_SLM_READ_MESSAGE_COUNT[messages],XVE_SLM_WRITE_MESSAGE_COUNT[messages],XVE_SLM_ATOMIC_MESSAGE_COUNT[events],XVE_SLM_FENCE_MESSAGE_COUNT[events]" -y "SLM Read/Write/Atomic/Fence (messages)" 11 | -m "TLB_MISS[events]" -y "TLB miss (events)" 12 | -m "AvgGpuCoreFrequencyMHz[MHz]" -y "Frequency (MHz)" 13 | -------------------------------------------------------------------------------- /tools/unitrace/scripts/metrics/config/pvc/ComputeBasic.txt: -------------------------------------------------------------------------------- 1 | -m "XVE_STALL[%],XVE_INST_EXECUTED_ALU0_ALL_UTILIZATION[%],XVE_INST_EXECUTED_ALU1_ALL_UTILIZATION[%],XVE_INST_EXECUTED_SEND_ALL_UTILIZATION[%],XVE_INST_EXECUTED_CONTROL_ALL_UTILIZATION[%],XVE_INST_EXECUTED_XMX_ALL_UTILIZATION[%]" -y "XVE Stall and ALU Utilizations (%)" -m "XVE_ACTIVE[%],XVE_STALL[%],XVE_THREADS_OCCUPANCY_ALL[%]" -y "Active, Stall and Occupancy (%)" -m "XVE_ATOMIC_ACCESS_COUNT[messages],XVE_BARRIER_MESSAGE_COUNT[messages]" -y "Atomics and Barriers Messages" -m 
"XVE_INST_EXECUTED_ALU0_ALL[events],XVE_INST_EXECUTED_ALU1_ALL[events],XVE_INST_EXECUTED_XMX_ALL[events],XVE_INST_EXECUTED_SEND_ALL[events],XVE_INST_EXECUTED_CONTROL_ALL[events]" -y "ALU Instruction Events Executed" -m "L3_BYTE_READ[bytes],L3_BYTE_WRITE[bytes],GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -y "L3 and GPU Memory (bytes)" -b "L3_BYTE_READ[bytes],L3_BYTE_WRITE[bytes],GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -m "AvgGpuCoreFrequencyMHz[MHz],AvgGpuSliceFrequencyMHz[MHz]" -y "Frequency (MHz)" 2 | -------------------------------------------------------------------------------- /tools/unitrace/scripts/metrics/config/pvc/GpuOffload.txt: -------------------------------------------------------------------------------- 1 | -m "XVE_ACTIVE[%],XVE_STALL[%],XVE_THREADS_OCCUPANCY_ALL[%]" -y "Active, Stall and Occupancy (%)" -m "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -y "GPU Memory (bytes)" -m "SYSMEM_BYTE_READ[bytes],SYSMEM_BYTE_WRITE[bytes]" -y "System Memory (bytes)" -m "STACK_TO_STACK_DATA_BYTE_RECEIVE[bytes],STACK_TO_STACK_DATA_BYTE_TRANSMIT[bytes]" -y "Stack to Stack Traffic (bytes)" -b "SYSMEM_BYTE_READ[bytes],SYSMEM_BYTE_WRITE[bytes]" -b "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -b "STACK_TO_STACK_DATA_BYTE_RECEIVE[bytes],STACK_TO_STACK_DATA_BYTE_TRANSMIT[bytes]" -m "AvgGpuCoreFrequencyMHz[MHz],AvgGpuSliceFrequencyMHz[MHz]" -y "Frequency (MHz)" 2 | -------------------------------------------------------------------------------- /tools/unitrace/scripts/metrics/config/pvc/L1ProfileReads.txt: -------------------------------------------------------------------------------- 1 | -m "XVE_ACTIVE[%],XVE_STALL[%],XVE_THREADS_OCCUPANCY_ALL[%]" -y "Active, Stall and Occupancy (%)" -m "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -y "GPU Memory (bytes)" -m "LOAD_STORE_CACHE_BYTE_READ[bytes],SLM_BYTE_READ[bytes]" -y "Load Store Cache Read (bytes)" -b 
"GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -b "LOAD_STORE_CACHE_BYTE_READ[bytes],SLM_BYTE_READ[bytes]" -m "AvgGpuCoreFrequencyMHz[MHz],AvgGpuSliceFrequencyMHz[MHz]" -y "Frequency (MHz)" 2 | -------------------------------------------------------------------------------- /tools/unitrace/scripts/metrics/config/pvc/L1ProfileSlmBankConflicts.txt: -------------------------------------------------------------------------------- 1 | -m "XVE_ACTIVE[%],XVE_STALL[%],XVE_THREADS_OCCUPANCY_ALL[%]" -y "Active, Stall and Occupancy (%)" -m "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -y "GPU Memory (bytes)" -m "SLM_BANK_CONFLICT_COUNT[events]" -y "SLM Bank Conflicts" -b "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -m "AvgGpuCoreFrequencyMHz[MHz],AvgGpuSliceFrequencyMHz[MHz]" -y "Frequency (MHz)" 2 | -------------------------------------------------------------------------------- /tools/unitrace/scripts/metrics/config/pvc/L1ProfileWrites.txt: -------------------------------------------------------------------------------- 1 | -m "XVE_ACTIVE[%],XVE_STALL[%],XVE_THREADS_OCCUPANCY_ALL[%]" -y "Active, Stall and Occupancy (%)" -m "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -y "GPU Memory (bytes)" -m "LOAD_STORE_CACHE_BYTE_WRITE[bytes],SLM_BYTE_WRITE[bytes]" -y "Load Store Cache Write(bytes)" -b "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -b "LOAD_STORE_CACHE_BYTE_WRITE[bytes],SLM_BYTE_WRITE[bytes]" -m "AvgGpuCoreFrequencyMHz[MHz],AvgGpuSliceFrequencyMHz[MHz]" -y "Frequency (MHz)" 2 | -------------------------------------------------------------------------------- /tools/unitrace/scripts/metrics/config/pvc/MemProfile.txt: -------------------------------------------------------------------------------- 1 | -m "XVE_ACTIVE[%],XVE_STALL[%],XVE_THREADS_OCCUPANCY_ALL[%]" -y "Active, Stall and Occupancy (%)" -m "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -y "GPU Memory (bytes)" -m 
"HOST_TO_GPUMEM_BYTE_READ[bytes],HOST_TO_GPUMEM_BYTE_WRITE[bytes]" -y "Host and GPU Traffic (bytes)" -m "SCALEUP_DATA_BYTE_RECEIVE[bytes],SCALEUP_DATA_BYTE_TRANSMIT[bytes]" -y "Scaleup Data Transfer (bytes)" -b "GPU_MEMORY_BYTE_READ[bytes],GPU_MEMORY_BYTE_WRITE[bytes]" -b "HOST_TO_GPUMEM_BYTE_READ[bytes],HOST_TO_GPUMEM_BYTE_WRITE[bytes]" -b "SCALEUP_DATA_BYTE_RECEIVE[bytes],SCALEUP_DATA_BYTE_TRANSMIT[bytes]" -m "AvgGpuCoreFrequencyMHz[MHz],AvgGpuSliceFrequencyMHz[MHz]" -y "Frequency (MHz)" 2 | -------------------------------------------------------------------------------- /tools/unitrace/scripts/roofline/device_configs/PVC_1tile.csv: -------------------------------------------------------------------------------- 1 | PlatformName,"PVC_1tile" 2 | FP16_GFLOPS,39000 3 | FP16_XMX_GFLOPS,314000 4 | BF16_XMX_GFLOPS,314000 5 | FP32_GFLOPS,32000 6 | FP64_GFLOPS,32000 7 | GPU_MEMORY_BW_in_GB_per_sec,1055 8 | L3_BW_in_GB_per_sec,2043 9 | -------------------------------------------------------------------------------- /tools/unitrace/src/collector_options.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef PTI_TOOLS_UNITRACE_COLLECTOR_OPTIONS_ 8 | #define PTI_TOOLS_UNITRACE_COLLECTOR_OPTIONS_ 9 | 10 | struct CollectorOptions { 11 | bool device_timing = false; 12 | bool device_timeline = false; 13 | bool kernel_submission = false; 14 | bool host_timing = false; 15 | bool kernel_tracing = false; 16 | bool api_tracing = false; 17 | bool call_logging = false; 18 | bool need_tid = false; 19 | bool need_pid = false; 20 | bool verbose = false; 21 | bool demangle = false; 22 | bool kernels_per_tile = false; 23 | bool metric_query = false; 24 | bool metric_stream = false; 25 | bool stall_sampling = false; 26 | }; 27 | 28 
//==============================================================
// Copyright (C) Intel Corporation
//
// SPDX-License-Identifier: MIT
// =============================================================

#ifndef PTI_TOOLS_UNITRACE_CL_INTEL_EXT_H_
#define PTI_TOOLS_UNITRACE_CL_INTEL_EXT_H_

// Identifiers for the Intel OpenCL extension entry points named in
// cl_ext_api below. Values are consecutive, starting at ClExtApiStart (4321);
// ClExtApiEnd is one past the last real identifier.
enum cl_ext_api_id {
  ClExtApiStart = 4321,
  ClExtHostMemAllocINTEL = ClExtApiStart,
  ClExtDeviceMemAllocINTEL,
  ClExtSharedMemAllocINTEL,
  ClExtMemFreeINTEL,
  ClExtGetMemAllocInfoINTEL,
  ClExtSetKernelArgMemPointerINTEL,
  ClExtEnqueueMemcpyINTEL,
  ClExtGetDeviceGlobalVariablePointerINTEL,
  ClExtGetKernelSuggestedLocalWorkSizeINTEL,
  ClExtCreateBufferWithPropertiesINTEL,
  ClExtEnqueueMemsetINTEL,
  ClExtEnqueueMigrateMemINTEL,
  ClExtEnqueueMemAdviseINTEL,
  ClExtEnqueueMemFillINTEL,
  ClExtMemBlockingFreeINTEL,
  ClExtApiEnd
};

// Entry-point names, listed in the same order as the enumerators above.
// NOTE(review): presumably indexed with (id - ClExtApiStart) -- confirm
// against the callers.
static const char *cl_ext_api[] = {
  "clHostMemAllocINTEL",
  "clDeviceMemAllocINTEL",
  "clSharedMemAllocINTEL",
  "clMemFreeINTEL",
  "clGetMemAllocInfoINTEL",
  "clSetKernelArgMemPointerINTEL",
  "clEnqueueMemcpyINTEL",
  "clGetDeviceGlobalVariablePointerINTEL",
  "clGetKernelSuggestedLocalWorkSizeINTEL",
  "clCreateBufferWithPropertiesINTEL",
  "clEnqueueMemsetINTEL",
  "clEnqueueMigrateMemINTEL",
  "clEnqueueMemAdviseINTEL",
  "clEnqueueMemFillINTEL",
  "clMemBlockingFreeINTEL"
};

// Adding an enumerator without its name (or the reverse) now fails the build
// instead of silently shifting or overrunning the table.
static_assert(static_cast<int>(sizeof(cl_ext_api) / sizeof(cl_ext_api[0])) ==
                  ClExtApiEnd - ClExtApiStart,
              "cl_ext_api must have exactly one name per cl_ext_api_id value");

#endif /* PTI_TOOLS_UNITRACE_CL_INTEL_EXT_H_ */
| 2 | //============================================================== 3 | // Copyright (C) Intel Corporation 4 | // 5 | // SPDX-License-Identifier: MIT 6 | // ============================================================= 7 | 8 | #ifndef PTI_TOOLS_UNITRACE_UNIEVENT_H 9 | #define PTI_TOOLS_UNITRACE_UNIEVENT_H 10 | 11 | enum EVENT_TYPE { 12 | EVENT_NULL = 0, 13 | EVENT_DURATION_START, 14 | EVENT_DURATION_END, 15 | EVENT_FLOW_SOURCE, 16 | EVENT_FLOW_SINK, 17 | EVENT_COMPLETE, 18 | EVENT_MARK, 19 | }; 20 | 21 | enum API_TYPE { 22 | API_TYPE_NONE, 23 | API_TYPE_MPI, 24 | API_TYPE_ITT, 25 | API_TYPE_CCL 26 | }; 27 | 28 | typedef struct MpiArgs_ { 29 | int src_location; 30 | int src_tag; 31 | int dst_location; 32 | int dst_tag; 33 | size_t src_size; 34 | size_t dst_size; 35 | int64_t mpi_counter; 36 | bool is_tagged; 37 | } MpiArgs; 38 | 39 | typedef struct IttArgs_ { 40 | size_t count = 0; 41 | bool isIndirectData = false; 42 | int type; 43 | const char* key; 44 | struct IttArgs_* next = nullptr; 45 | void* data[1]; 46 | } IttArgs; 47 | 48 | typedef struct HostEventRecord_ { 49 | uint64_t id_; 50 | uint64_t start_time_; 51 | uint64_t end_time_; 52 | char *name_ = nullptr; 53 | API_TRACING_ID api_id_; 54 | EVENT_TYPE type_; 55 | 56 | API_TYPE api_type_ = API_TYPE::API_TYPE_NONE; 57 | union{ 58 | MpiArgs mpi_args_; 59 | IttArgs itt_args_; 60 | }; 61 | } HostEventRecord; 62 | 63 | #endif // PTI_TOOLS_UNITRACE_UNIEVENT_H 64 | -------------------------------------------------------------------------------- /tools/unitrace/src/unimemory.h: -------------------------------------------------------------------------------- 1 | 2 | //============================================================== 3 | // Copyright (C) Intel Corporation 4 | // 5 | // SPDX-License-Identifier: MIT 6 | // ============================================================= 7 | 8 | #ifndef PTI_TOOLS_UNITRACE_UNIMEMORY_H 9 | #define PTI_TOOLS_UNITRACE_UNIMEMORY_H 10 | 11 | #include 12 | #include 13 | 14 | 
namespace UniMemory {
  // Both helpers are defined in this header, so they must be `inline`:
  // without it every translation unit that includes the header emits its own
  // external definition and the link fails with duplicate symbols.

  // Prints "Out of memory" to std::cerr and calls std::abort() when ptr is
  // null; does nothing otherwise.
  inline void
  AbortIfOutOfMemory(void *ptr) {
    if (ptr == nullptr) {
      std::cerr << "Out of memory" << std::endl;
      std::abort();
    }
  }

  // Prints "Out of memory" to std::cerr and calls std::_Exit(-1) when ptr is
  // null; does nothing otherwise. std::_Exit ends the process immediately,
  // without running atexit handlers or static destructors.
  inline void
  ExitIfOutOfMemory(void *ptr) {
    if (ptr == nullptr) {
      std::cerr << "Out of memory" << std::endl;
      std::_Exit(-1);
    }
  }
}
21 | add_subdirectory(cl_gemm) 22 | add_subdirectory(ze_gemm) 23 | add_subdirectory(dpc_gemm) 24 | add_subdirectory(grf) 25 | add_subdirectory(omp_gemm) 26 | -------------------------------------------------------------------------------- /tools/unitrace/test/cl_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(cl_gemm CXX) 2 | 3 | add_executable(cl_gemm main.cc) 4 | add_sycl_to_target(cl_gemm) 5 | 6 | target_include_directories(cl_gemm PRIVATE "${PROJECT_SOURCE_DIR}/../../../../utils") 7 | 8 | target_link_libraries(cl_gemm PRIVATE OpenCL) 9 | 10 | add_test(NAME cl_gemm COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/run_test.py ${CMAKE_SOURCE_DIR} cl_gemm GPU) 11 | -------------------------------------------------------------------------------- /tools/unitrace/test/cl_gemm/gold/linux/d.txt: -------------------------------------------------------------------------------- 1 | 2 | === Device Timing Summary === 3 | 4 | Total Execution Time (ns): 89614076 5 | Total Device Time for CL GPU backend (ns): 46625120 6 | 7 | == CL GPU Backend == 8 | 9 | Kernel, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns) 10 | GEMM, 4, 44280640, 94.97, 11070160, 9633280, 12989120 11 | clEnqueueWriteBuffer, 8, 1406560, 3.02, 175820, 153040, 203600 12 | clEnqueueReadBuffer, 4, 937920, 2.01, 234480, 222400, 240080 13 | 14 | 15 | -------------------------------------------------------------------------------- /tools/unitrace/test/cl_gemm/gold/windows/d.txt: -------------------------------------------------------------------------------- 1 | 2 | === Device Timing Summary === 3 | 4 | Total Execution Time (ns): 1169230000 5 | Total Device Time for CL GPU backend (ns): 91672091 6 | 7 | == CL GPU Backend == 8 | 9 | Kernel, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns) 10 | GEMM, 4, 84431196, 92.10, 21107799, 20856041, 21550937 11 | clEnqueueReadBuffer, 4, 5139701, 5.61, 1284925, 1006200, 1493901 12 | 
clEnqueueWriteBuffer, 8, 2101194, 2.29, 262649, 199687, 366406 13 | 14 | 15 | -------------------------------------------------------------------------------- /tools/unitrace/test/dpc_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(dpc_gemm CXX) 2 | 3 | add_executable(dpc_gemm main.cc) 4 | add_sycl_to_target(dpc_gemm) 5 | 6 | target_include_directories(dpc_gemm PRIVATE "${PROJECT_SOURCE_DIR}/../../../../utils") 7 | 8 | add_test(NAME dpc_gemm COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/run_test.py ${CMAKE_SOURCE_DIR} dpc_gemm GPU) 9 | -------------------------------------------------------------------------------- /tools/unitrace/test/dpc_gemm/gold/windows/d.txt: -------------------------------------------------------------------------------- 1 | 2 | === Device Timing Summary === 3 | 4 | Total Execution Time (ns): 5917626800 5 | Total Device Time for L0 backend (ns): 4676410206 6 | 7 | == L0 Backend == 8 | 9 | Kernel, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns) 10 | "_ZTSZZL11RunAndCheckN4sycl3_V15queueERKSt6vectorIfSaIfEES6_RS4_jfENKUlRNS0_7handlerEE_clES9_E6__GEMM", 4, 4675211665, 99.974365, 1168802916, 1073651562, 1219552604 11 | "zeCommandListAppendMemoryCopy(H2M)", 4, 1198541, 0.025630, 299635, 281875, 337500 12 | 13 | 14 | === Kernel Properties === 15 | 16 | Kernel, Compiled, SIMD, Number of Arguments, SLM Per Work Group, Private Memory Per Thread, Spill Memory Per Thread, Register File Size Per Thread 17 | "_ZTSZZL11RunAndCheckN4sycl3_V15queueERKSt6vectorIfSaIfEES6_RS4_jfENKUlRNS0_7handlerEE_clES9_E6__GEMM", JIT, 8, 13, 0, 8320, 11840, 128 18 | 19 | 20 | -------------------------------------------------------------------------------- /tools/unitrace/test/graph/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(graph CXX) 2 | 3 | add_executable(graph graph.cpp) 4 | add_sycl_to_target(graph) 5 | 6 | add_test(NAME 
graph COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/run_test.py ${CMAKE_SOURCE_DIR} graph) 7 | -------------------------------------------------------------------------------- /tools/unitrace/test/grf/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(grf CXX) 2 | 3 | add_executable(grf grf.cpp) 4 | add_sycl_to_target(grf) 5 | 6 | add_test(NAME grf COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/run_test.py ${CMAKE_SOURCE_DIR} grf) 7 | -------------------------------------------------------------------------------- /tools/unitrace/test/omp_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(WIN32) 2 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qiopenmp -Qopenmp-targets=spir64") 3 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /Qopenmp /Qopenmp-targets=spir64") 4 | else() 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fiopenmp -fopenmp-targets=spir64") 6 | endif() 7 | 8 | project(omp_gemm CXX) 9 | 10 | add_executable(omp_gemm main.cc) 11 | 12 | if(WIN32) 13 | target_link_libraries(omp_gemm PRIVATE "$ENV{ONEAPI_ROOT}/compiler/latest/lib/libiomp5md.lib") 14 | endif() 15 | 16 | add_test(NAME omp_gemm COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/run_test.py ${CMAKE_SOURCE_DIR} omp_gemm GPU) -------------------------------------------------------------------------------- /tools/unitrace/test/omp_gemm/gold/linux/d.txt: -------------------------------------------------------------------------------- 1 | 2 | === Device Timing Summary === 3 | 4 | Total Execution Time (ns): 187924698 5 | Total Device Time for L0 backend (ns): 22891200 6 | 7 | == L0 Backend == 8 | 9 | Kernel, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns) 10 | "__omp_offloading_802_69e222e__Z12ComputeOnGPURKSt6vectorIfSaIfEES3_RS1_jf_l71", 4, 17027040, 74.382469, 4256760, 4147040, 4380320 11 | "zeCommandListAppendMemoryCopy(M2D)", 16, 5420320, 23.678619, 338770, 80, 
764720 12 | "zeCommandListAppendMemoryCopy(D2M)", 9, 443840, 1.938911, 49315, 1600, 111120 13 | 14 | 15 | === Kernel Properties === 16 | 17 | Kernel, Compiled, SIMD, Number of Arguments, SLM Per Work Group, Private Memory Per Thread, Spill Memory Per Thread, Register File Size Per Thread 18 | "__omp_offloading_802_69e222e__Z12ComputeOnGPURKSt6vectorIfSaIfEES3_RS1_jf_l71", JIT, 32, 10, 0, 0, 0, 128 19 | 20 | 21 | -------------------------------------------------------------------------------- /tools/unitrace/test/omp_gemm/gold/windows/d.txt: -------------------------------------------------------------------------------- 1 | 2 | === Device Timing Summary === 3 | 4 | Total Execution Time (ns): 1918571400 5 | Total Device Time for L0 backend (ns): 722573953 6 | 7 | == L0 Backend == 8 | 9 | Kernel, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns) 10 | "__omp_offloading_e4ea70a0_ae24e1b9__Z12ComputeOnGPURKSt6vectorIfSaIfEES3_RS1_jf_l59", 4, 722469582, 99.985558, 180617395, 175209375, 188006250 11 | "zeCommandListAppendMemoryCopy(M2D)", 8, 64894, 0.008981, 8111, 4479, 20104 12 | "zeCommandListAppendMemoryCopy(D2M)", 5, 39477, 0.005463, 7895, 4270, 13958 13 | 14 | 15 | === Kernel Properties === 16 | 17 | Kernel, Compiled, SIMD, Number of Arguments, SLM Per Work Group, Private Memory Per Thread, Spill Memory Per Thread, Register File Size Per Thread 18 | "__omp_offloading_e4ea70a0_ae24e1b9__Z12ComputeOnGPURKSt6vectorIfSaIfEES3_RS1_jf_l59", JIT, 16, 10, 0, 0, 0, 128 19 | 20 | 21 | -------------------------------------------------------------------------------- /tools/unitrace/test/scenarios.txt: -------------------------------------------------------------------------------- 1 | -c 2 | -h 3 | -d 4 | -t 5 | -s 6 | --chrome-call-logging 7 | --chrome-device-logging 8 | --chrome-kernel-logging 9 | --chrome-sycl-logging 10 | --chrome-itt-logging -------------------------------------------------------------------------------- 
/tools/unitrace/test/ze_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("${PROJECT_SOURCE_DIR}/../../../build_utils/CMakeLists.txt") 2 | 3 | project(ze_gemm CXX) 4 | 5 | add_executable(ze_gemm main.cc) 6 | 7 | target_include_directories(ze_gemm PRIVATE "${PROJECT_SOURCE_DIR}/../../../../utils") 8 | 9 | if(WIN32) 10 | FindL0Library(ze_gemm) 11 | FindL0Headers(ze_gemm) 12 | else() 13 | target_link_libraries(ze_gemm PRIVATE ze_loader) 14 | endif() 15 | 16 | add_custom_command(TARGET ze_gemm PRE_BUILD 17 | COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PROJECT_SOURCE_DIR}/gemm.spv ${CMAKE_BINARY_DIR}/ze_gemm/gemm.spv) 18 | 19 | add_test(NAME ze_gemm COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/run_test.py ${CMAKE_SOURCE_DIR} ze_gemm) 20 | -------------------------------------------------------------------------------- /tools/unitrace/test/ze_gemm/gemm.spv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/tools/unitrace/test/ze_gemm/gemm.spv -------------------------------------------------------------------------------- /tools/unitrace/test/ze_gemm/gold/linux/d.txt: -------------------------------------------------------------------------------- 1 | 2 | === Device Timing Summary === 3 | 4 | Total Execution Time (ns): 179716136 5 | Total Device Time for L0 backend (ns): 185907840 6 | 7 | == L0 Backend == 8 | 9 | Kernel, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns) 10 | "zeCommandListAppendBarrier", 8, 90681120, 48.777458, 11335140, 773920, 22420800 11 | "GEMM", 4, 87518240, 47.076141, 21879560, 20304800, 22409280 12 | "zeCommandListAppendMemoryCopy(M2D)", 8, 6118720, 3.291265, 764840, 756160, 779840 13 | "zeCommandListAppendMemoryCopy(D2M)", 4, 1589760, 0.855133, 397440, 334240, 419840 14 | 15 | 16 | === Kernel Properties === 17 | 18 | Kernel, Compiled, SIMD, Number 
of Arguments, SLM Per Work Group, Private Memory Per Thread, Spill Memory Per Thread, Register File Size Per Thread 19 | "GEMM", JIT, 32, 4, 0, 0, 0, 128 20 | 21 | 22 | -------------------------------------------------------------------------------- /tools/unitrace/test/ze_gemm/gold/windows/d.txt: -------------------------------------------------------------------------------- 1 | 2 | === Device Timing Summary === 3 | 4 | Total Execution Time (ns): 992160800 5 | Total Device Time for L0 backend (ns): 83297594 6 | 7 | == L0 Backend == 8 | 9 | Kernel, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns) 10 | "GEMM", 4, 79457185, 95.389534, 19864296, 19411770, 20667604 11 | "zeCommandListAppendMemoryCopy(M2D)", 8, 2482081, 2.979775, 310260, 226875, 430208 12 | "zeCommandListAppendMemoryCopy(D2M)", 4, 1353853, 1.625321, 338463, 250833, 473750 13 | "zeCommandListAppendBarrier", 8, 4475, 0.005372, 559, 520, 625 14 | 15 | 16 | === Kernel Properties === 17 | 18 | Kernel, Compiled, SIMD, Number of Arguments, SLM Per Work Group, Private Memory Per Thread, Spill Memory Per Thread, Register File Size Per Thread 19 | "GEMM", JIT, 32, 4, 0, 0, 0, 128 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /tools/utils/correlator.cc: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #include "correlator.h" 8 | 9 | thread_local uint64_t Correlator::kernel_id_ = 0; -------------------------------------------------------------------------------- /tools/ze_tracer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include("../../build_utils/CMakeLists.txt") 2 | SetRequiredCMakeVersion() 3 | 
cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 4 | 5 | project(PTI_Samples_L0_Tracer CXX) 6 | SetCompilerFlags() 7 | SetBuildType() 8 | 9 | # Tool Library 10 | 11 | add_library(zet_tracer SHARED 12 | "${PROJECT_SOURCE_DIR}/../../loader/init.cc" 13 | "${PROJECT_SOURCE_DIR}/../utils/correlator.cc" 14 | tool.cc) 15 | target_include_directories(zet_tracer 16 | PRIVATE "${PROJECT_SOURCE_DIR}" 17 | PRIVATE "${PROJECT_SOURCE_DIR}/../utils" 18 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 19 | target_compile_definitions(zet_tracer PUBLIC PTI_LEVEL_ZERO=1) 20 | if(CMAKE_INCLUDE_PATH) 21 | target_include_directories(zet_tracer 22 | PUBLIC "${CMAKE_INCLUDE_PATH}") 23 | endif() 24 | 25 | FindL0Library(zet_tracer) 26 | FindL0Headers(zet_tracer) 27 | 28 | FindL0HeadersPath(zet_tracer "${PROJECT_SOURCE_DIR}/gen_tracing_callbacks.py") 29 | 30 | # Loader 31 | 32 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_tracer") 33 | add_executable(ze_tracer "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") 34 | target_include_directories(ze_tracer 35 | PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") 36 | if(UNIX) 37 | target_link_libraries(ze_tracer 38 | dl) 39 | endif() 40 | 41 | # Installation 42 | 43 | install(TARGETS ze_tracer zet_tracer DESTINATION bin) -------------------------------------------------------------------------------- /utils/demangle.h: -------------------------------------------------------------------------------- 1 | #ifndef PTI_UTILS_DEMANGLE_H_ 2 | #define PTI_UTILS_DEMANGLE_H_ 3 | 4 | #if __has_include() 5 | #define HAVE_CXXABI 1 6 | #include 7 | #include 8 | #else 9 | #define HAVE_CXXABI 0 10 | #endif 11 | #include 12 | 13 | #include "pti_assert.h" 14 | 15 | namespace utils { 16 | 17 | static inline std::string Demangle(const char* name) { 18 | PTI_ASSERT(name != nullptr); 19 | 20 | #if HAVE_CXXABI 21 | int status = 0; 22 | char* demangled = abi::__cxa_demangle(name, nullptr, 0, &status); 23 | if (status != 0) { 24 | return name; 25 | } 26 | 27 | 
constexpr const char* const prefix_to_skip = "typeinfo name for "; 28 | const size_t prefix_to_skip_len = strlen(prefix_to_skip); 29 | const size_t shift = 30 | (std::strncmp(demangled, prefix_to_skip, prefix_to_skip_len) == 0) ? 31 | prefix_to_skip_len : 0; 32 | 33 | std::string result(demangled + shift); 34 | free(demangled); 35 | return result; 36 | #else 37 | return name; 38 | #endif 39 | } 40 | 41 | } // namespace utils 42 | 43 | #undef HAVE_CXXABI 44 | 45 | #endif // PTI_UTILS_DEMANGLE_H_ -------------------------------------------------------------------------------- /utils/gpu_elf_parser/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(PTI_DEBUG_INFO_PARSER_BASE_DIR ${CMAKE_CURRENT_LIST_DIR} CACHE INTERNAL "") 2 | 3 | include("${CMAKE_CURRENT_LIST_DIR}/../../build_utils/CMakeLists.txt") 4 | SetRequiredCMakeVersion() 5 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 6 | 7 | project(PTI_debug_info_parser CXX) 8 | SetCompilerFlags() 9 | SetBuildType() 10 | 11 | macro(FindPtiElfParserHeaders TARGET) 12 | target_include_directories(${TARGET} 13 | PUBLIC "${PTI_DEBUG_INFO_PARSER_BASE_DIR}/include" 14 | PUBLIC "${PTI_CMAKE_MACRO_DIR}/../utils") # leb128.hpp, pti_assert.h 15 | endmacro() 16 | 17 | add_library(debug_info_parser STATIC 18 | ${PTI_DEBUG_INFO_PARSER_BASE_DIR}/src/elf_parser.cpp 19 | ${PTI_DEBUG_INFO_PARSER_BASE_DIR}/src/section_debug_line.cpp 20 | ${PTI_DEBUG_INFO_PARSER_BASE_DIR}/src/section_debug_info.cpp 21 | ${PTI_DEBUG_INFO_PARSER_BASE_DIR}/src/section_debug_abbrev.cpp 22 | ${PTI_DEBUG_INFO_PARSER_BASE_DIR}/src/dwarf_state_machine.cpp 23 | ) 24 | 25 | FindPtiElfParserHeaders(debug_info_parser) 26 | set_property(TARGET debug_info_parser PROPERTY POSITION_INDEPENDENT_CODE ON) 27 | 28 | if(CMAKE_INCLUDE_PATH) 29 | target_include_directories(debug_info_parser 30 | PUBLIC "${CMAKE_INCLUDE_PATH}") 31 | endif() 32 | 
-------------------------------------------------------------------------------- /utils/gpu_elf_parser/include/elf_parser_mapping.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef PTI_ELF_PARSER_MAPPING_H_ 8 | #define PTI_ELF_PARSER_MAPPING_H_ 9 | 10 | #include 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif // __cplusplus 15 | 16 | struct SourceMapping { 17 | uint32_t file_id; 18 | const char* file_path; // pointer to the file path in the original data 19 | const char* file_name; // pointer to the file name in the original data 20 | uint64_t address; // address in the binary in 64-bit format 21 | uint32_t line; 22 | uint32_t column; 23 | }; 24 | 25 | #ifdef __cplusplus 26 | } // extern "C" 27 | #endif // __cplusplus 28 | 29 | #endif // PTI_ELF_PARSER_MAPPING_H_ 30 | -------------------------------------------------------------------------------- /utils/gpu_elf_parser/src/section_debug_abbrev.hpp: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef PTI_SECTION_DEBUG_ABBREV_H_ 8 | #define PTI_SECTION_DEBUG_ABBREV_H_ 9 | 10 | #include "elf_parser_def.hpp" 11 | 12 | namespace elf_parser { 13 | 14 | class DebugAbbrevParser { 15 | public: 16 | DebugAbbrevParser(const uint8_t* data, uint32_t size); 17 | 18 | inline bool IsValid() const { return (data_ != nullptr && size_ != 0) ? 
true : false; } 19 | 20 | DwarfCompUnitMap GetCompUnitMap() const; 21 | 22 | private: 23 | const uint8_t* data_; 24 | uint32_t size_; 25 | }; 26 | 27 | } // namespace elf_parser 28 | 29 | #endif // PTI_SECTION_DEBUG_ABBREV_H_ 30 | -------------------------------------------------------------------------------- /utils/gpu_elf_parser/src/section_debug_info.hpp: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | /** 8 | * @file section_debug_info.hpp 9 | * @brief Contains the definition of the DebugInfoParser class, which is responsible for 10 | * parsing the .debug_info section in an ELF file. It also provides the compilation directory. 11 | */ 12 | 13 | #ifndef PTI_SECTION_DEBUG_INFO_H_ 14 | #define PTI_SECTION_DEBUG_INFO_H_ 15 | 16 | #include "elf_parser_def.hpp" 17 | 18 | namespace elf_parser { 19 | 20 | class DebugInfoParser { // parses one unit of debug_info section 21 | public: 22 | DebugInfoParser(const uint8_t* data, uint32_t size); 23 | 24 | inline bool IsValid() const { return is_valid_; } 25 | 26 | inline uint32_t GetBitness() const { return is_valid_ ? bitness_ : -1; } 27 | 28 | inline uint32_t GetUnitLength() const { return is_valid_ ? unit_length_from_beginning_ : -1; } 29 | 30 | inline uint64_t GetDebugAbbrevOffset() const { return is_valid_ ? 
debug_abbrev_offset_ : -1; } 31 | 32 | const char* GetCompDir(const DwarfCompUnitMap& comp_unit_map); 33 | 34 | private: 35 | template 36 | void ProcessDwarf4Header(); 37 | template 38 | void ProcessDwarf5Header(); 39 | 40 | const uint8_t* data_; 41 | uint32_t size_; 42 | uint32_t bitness_ = 32; 43 | uint32_t version_ = 0; 44 | uint32_t unit_length_from_beginning_ = 0; 45 | uint64_t debug_abbrev_offset_ = 0; 46 | uint8_t address_size_ = 0; 47 | uint64_t data_offset_ = 0; 48 | bool is_valid_ = false; 49 | }; 50 | 51 | } // namespace elf_parser 52 | 53 | #endif // PTI_SECTION_DEBUG_INFO_H_ 54 | -------------------------------------------------------------------------------- /utils/gpu_elf_parser/src/section_debug_line.hpp: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | /** 8 | * @file section_debug_line.hpp 9 | * @brief Contains the definition of the DwarfDebugLineParser class, which is responsible for 10 | * parsing the .debug_line section in an ELF file. 11 | */ 12 | 13 | #ifndef PTI_SECTION_DEBUG_LINE_H_ 14 | #define PTI_SECTION_DEBUG_LINE_H_ 15 | 16 | #include 17 | 18 | #include "elf_parser_def.hpp" 19 | #include "elf_parser_mapping.h" 20 | 21 | namespace elf_parser { 22 | 23 | class DwarfDebugLineParser { 24 | public: 25 | DwarfDebugLineParser(const uint8_t* data, uint64_t offset, uint64_t size, uint32_t address_width); 26 | 27 | inline bool IsValid() const { return is_valid_; } 28 | 29 | inline uint32_t GetBitness() { return is_valid_ ? header_.bitness : -1; } 30 | 31 | inline uint64_t GetUnitLength() { return is_valid_ ? 
header_.unit_length_from_beginning : -1; } 32 | 33 | std::vector GetMapping(const char* comp_dir); 34 | 35 | private: 36 | std::vector GetSourceFiles(const char* comp_dir); 37 | 38 | template 39 | void ProcessHeader(); 40 | 41 | const uint8_t* data_; 42 | const uint64_t size_; 43 | const uint64_t offset_; 44 | const uint32_t address_width_; 45 | DwarfLineNumberProgramHeader header_ = {}; 46 | bool is_valid_ = false; 47 | bool is_header_processed_ = false; 48 | }; 49 | 50 | } // namespace elf_parser 51 | 52 | #endif // PTI_SECTION_DEBUG_LINE_H_ 53 | -------------------------------------------------------------------------------- /utils/gtpin_utils.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef PTI_UTILS_GTPIN_UTILS_H_ 8 | #define PTI_UTILS_GTPIN_UTILS_H_ 9 | 10 | #include 11 | #include 12 | 13 | #include "pti_assert.h" 14 | 15 | namespace utils { 16 | namespace gtpin { 17 | 18 | inline iga_gen_t GetArch(GED_MODEL arch) { 19 | switch(arch) { 20 | case GED_MODEL_8: 21 | return IGA_GEN8; 22 | case GED_MODEL_9: 23 | return IGA_GEN9; 24 | case GED_MODEL_10: 25 | return IGA_GEN10; 26 | case GED_MODEL_11: 27 | return IGA_GEN11; 28 | case GED_MODEL_TGL: 29 | return IGA_GEN12p1; 30 | default: 31 | break; 32 | } 33 | return IGA_GEN_INVALID; 34 | } 35 | 36 | inline void KnobAddBool(const char* name, bool value) { 37 | GTPinKnob knob = KNOB_FindArg(name); 38 | PTI_ASSERT(knob != nullptr); 39 | KnobValue knob_value; 40 | knob_value.value._bool = value; 41 | knob_value.type = KNOB_TYPE::KNOB_TYPE_BOOL; 42 | KNOB_STATUS status = KNOB_AddValue(knob, &knob_value); 43 | PTI_ASSERT(status == KNOB_STATUS_SUCCESS); 44 | } 45 | 46 | inline void KnobAddInt(const char* name, int value) { 47 | GTPinKnob knob = 
KNOB_FindArg(name); 48 | PTI_ASSERT(knob != nullptr); 49 | KnobValue knob_value; 50 | knob_value.value._bool = value; 51 | knob_value.type = KNOB_TYPE::KNOB_TYPE_INTEGER; 52 | KNOB_STATUS status = KNOB_AddValue(knob, &knob_value); 53 | PTI_ASSERT(status == KNOB_STATUS_SUCCESS); 54 | } 55 | 56 | } // namespace gtpin 57 | } // namespace utils 58 | 59 | #endif // PTI_UTILS_GTPIN_UTILS_H_ -------------------------------------------------------------------------------- /utils/gtpin_utils/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(PTI_GTPIN_TOOL_BASE_DIR ${CMAKE_CURRENT_LIST_DIR} CACHE INTERNAL "") 2 | 3 | include("${CMAKE_CURRENT_LIST_DIR}/../../build_utils/CMakeLists.txt") 4 | SetRequiredCMakeVersion() 5 | cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) 6 | 7 | macro(FindGTPinToolUtilsHeaders TARGET) 8 | target_include_directories(${TARGET} 9 | PUBLIC "${PTI_GTPIN_TOOL_BASE_DIR}/include") 10 | endmacro() 11 | 12 | project(PTI_GTPin_tool_utils CXX) 13 | SetCompilerFlags() 14 | SetBuildType() 15 | 16 | file(GLOB CAPSULE_MACROS "${PTI_GTPIN_TOOL_BASE_DIR}/src/macro/*.cpp") 17 | 18 | add_library(gtpin_tool_utils STATIC 19 | "${PTI_GTPIN_TOOL_BASE_DIR}/src/profiler.cpp" 20 | "${PTI_GTPIN_TOOL_BASE_DIR}/src/control.cpp" 21 | "${PTI_GTPIN_TOOL_BASE_DIR}/src/tool.cpp" 22 | "${PTI_GTPIN_TOOL_BASE_DIR}/src/capsule.cpp" 23 | ${CAPSULE_MACROS} 24 | "${PTI_GTPIN_TOOL_BASE_DIR}/src/results.cpp" 25 | "${PTI_GTPIN_TOOL_BASE_DIR}/src/tool_factory.cpp" 26 | "${PTI_GTPIN_TOOL_BASE_DIR}/src/writer.cpp" 27 | ) 28 | FindGTPinToolUtilsHeaders(gtpin_tool_utils) 29 | set_property(TARGET gtpin_tool_utils PROPERTY POSITION_INDEPENDENT_CODE ON) 30 | target_include_directories(gtpin_tool_utils 31 | PRIVATE "${PTI_GTPIN_TOOL_BASE_DIR}/../../sdk/src/utils") 32 | if(CMAKE_INCLUDE_PATH) 33 | target_include_directories(gtpin_tool_utils 34 | PUBLIC "${CMAKE_INCLUDE_PATH}") 35 | endif() 36 | 37 | FindGTPinLibrary(gtpin_tool_utils) 38 
| FindGTPinHeaders(gtpin_tool_utils) 39 | FindGTPinUtils(gtpin_tool_utils) 40 | -------------------------------------------------------------------------------- /utils/gtpin_utils/doc/capsule.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | title Capsule class diagram 4 | 5 | ' class Capsule { 6 | ' -IGtKernelInstrument m_instrumentor 7 | ' -GtProfileArray m_profileArray 8 | ' -size_t m_recordIndex 9 | ' ---- 10 | ' +Capsule(instrumentor profileArray recordIndex) 11 | ' +GetProcedure(): proc 12 | ' +AppendProcedure() 13 | ' +operator+() 14 | ' -- Analyses -- 15 | ' +AddInstructionCounterAnalysis() 16 | ' +AddSimdActiveAnalysis() 17 | ' +AddCacheLineAlignedAnalysis() 18 | ' +AddCacheLineCounterAnalysis() 19 | ' +AddStrideDistributionAnanlysis() 20 | ' -- Utils -- 21 | ' +CalcBaseAddr() 22 | ' +IsCacheLineAligned() 23 | ' +ComputeSimdMask() 24 | ' +CounterAdd() 25 | ' +CounterInc() 26 | 27 | ' -- Other -- 28 | 29 | ' } 30 | 31 | ' class CapsuleState { 32 | ' -GtGenProcedure m_proc 33 | ' -IGtKernelInstrument m_instrumentor 34 | ' -GtProfileArray m_profileArray 35 | ' -size_t m_recordIndex 36 | ' -size_t m_numTiles 37 | ' -GtReg m_baseAddrReg 38 | ' -GtReg m_simdMaskReg 39 | ' -GtReg m_tempAddrReg 40 | ' -GtReg m_tempData64Reg 41 | ' -GtReg m_tempMsgData64Reg 42 | ' -IGtVregFactory m_vregs 43 | ' -IGtInsFactory m_insF 44 | ' +AppendProcedure() 45 | ' +GetProcedure(): proc 46 | ' +Getters() 47 | ' +Setters() 48 | ' } 49 | 50 | @enduml 51 | -------------------------------------------------------------------------------- /utils/gtpin_utils/doc/interfaces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/utils/gtpin_utils/doc/interfaces.png -------------------------------------------------------------------------------- /utils/gtpin_utils/doc/results.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/utils/gtpin_utils/doc/results.png -------------------------------------------------------------------------------- /utils/gtpin_utils/doc/writer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/pti-gpu/0f5841470aec005b5ba461fc7f31cc9b059b5d5e/utils/gtpin_utils/doc/writer.png -------------------------------------------------------------------------------- /utils/gtpin_utils/src/tool_factory.cpp: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | /** 8 | * @file tool_factory.cpp 9 | * @brief Contains the implementation of the ToolFactory class. 10 | * 11 | * This file provides the implementation of the ToolFactory class, which is responsible for creating 12 | * tools and managing their lifecycle. It also includes the definition of the ToolFactory 13 | * constructor and the GetControl method. 
namespace utils {
namespace leb128 {

// Decodes one unsigned LEB128 value of up to 32 bits starting at `ptr`.
//
// ptr   - first byte of the encoded value; the function performs no bounds
//         checking, so the caller must guarantee the bytes are readable.
// value - receives the decoded value (reset to 0 on entry).
// done  - set to true when a terminating byte (high bit clear) was found
//         within the maximum encoded length, false otherwise.
//
// Returns the pointer just past the last byte consumed.
inline const uint8_t* Decode32(const uint8_t* ptr, uint32_t& value,
                               bool& done) {
  // Each LEB128 byte carries 7 payload bits, so a 32-bit value may need
  // ceil(32 / 7) = 5 bytes (the previous limit of 4 rejected values >= 2^28).
  const uint8_t kMaxBytes = 5;

  uint8_t byte = 0;
  uint8_t count = 0;
  uint8_t shift = 0;

  value = 0;
  done = false;

  while (count < kMaxBytes) {
    byte = *ptr;
    // Shift in unsigned arithmetic: on the fifth byte (shift == 28) payload
    // bits above bit 31 are simply discarded instead of overflowing an int.
    value |= (static_cast<uint32_t>(byte & 0x7F) << shift);
    shift += 7;

    ++ptr;
    ++count;

    if ((byte & 0x80) == 0) {
      done = true;
      break;
    }
  }

  return ptr;
}

// Decodes one signed LEB128 value of up to 32 bits starting at `ptr`.
//
// Parameters and return value match the unsigned overload; `value` receives
// the sign-extended result.
inline const uint8_t* Decode32(const uint8_t* ptr, int32_t& value,
                               bool& done) {
  // See the unsigned overload: 32 bits need at most 5 encoded bytes.
  const uint8_t kMaxBytes = 5;

  uint8_t byte = 0;
  uint8_t count = 0;
  uint8_t shift = 0;
  // Accumulate in an unsigned type so that shifting into bit 31 is defined.
  uint32_t bits = 0;

  value = 0;
  done = false;

  while (count < kMaxBytes) {
    byte = *ptr;
    bits |= (static_cast<uint32_t>(byte & 0x7F) << shift);
    shift += 7;

    ++ptr;
    ++count;

    if ((byte & 0x80) == 0) {
      done = true;
      break;
    }
  }

  // Bit 6 of the last byte is the sign bit; extend it through the remaining
  // high bits when the encoding did not already fill all 32 bits.
  if ((shift < 8 * sizeof(int32_t)) && ((byte & 0x40) != 0)) {
    bits |= (~static_cast<uint32_t>(0) << shift);
  }

  value = static_cast<int32_t>(bits);
  return ptr;
}

}  // namespace leb128
}  // namespace utils
-------------------------------------------------------------------------------- /utils/pti_assert.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef PTI_UTILS_PTI_ASSERT_H_ 8 | #define PTI_UTILS_PTI_ASSERT_H_ 9 | 10 | #ifdef NDEBUG 11 | #undef NDEBUG 12 | #include 13 | #define NDEBUG 14 | #else 15 | #include 16 | #endif 17 | 18 | #define PTI_ASSERT(X) assert(X) 19 | 20 | #endif // PTI_UTILS_PTI_ASSERT_H_ -------------------------------------------------------------------------------- /utils/trace_guard.cc: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #include "trace_guard.h" 8 | 9 | thread_local int TraceGuard::inactive_count_ = 0; -------------------------------------------------------------------------------- /utils/trace_guard.h: -------------------------------------------------------------------------------- 1 | //============================================================== 2 | // Copyright (C) Intel Corporation 3 | // 4 | // SPDX-License-Identifier: MIT 5 | // ============================================================= 6 | 7 | #ifndef PTI_TOOLS_CL_TRACER_TRACE_GUARD_H_ 8 | #define PTI_TOOLS_CL_TRACER_TRACE_GUARD_H_ 9 | 10 | #include "pti_assert.h" 11 | 12 | class TraceGuard { 13 | public: 14 | TraceGuard() { 15 | ++inactive_count_; 16 | } 17 | 18 | TraceGuard(const TraceGuard& that) = delete; 19 | 20 | ~TraceGuard() { 21 | PTI_ASSERT(inactive_count_ > 0); 22 | --inactive_count_; 23 | } 24 | 25 | static bool Inactive() { 26 | return 
inactive_count_ > 0; 27 | } 28 | 29 | private: 30 | static thread_local int inactive_count_; 31 | }; 32 | 33 | #endif // PTI_TOOLS_CL_TRACER_TRACE_GUARD_H_ 34 | --------------------------------------------------------------------------------