├── .cargo └── config.toml ├── .devcontainer ├── Dockerfile ├── Dockerfile-common ├── Dockerfile-el8_8 ├── Dockerfile-xgboost └── devcontainer.json ├── .gitattributes ├── .github └── workflows │ ├── pr.yml │ └── rust.yml ├── .gitignore ├── .gitmodules ├── ARCHITECTURE.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── TROUBLESHOOTING.md ├── atiadlxx-sys ├── Cargo.toml ├── README ├── build.rs ├── include │ ├── adapter.h │ ├── adl.h │ ├── adl_defines.h │ ├── adl_sdk.h │ ├── adl_structures.h │ ├── display.h │ ├── overdrive5.h │ ├── overdrive6.h │ ├── overdrive8.h │ ├── overdriveN.h │ └── wrapper.hpp ├── lib │ ├── atiadlxx.def │ └── atiadlxx.lib └── src │ ├── adl.rs │ └── lib.rs ├── comgr ├── Cargo.toml ├── README └── src │ ├── amd_comgr.rs │ ├── amd_comgr_3.rs │ ├── comgr.rs │ ├── double_wave32_on_wave64.ll │ ├── lib.rs │ ├── linux.ll │ ├── wave32.ll │ ├── wave32_on_wave64.ll │ └── windows.ll ├── cuda_base ├── Cargo.toml ├── README ├── build │ └── wrapper.h └── src │ ├── cuda.rs │ └── lib.rs ├── cuda_types ├── Cargo.toml └── src │ └── lib.rs ├── detours-sys ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── build.rs ├── build │ └── wrapper.h └── src │ ├── bundled_bindings.rs │ └── lib.rs ├── ext ├── detours │ ├── .github │ │ ├── ISSUE_TEMPLATE │ │ │ ├── bug-report.md │ │ │ └── question.md │ │ ├── PULL_REQUEST_TEMPLATE │ │ │ └── pull_request_template.md │ │ ├── codeql │ │ │ └── codeql-config.yml │ │ └── workflows │ │ │ └── main.yml │ ├── .gitignore │ ├── CREDITS.TXT │ ├── LICENSE.md │ ├── Makefile │ ├── README.md │ ├── samples │ │ ├── Makefile │ │ ├── README.TXT │ │ ├── comeasy │ │ │ ├── Makefile │ │ │ ├── comeasy.cpp │ │ │ ├── wrotei.cpp │ │ │ └── wrotei.rc │ │ ├── commem │ │ │ ├── Makefile │ │ │ └── commem.cpp │ │ ├── common.mak │ │ ├── cping │ │ │ ├── Makefile │ │ │ ├── ReadMe.Txt │ │ │ ├── cping.cpp │ │ │ ├── cping.dat │ │ │ └── iping.idl │ │ ├── disas │ │ │ ├── Makefile │ │ │ ├── arm.asm │ │ │ ├── disas.cpp │ │ │ 
├── ia64.asm │ │ │ ├── unk.cpp │ │ │ ├── x64.asm │ │ │ └── x86.cpp │ │ ├── dtest │ │ │ ├── Makefile │ │ │ ├── NORMAL_IA64.TXT │ │ │ ├── NORMAL_X64.TXT │ │ │ ├── NORMAL_X86.TXT │ │ │ ├── dtarge.cpp │ │ │ ├── dtarge.h │ │ │ ├── dtarge.rc │ │ │ └── dtest.cpp │ │ ├── dumpe │ │ │ ├── Makefile │ │ │ └── dumpe.cpp │ │ ├── dumpi │ │ │ ├── Makefile │ │ │ └── dumpi.cpp │ │ ├── dynamic_alloc │ │ │ ├── Makefile │ │ │ ├── main.cpp │ │ │ ├── x64.asm │ │ │ └── x86.asm │ │ ├── echo │ │ │ ├── Makefile │ │ │ ├── echofx.cpp │ │ │ ├── echofx.rc │ │ │ ├── echonul.cpp │ │ │ └── main.cpp │ │ ├── einst │ │ │ ├── Makefile │ │ │ ├── edll1x.cpp │ │ │ ├── edll2x.cpp │ │ │ ├── edll3x.cpp │ │ │ └── einst.cpp │ │ ├── excep │ │ │ ├── Makefile │ │ │ ├── excep.cpp │ │ │ ├── firstexc.cpp │ │ │ └── firstexc.h │ │ ├── findfunc │ │ │ ├── Makefile │ │ │ ├── extend.cpp │ │ │ ├── extend.rc │ │ │ ├── findfunc.cpp │ │ │ ├── symtest.cpp │ │ │ ├── target.cpp │ │ │ ├── target.h │ │ │ └── target.rc │ │ ├── impmunge │ │ │ ├── Makefile │ │ │ └── impmunge.cpp │ │ ├── member │ │ │ ├── Makefile │ │ │ └── member.cpp │ │ ├── opengl │ │ │ ├── Makefile │ │ │ ├── ogldet.cpp │ │ │ ├── ogldet.rc │ │ │ └── testogl.cpp │ │ ├── region │ │ │ ├── Makefile │ │ │ └── region.cpp │ │ ├── setdll │ │ │ ├── Makefile │ │ │ └── setdll.cpp │ │ ├── simple │ │ │ ├── Makefile │ │ │ ├── simple.cpp │ │ │ ├── simple.rc │ │ │ └── sleep5.cpp │ │ ├── slept │ │ │ ├── Makefile │ │ │ ├── NORMAL_IA64.TXT │ │ │ ├── NORMAL_X64.TXT │ │ │ ├── NORMAL_X86.TXT │ │ │ ├── dslept.cpp │ │ │ ├── dslept.rc │ │ │ ├── sleepbed.cpp │ │ │ ├── sleepnew.cpp │ │ │ ├── sleepold.cpp │ │ │ ├── slept.cpp │ │ │ ├── slept.h │ │ │ ├── slept.rc │ │ │ └── verify.cpp │ │ ├── syelog │ │ │ ├── Makefile │ │ │ ├── sltest.cpp │ │ │ ├── sltestp.cpp │ │ │ ├── syelog.cpp │ │ │ ├── syelog.h │ │ │ └── syelogd.cpp │ │ ├── talloc │ │ │ ├── Makefile │ │ │ ├── NORMAL_IA64.TXT │ │ │ ├── NORMAL_X64.TXT │ │ │ ├── talloc.cpp │ │ │ ├── tdll1x.cpp │ │ │ ├── tdll2x.cpp │ │ │ ├── tdll3x.cpp │ │ │ ├── 
tdll4x.cpp │ │ │ ├── tdll5x.cpp │ │ │ ├── tdll6x.cpp │ │ │ ├── tdll7x.cpp │ │ │ ├── tdll8x.cpp │ │ │ └── tdll9x.cpp │ │ ├── traceapi │ │ │ ├── Makefile │ │ │ ├── _win32.cpp │ │ │ ├── testapi.cpp │ │ │ ├── trcapi.cpp │ │ │ └── trcapi.rc │ │ ├── tracebld │ │ │ ├── Makefile │ │ │ ├── tracebld.cpp │ │ │ ├── tracebld.h │ │ │ ├── trcbld.cpp │ │ │ └── trcbld.rc │ │ ├── tracelnk │ │ │ ├── Makefile │ │ │ ├── trclnk.cpp │ │ │ └── trclnk.rc │ │ ├── tracemem │ │ │ ├── Makefile │ │ │ ├── trcmem.cpp │ │ │ └── trcmem.rc │ │ ├── tracereg │ │ │ ├── Makefile │ │ │ ├── trcreg.cpp │ │ │ └── trcreg.rc │ │ ├── traceser │ │ │ ├── Makefile │ │ │ ├── trcser.cpp │ │ │ └── trcser.rc │ │ ├── tracessl │ │ │ ├── Makefile │ │ │ ├── trcssl.cpp │ │ │ └── trcssl.rc │ │ ├── tracetcp │ │ │ ├── Makefile │ │ │ ├── trctcp.cpp │ │ │ └── trctcp.rc │ │ ├── tryman │ │ │ ├── Makefile │ │ │ ├── managed.cs │ │ │ ├── size.cpp │ │ │ ├── tryman.cpp │ │ │ ├── tstman.cpp │ │ │ └── tstman.rc │ │ └── withdll │ │ │ ├── Makefile │ │ │ └── withdll.cpp │ ├── src │ │ ├── Makefile │ │ ├── creatwth.cpp │ │ ├── detours.cpp │ │ ├── detours.h │ │ ├── detver.h │ │ ├── disasm.cpp │ │ ├── disolarm.cpp │ │ ├── disolarm64.cpp │ │ ├── disolia64.cpp │ │ ├── disolx64.cpp │ │ ├── disolx86.cpp │ │ ├── image.cpp │ │ ├── modules.cpp │ │ └── uimports.cpp │ ├── system.mak │ ├── tests │ │ ├── Makefile │ │ ├── catch.hpp │ │ ├── corruptor.cpp │ │ ├── corruptor.h │ │ ├── main.cpp │ │ ├── test_image_api.cpp │ │ └── test_module_api.cpp │ └── vc │ │ ├── Detours.sln │ │ ├── Detours.vcxproj │ │ └── Detours.vcxproj.filters ├── llvm-sys.rs │ ├── .gitignore │ ├── .gitlab-ci.yml │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ ├── appveyor.yml │ ├── build.cmake │ ├── build.rs │ ├── scripts │ │ ├── RELEASE_CHECKLIST.md │ │ └── build-binaries.sh │ ├── src │ │ ├── analysis.rs │ │ ├── bit_reader.rs │ │ ├── bit_writer.rs │ │ ├── blake3.rs │ │ ├── comdat.rs │ │ ├── core.rs │ │ ├── debuginfo.rs │ │ ├── disassembler.rs │ │ ├── error.rs │ │ ├── error_handling.rs 
│ │ ├── execution_engine.rs │ │ ├── initialization.rs │ │ ├── ir_reader.rs │ │ ├── lib.rs │ │ ├── linker.rs │ │ ├── lto.rs │ │ ├── object.rs │ │ ├── orc2 │ │ │ ├── ee.rs │ │ │ ├── lljit.rs │ │ │ └── mod.rs │ │ ├── remarks.rs │ │ ├── support.rs │ │ ├── target.rs │ │ ├── target_machine.rs │ │ ├── transforms.rs │ │ └── transforms │ │ │ ├── aggressive_instcombine.rs │ │ │ ├── instcombine.rs │ │ │ ├── ipo.rs │ │ │ ├── pass_builder.rs │ │ │ ├── pass_manager_builder.rs │ │ │ ├── scalar.rs │ │ │ ├── util.rs │ │ │ └── vectorize.rs │ └── wrappers │ │ └── target.c └── optix_ext │ ├── README.md │ ├── optix_ext_compile_no_inline.h │ ├── optix_ext_compile_no_inline_function_table_definition.h │ ├── optix_ext_compile_no_inline_stubs.h │ ├── optix_ext_knobs.h │ ├── optix_ext_knobs_function_table_definition.h │ ├── optix_ext_knobs_stubs.h │ ├── optix_ext_ptx_encryption.h │ ├── optix_ext_ptx_encryption_function_table_definition.h │ ├── optix_ext_ptx_encryption_stubs.h │ └── optix_ptx_encryption.h ├── hip_common ├── Cargo.toml └── src │ ├── cache.rs │ ├── kernel_metadata.rs │ ├── lib.rs │ ├── raytracing.rs │ ├── zluda.capnp │ ├── zluda_capnp.rs │ ├── zluda_ext.rs │ ├── zluda_rt6.capnp │ └── zluda_rt6_capnp.rs ├── hip_runtime-sys ├── Cargo.toml ├── Makefile.toml ├── README ├── build.rs └── src │ ├── hip_runtime_api_v5.rs │ ├── hip_runtime_api_v6.rs │ └── lib.rs ├── hipblaslt-sys ├── Cargo.toml ├── README ├── build.rs └── src │ ├── hipblaslt.rs │ └── lib.rs ├── hipfft-sys ├── Cargo.toml ├── README ├── build.rs └── src │ ├── hipfft.rs │ └── lib.rs ├── hiprt-sys ├── Cargo.toml ├── Makefile.toml ├── include │ ├── hiprt.h │ └── hiprt │ │ └── hiprt_vec.h ├── lib │ ├── hiprt64.dll │ ├── hiprt64.lib │ └── libhiprt64.so └── src │ ├── hiprt.rs │ └── lib.rs ├── miopen-sys ├── Cargo.toml ├── README ├── build.rs └── src │ ├── extra.rs │ ├── lib.rs │ └── miopen.rs ├── offline_compiler ├── Cargo.toml └── src │ └── main.rs ├── optix_base ├── Cargo.toml ├── README ├── include │ ├── wrapper.hpp │ └── 
wrapper6.hpp └── src │ ├── lib.rs │ ├── optix.rs │ └── optix6.rs ├── optix_dump ├── Cargo.toml ├── README ├── include │ └── wrapper.hpp └── src │ ├── eptx.rs │ └── lib.rs ├── optix_types ├── Cargo.toml └── src │ └── lib.rs ├── process_address_table ├── Cargo.toml ├── src │ └── main.rs └── table.rs ├── ptx ├── Cargo.toml ├── build.rs ├── lib │ ├── .gitattributes │ ├── cvt.h │ ├── cvt.py │ ├── raytracing.cpp │ ├── raytracing.hpp │ ├── raytracing_bounding_box.cpp │ ├── raytracing_callable.cpp │ ├── raytracing_intersect.cpp │ ├── zluda_ptx_impl.bc │ ├── zluda_ptx_impl.cpp │ ├── zluda_rt_ptx_impl.bc │ └── zluda_rt_ptx_impl.cpp └── src │ ├── ast.rs │ ├── emit.rs │ ├── lib.rs │ ├── llvm.rs │ ├── ptx.lalrpop │ ├── raytracing.rs │ ├── test │ ├── _Z9vectorAddPKfS0_Pfi.ptx │ ├── mod.rs │ ├── operands.ptx │ ├── ptx_raytracing │ │ ├── closest_hit.cu │ │ ├── closest_hit.ptx │ │ ├── optixCallablePrograms_generated_optixCallablePrograms.ptx │ │ ├── optixCallablePrograms_generated_optixCallablePrograms_miss.ll │ │ ├── optixHello_generated_draw_color.ptx │ │ ├── optixHello_generated_draw_color_draw_solid_color.ll │ │ ├── optixHello_generated_draw_color_var_ptr_cast.ptx │ │ ├── optixHello_generated_draw_color_var_ptr_cast_draw_solid_color.ll │ │ ├── optixPathTracer_generated_disney.ptx │ │ ├── optixPathTracer_generated_disney_Eval.ll │ │ ├── optixPathTracer_generated_hit_program.ptx │ │ ├── optixPathTracer_generated_hit_program_closest_hit.ll │ │ ├── optixSphere_generated_normal_shader.ptx │ │ ├── optixSphere_generated_normal_shader_closest_hit_radiance.ll │ │ ├── optixSphere_generated_sphere.ptx │ │ ├── optixSphere_generated_sphere_bounds.ll │ │ └── optixSphere_generated_sphere_robust_intersect.ll │ ├── raytracing.rs │ ├── spirv_build │ │ ├── bar_sync.ptx │ │ ├── global_extern_array.ptx │ │ ├── noreturn.ll │ │ ├── noreturn.ptx │ │ └── param_func_array_0.ptx │ ├── spirv_fail │ │ ├── const_ptr.ptx │ │ ├── global_ptr.ptx │ │ ├── local_ptr.txt │ │ ├── param_entry_array_0.ptx │ │ ├── 
param_vector.ptx │ │ ├── shared_ptr.ptx │ │ └── shared_ptr2.ptx │ ├── spirv_run │ │ ├── abs.ll │ │ ├── abs.ptx │ │ ├── activemask.ll │ │ ├── activemask.ptx │ │ ├── add.ll │ │ ├── add.ptx │ │ ├── add_global.ll │ │ ├── add_global.ptx │ │ ├── add_non_coherent.ll │ │ ├── add_non_coherent.ptx │ │ ├── add_param_ptr.ll │ │ ├── add_param_ptr.ptx │ │ ├── add_tuning.ll │ │ ├── add_tuning.ptx │ │ ├── addc_cc.ll │ │ ├── addc_cc.ptx │ │ ├── addc_cc2.ll │ │ ├── addc_cc2.ptx │ │ ├── alloca_call.ll │ │ ├── alloca_call.ptx │ │ ├── amdgpu_unnamed.ll │ │ ├── amdgpu_unnamed.ptx │ │ ├── and.ll │ │ ├── and.ptx │ │ ├── assertfail.ll │ │ ├── assertfail.ptx │ │ ├── atom_add.ll │ │ ├── atom_add.ptx │ │ ├── atom_add_f16.ll │ │ ├── atom_add_f16.ptx │ │ ├── atom_add_float.ll │ │ ├── atom_add_float.ptx │ │ ├── atom_cas.ll │ │ ├── atom_cas.ptx │ │ ├── atom_inc.ll │ │ ├── atom_inc.ptx │ │ ├── atom_ld_st.ll │ │ ├── atom_ld_st.ptx │ │ ├── atom_ld_st_vec.ll │ │ ├── atom_ld_st_vec.ptx │ │ ├── atom_max_u32.ll │ │ ├── atom_max_u32.ptx │ │ ├── b64tof64.ll │ │ ├── b64tof64.ptx │ │ ├── barrier.ll │ │ ├── barrier.ptx │ │ ├── bfe.ll │ │ ├── bfe.ptx │ │ ├── bfi.ll │ │ ├── bfi.ptx │ │ ├── bfind.ll │ │ ├── bfind.ptx │ │ ├── bfind_shiftamt.ll │ │ ├── bfind_shiftamt.ptx │ │ ├── block.ll │ │ ├── block.ptx │ │ ├── bra.ll │ │ ├── bra.ptx │ │ ├── brev.ll │ │ ├── brev.ptx │ │ ├── call.ll │ │ ├── call.ptx │ │ ├── call_bug.ll │ │ ├── call_bug.ptx │ │ ├── call_global_ptr.ll │ │ ├── call_global_ptr.ptx │ │ ├── call_multi_return.ll │ │ ├── call_multi_return.ptx │ │ ├── callprototype.ll │ │ ├── callprototype.ptx │ │ ├── carry_set_all.ll │ │ ├── carry_set_all.ptx │ │ ├── clz.ll │ │ ├── clz.ptx │ │ ├── const.ll │ │ ├── const.ptx │ │ ├── constant_f32.ll │ │ ├── constant_f32.ptx │ │ ├── constant_negative.ll │ │ ├── constant_negative.ptx │ │ ├── cos.ll │ │ ├── cos.ptx │ │ ├── cvt_clamp.ll │ │ ├── cvt_clamp.ptx │ │ ├── cvt_f32_f16.ll │ │ ├── cvt_f32_f16.ptx │ │ ├── cvt_f32_s32.ll │ │ ├── cvt_f32_s32.ptx │ │ ├── cvt_f64_f32.ll │ 
│ ├── cvt_f64_f32.ptx │ │ ├── cvt_rni.ll │ │ ├── cvt_rni.ptx │ │ ├── cvt_rzi.ll │ │ ├── cvt_rzi.ptx │ │ ├── cvt_s16_s8.ll │ │ ├── cvt_s16_s8.ptx │ │ ├── cvt_s32_f32.ll │ │ ├── cvt_s32_f32.ptx │ │ ├── cvt_s64_s32.ll │ │ ├── cvt_s64_s32.ptx │ │ ├── cvt_sat_s_u.ll │ │ ├── cvt_sat_s_u.ptx │ │ ├── cvt_u32_s16.ll │ │ ├── cvt_u32_s16.ptx │ │ ├── cvta.ll │ │ ├── cvta.ptx │ │ ├── div_approx.ll │ │ ├── div_approx.ptx │ │ ├── dp4a.ll │ │ ├── dp4a.ptx │ │ ├── ex2.ll │ │ ├── ex2.ptx │ │ ├── extern_shared.ll │ │ ├── extern_shared.ptx │ │ ├── extern_shared_call.ll │ │ ├── extern_shared_call.ptx │ │ ├── fma.ll │ │ ├── fma.ptx │ │ ├── func_ptr.ll │ │ ├── func_ptr.ptx │ │ ├── generic.ll │ │ ├── generic.ptx │ │ ├── global_array.ll │ │ ├── global_array.ptx │ │ ├── implicit_param.ll │ │ ├── implicit_param.ptx │ │ ├── isspacep.ll │ │ ├── isspacep.ptx │ │ ├── laneid.ptx │ │ ├── lanemask_lt.ll │ │ ├── lanemask_lt.ptx │ │ ├── ld_st.ll │ │ ├── ld_st.ptx │ │ ├── ld_st_implicit.ll │ │ ├── ld_st_implicit.ptx │ │ ├── ld_st_offset.ll │ │ ├── ld_st_offset.ptx │ │ ├── lg2.ll │ │ ├── lg2.ptx │ │ ├── local_align.ll │ │ ├── local_align.ptx │ │ ├── mad_hi_cc.ll │ │ ├── mad_hi_cc.ptx │ │ ├── mad_s32.ll │ │ ├── mad_s32.ptx │ │ ├── madc_cc.ll │ │ ├── madc_cc.ptx │ │ ├── match_any_32.ptx │ │ ├── max.ll │ │ ├── max.ptx │ │ ├── membar.ll │ │ ├── membar.ptx │ │ ├── min.ll │ │ ├── min.ptx │ │ ├── mod.rs │ │ ├── mov.ll │ │ ├── mov.ptx │ │ ├── mov_address.ll │ │ ├── mov_address.ptx │ │ ├── mov_vector_cast.ll │ │ ├── mov_vector_cast.ptx │ │ ├── mul24_hi.ll │ │ ├── mul24_hi.ptx │ │ ├── mul24_lo.ll │ │ ├── mul24_lo.ptx │ │ ├── mul_ftz.ll │ │ ├── mul_ftz.ptx │ │ ├── mul_hi.ll │ │ ├── mul_hi.ptx │ │ ├── mul_lo.ll │ │ ├── mul_lo.ptx │ │ ├── mul_non_ftz.ll │ │ ├── mul_non_ftz.ptx │ │ ├── mul_wide.ll │ │ ├── mul_wide.ptx │ │ ├── multireg.ll │ │ ├── multireg.ptx │ │ ├── neg.ll │ │ ├── neg.ptx │ │ ├── non_scalar_ptr_offset.ll │ │ ├── non_scalar_ptr_offset.ptx │ │ ├── not.ll │ │ ├── not.ptx │ │ ├── ntid.ll │ │ ├── 
ntid.ptx │ │ ├── or.ll │ │ ├── or.ptx │ │ ├── param_ptr.ll │ │ ├── param_ptr.ptx │ │ ├── popc.ll │ │ ├── popc.ptx │ │ ├── pred_not.ll │ │ ├── pred_not.ptx │ │ ├── prmt.ll │ │ ├── prmt.ptx │ │ ├── prmt_non_immediate.ll │ │ ├── prmt_non_immediate.ptx │ │ ├── rcp.ll │ │ ├── rcp.ptx │ │ ├── red_shared.ptx │ │ ├── reg_local.ll │ │ ├── reg_local.ptx │ │ ├── rem.ll │ │ ├── rem.ptx │ │ ├── rsqrt.ll │ │ ├── rsqrt.ptx │ │ ├── s64_min.ll │ │ ├── s64_min.ptx │ │ ├── sad.ll │ │ ├── sad.ptx │ │ ├── selp.ll │ │ ├── selp.ptx │ │ ├── selp_true.ll │ │ ├── selp_true.ptx │ │ ├── set_f16x2.ll │ │ ├── set_f16x2.ptx │ │ ├── setp.ll │ │ ├── setp.ptx │ │ ├── setp_bool.ll │ │ ├── setp_bool.ptx │ │ ├── setp_gt.ll │ │ ├── setp_gt.ptx │ │ ├── setp_leu.ll │ │ ├── setp_leu.ptx │ │ ├── setp_nan.ll │ │ ├── setp_nan.ptx │ │ ├── setp_num.ll │ │ ├── setp_num.ptx │ │ ├── setp_pred2.ll │ │ ├── setp_pred2.ptx │ │ ├── shared_ptr_32.ll │ │ ├── shared_ptr_32.ptx │ │ ├── shared_ptr_take_address.ll │ │ ├── shared_ptr_take_address.ptx │ │ ├── shared_unify_decl.ll │ │ ├── shared_unify_decl.ptx │ │ ├── shared_unify_extern.ll │ │ ├── shared_unify_extern.ptx │ │ ├── shared_unify_local.ll │ │ ├── shared_unify_local.ptx │ │ ├── shared_variable.ll │ │ ├── shared_variable.ptx │ │ ├── shf.ll │ │ ├── shf.ptx │ │ ├── shfl.ptx │ │ ├── shl.ll │ │ ├── shl.ptx │ │ ├── shl_link_hack.ll │ │ ├── shl_link_hack.ptx │ │ ├── shl_overflow.ll │ │ ├── shl_overflow.ptx │ │ ├── shr_s32.ll │ │ ├── shr_s32.ptx │ │ ├── shr_u32.ll │ │ ├── shr_u32.ptx │ │ ├── sign_extend.ll │ │ ├── sign_extend.ptx │ │ ├── sin.ll │ │ ├── sin.ptx │ │ ├── sqrt.ll │ │ ├── sqrt.ptx │ │ ├── sub.ll │ │ ├── sub.ptx │ │ ├── subc_cc.ll │ │ ├── subc_cc.ptx │ │ ├── vector.ll │ │ ├── vector.ptx │ │ ├── vector4.ll │ │ ├── vector4.ptx │ │ ├── vector_extract.ll │ │ ├── vector_extract.ptx │ │ ├── verify.py │ │ ├── vote_ballot.ll │ │ ├── vote_ballot.ptx │ │ ├── vshr.ll │ │ ├── vshr.ptx │ │ ├── xor.ll │ │ └── xor.ptx │ ├── vectorAdd_11.ptx │ └── vectorAdd_kernel64.ptx │ └── 
translate.rs ├── rocblas-sys ├── Cargo.toml ├── README ├── build.rs └── src │ ├── lib.rs │ └── rocblas.rs ├── rocm_smi-sys ├── Cargo.toml ├── README ├── build.rs └── src │ ├── lib.rs │ └── rocm_smi.rs ├── rocsolver-sys ├── Cargo.toml ├── README ├── build.rs └── src │ ├── lib.rs │ └── rocsolver.rs ├── rocsparse-sys ├── Cargo.toml ├── README ├── build.rs └── src │ ├── lib.rs │ └── rocsparse.rs ├── xtask ├── Cargo.toml └── src │ └── main.rs ├── zluda ├── Cargo.toml ├── README ├── build.rs ├── src │ ├── cuda.rs │ ├── impl │ │ ├── array.rs │ │ ├── cache.rs │ │ ├── context.rs │ │ ├── dark_api.rs │ │ ├── device.rs │ │ ├── empty_module.ptx │ │ ├── function.rs │ │ ├── gl.rs │ │ ├── graph.rs │ │ ├── hipfix.rs │ │ ├── library.rs │ │ ├── link.rs │ │ ├── memory.rs │ │ ├── mod.rs │ │ ├── module.rs │ │ ├── os_unix.rs │ │ ├── os_win.rs │ │ ├── pointer.rs │ │ ├── stream.rs │ │ ├── surface.rs │ │ ├── surfref.rs │ │ ├── texobj.rs │ │ └── texref.rs │ └── lib.rs └── tests │ ├── bfi.ptx │ ├── bfi.rs │ ├── common.rs │ ├── context_dark_api_primary_is_unretained.rs │ ├── context_destroy_also_destroys_stream.rs │ ├── context_destroy_leaves_zombie.rs │ ├── context_destroy_pops_top_of_stack.rs │ ├── context_double_destroy_fails.rs │ ├── context_empty_pop_fails.rs │ ├── context_no_current_on_init.rs │ ├── context_push_invalid_should_crash.rs │ ├── function_version.ptx │ ├── function_version.rs │ ├── kernel_args_align.ptx │ ├── kernel_args_align.rs │ ├── kernel_extra.ptx │ ├── kernel_extra.rs │ ├── kernel_suld.ptx │ ├── kernel_suld.rs │ ├── kernel_sust.ptx │ ├── kernel_sust.rs │ ├── kernel_tex.ptx │ ├── kernel_tex.rs │ ├── kernel_texobj_2d.ptx │ ├── kernel_texobj_2d.rs │ ├── kernel_texref_1d.ptx │ ├── kernel_texref_1d.rs │ ├── kernel_texref_2d.ptx │ ├── kernel_texref_2d.rs │ ├── kernel_unused_global.ptx │ ├── kernel_unused_global.rs │ ├── linking.rs │ ├── llama.bin │ ├── llama.ptx │ ├── llama.rs │ ├── maxntid.ptx │ ├── maxntid.rs │ ├── memcpy_pitch.rs │ ├── mipmap_array.ptx │ ├── 
mipmap_array.rs │ ├── module_texrefs_have_correct_format.rs │ ├── primary_context.rs │ ├── shuffle.ptx │ ├── shuffle.rs │ ├── stream_can_destroy.rs │ ├── stream_cant_destroy_default.rs │ ├── stream_context_destroyed.rs │ ├── stream_default_uses_current_ctx_impl.rs │ └── stream_moves_context_to_another_thread.rs ├── zluda_api ├── Cargo.toml ├── README └── src │ ├── lib.rs │ ├── nvapi.rs │ └── nvapi_wrapper.h ├── zluda_blas ├── Cargo.toml ├── README ├── build │ └── wrapper.h └── src │ ├── common.rs │ ├── cublas.rs │ ├── cublasxt.rs │ └── lib.rs ├── zluda_blaslt ├── Cargo.toml ├── README └── src │ ├── cublaslt.rs │ ├── decl.rs │ └── lib.rs ├── zluda_ccl ├── Cargo.toml ├── README └── src │ ├── lib.rs │ └── nccl.rs ├── zluda_dark_api ├── Cargo.toml └── src │ └── lib.rs ├── zluda_dnn ├── Cargo.toml ├── README └── src │ ├── cudnn.rs │ ├── extra.rs │ └── lib.rs ├── zluda_dump ├── Cargo.toml ├── README.md └── src │ ├── dark_api.rs │ ├── debug.ptx │ ├── events_to_csv.py │ ├── format.rs │ ├── lib.rs │ ├── log.rs │ ├── os_unix.rs │ ├── os_win.rs │ ├── profiler.rs │ ├── replay.py │ ├── side_by_side.rs │ └── trace.rs ├── zluda_fft ├── Cargo.toml ├── README └── src │ ├── cufft.rs │ ├── cufftxt.rs │ └── lib.rs ├── zluda_fftw ├── Cargo.toml ├── README └── src │ ├── cufftw.rs │ └── lib.rs ├── zluda_inject ├── Cargo.toml ├── build.rs ├── src │ ├── bin.rs │ ├── main.rs │ └── win.rs └── tests │ ├── helpers │ ├── direct_cuinit.rs │ ├── do_cuinit.rs │ ├── do_cuinit_early.rs │ ├── do_cuinit_late.rs │ ├── do_cuinit_late_clr.cs │ ├── do_cuinit_late_clr.exe │ ├── indirect_cuinit.rs │ ├── nvcuda.lib │ ├── query_exe.rs │ └── subprocess.rs │ └── inject.rs ├── zluda_lib ├── Cargo.toml ├── README.md └── src │ └── lib.rs ├── zluda_llvm ├── Cargo.toml ├── README.md ├── build.rs └── src │ ├── lib.cpp │ └── lib.rs ├── zluda_ml ├── Cargo.toml ├── README └── src │ ├── common.rs │ ├── lib.rs │ ├── nvml.rs │ ├── unix.rs │ └── windows.rs ├── zluda_redirect ├── Cargo.toml └── src │ ├── lib.rs │ └── 
payload_guid.rs ├── zluda_rt ├── Cargo.toml ├── bin │ ├── liboptix.so.6.5.0 │ └── optix.6.5.0.dll ├── build.rs ├── optix.xmi └── src │ ├── acceleration.rs │ ├── buffer.rs │ ├── cache.rs │ ├── context.rs │ ├── eptx.rs │ ├── geometry.rs │ ├── geometry_group.rs │ ├── geometry_instance.rs │ ├── geometry_triangles.rs │ ├── group.rs │ ├── hip.rs │ ├── lib.rs │ ├── material.rs │ ├── program.rs │ ├── repr_gpu.rs │ ├── test_common.rs │ ├── tests │ ├── alloca_bug.cu │ ├── alloca_bug.ptx │ ├── any_hit_intersect.cu │ ├── any_hit_intersect.ptx │ ├── barycentrics.cu │ ├── barycentrics.ptx │ ├── buffer_id.cu │ ├── buffer_id.ptx │ ├── buffer_id_call.cu │ ├── buffer_id_call.ptx │ ├── buffer_id_callable.cu │ ├── buffer_id_callable.ptx │ ├── buffer_mipmap.cu │ ├── buffer_mipmap.ptx │ ├── callable_programs.cu │ ├── callable_programs.ptx │ ├── default_variable.cu │ ├── default_variable.ptx │ ├── exception.cu │ ├── exception.ptx │ ├── exception_subfunc.cu │ ├── exception_subfunc.ptx │ ├── get_transform.cu │ ├── get_transform.ptx │ ├── mod.rs │ ├── oob.cu │ ├── oob.ptx │ ├── texture_sampler.cu │ ├── texture_sampler.ptx │ ├── trace_control.cu │ ├── trace_control.ptx │ ├── transform.cu │ ├── transform.ptx │ ├── triangle_front.cu │ └── triangle_front.ptx │ ├── texture_sampler.rs │ ├── transform.rs │ └── variable.rs ├── zluda_rtc ├── Cargo.toml ├── README └── src │ ├── lib.rs │ └── nvrtc.rs ├── zluda_runtime ├── Cargo.toml ├── README └── src │ ├── cudart.rs │ ├── decl.rs │ ├── extra.rs │ └── lib.rs └── zluda_sparse ├── Cargo.toml ├── README └── src ├── cusparse.rs └── lib.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target."x86_64-unknown-linux-gnu"] 2 | rustflags = ["-C", "target-cpu=x86-64-v2"] 3 | 4 | [target."x86_64-pc-windows-msvc"] 5 | rustflags = ["-C", "target-cpu=x86-64-v2"] 6 | 7 | [alias] 8 | xtask = "run --package xtask --" 9 | -------------------------------------------------------------------------------- 
/.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax = devthefuture/dockerfile-x 2 | 3 | # This duplicate FROM is here purely to make dev containers happy, 4 | # Otherwise it tries to parse the file (whyyy???) and chokes on custom syntax 5 | FROM ubuntu:22.04 6 | INCLUDE ./Dockerfile-common -------------------------------------------------------------------------------- /.devcontainer/Dockerfile-xgboost: -------------------------------------------------------------------------------- 1 | # syntax = devthefuture/dockerfile-x 2 | FROM ubuntu:22.04 3 | INCLUDE ./Dockerfile-common 4 | 5 | ARG XGBOOST_VERSION=2.0.3 6 | RUN git clone --branch "v${XGBOOST_VERSION}" --recurse-submodules https://github.com/dmlc/xgboost.git && \ 7 | cd xgboost && \ 8 | # Broken test, segfaults on normal CUDA 9 | sed -i 's/TEST(Allocator, OOM) {/TEST(Allocator, OOM) { GTEST_SKIP();/g' tests/cpp/common/test_device_helpers.cu && \ 10 | mkdir build && \ 11 | cd build && \ 12 | cmake .. 
-DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DUSE_CUDA=ON -GNinja && \ 13 | ninja 14 | 15 | # 16 | 17 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ext/** linguist-vendored 2 | atiadlxx-sys/include/* linguist-vendored 3 | *.ptx linguist-language=Assembly -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | 3 | .vscode/ 4 | .idea/ -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ext/llvm-project"] 2 | path = ext/llvm-project 3 | url = https://github.com/llvm/llvm-project.git 4 | branch = release/15.x 5 | shallow = true 6 | -------------------------------------------------------------------------------- /atiadlxx-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "atiadlxx-sys" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | links = "atiadlxx" 7 | 8 | [lib] -------------------------------------------------------------------------------- /atiadlxx-sys/README: -------------------------------------------------------------------------------- 1 | bindgen include/wrapper.hpp -o src/adl.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "ADL2_.*" --allowlist-var "ADL.*" --allowlist-type "ADL.*" -------------------------------------------------------------------------------- /atiadlxx-sys/build.rs: -------------------------------------------------------------------------------- 1 | use std::env::VarError; 2 | use std::{env, path::PathBuf}; 3 | 4 | fn main() -> Result<(), VarError> { 5 | 
println!("cargo:rustc-link-lib=dylib=atiadlxx"); 6 | if cfg!(windows) { 7 | let env = env::var("CARGO_CFG_TARGET_ENV")?; 8 | if env == "msvc" { 9 | let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?); 10 | path.push("lib"); 11 | println!("cargo:rustc-link-search=native={}", path.display()); 12 | } else { 13 | println!("cargo:rustc-link-search=native=C:\\Windows\\System32"); 14 | }; 15 | } else { 16 | println!("cargo:rustc-link-search=native=/opt/rocm/lib/"); 17 | } 18 | Ok(()) 19 | } 20 | -------------------------------------------------------------------------------- /atiadlxx-sys/include/wrapper.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "adl.h" 3 | #include "overdrive5.h" 4 | #include "overdrive6.h" 5 | #include "overdriveN.h" 6 | #include "overdrive8.h" 7 | -------------------------------------------------------------------------------- /atiadlxx-sys/lib/atiadlxx.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/atiadlxx-sys/lib/atiadlxx.lib -------------------------------------------------------------------------------- /atiadlxx-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[allow(non_camel_case_types)] 2 | #[allow(non_snake_case)] 3 | #[allow(non_upper_case_globals)] 4 | mod adl; 5 | pub use adl::*; 6 | -------------------------------------------------------------------------------- /comgr/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "comgr" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2021" 6 | 7 | [features] 8 | rocm5 = ["hip_common/rocm5"] 9 | 10 | [lib] 11 | 12 | [dependencies] 13 | libloading = "0.8" 14 | hip_common = { path = "../hip_common" } 15 | itertools = "0.10.5" 
-------------------------------------------------------------------------------- /comgr/README: -------------------------------------------------------------------------------- 1 | bindgen $Env:HIP_PATH/include/amd_comgr/amd_comgr.h --must-use-type "amd_comgr_status_t" --no-layout-tests --no-derive-debug --default-enum-style=newtype --dynamic-loading LibComgr --dynamic-link-require-all -o src/amd_comgr.rs --allowlist-function="^amd_comgr_action_data_get_data$|^amd_comgr_action_info_set_isa_name$|^amd_comgr_action_info_set_option_list$|^amd_comgr_create_action_info$|^amd_comgr_create_data$|^amd_comgr_create_data_set$|^amd_comgr_data_set_add$|^amd_comgr_destroy_action_info$|^amd_comgr_destroy_data_set$|^amd_comgr_do_action$|^amd_comgr_get_data$|^amd_comgr_release_data$|^amd_comgr_set_data$|^amd_comgr_set_data_name$|^amd_comgr_action_info_set_language$|^amd_comgr_set_data_name$" -------------------------------------------------------------------------------- /comgr/src/double_wave32_on_wave64.ll: -------------------------------------------------------------------------------- 1 | target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" 2 | target triple = "amdgcn-amd-amdhsa" 3 | 4 | @__zluda_ptx_impl__COMPILATION_MODE = linkonce_odr local_unnamed_addr addrspace(4) constant i8 3, align 1 5 | -------------------------------------------------------------------------------- /comgr/src/linux.ll: -------------------------------------------------------------------------------- 1 | target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" 2 | target triple = "amdgcn-amd-amdhsa" 3 | 4 | @__zluda_ptx_impl__IS_WINDOWS = linkonce_odr local_unnamed_addr addrspace(4) constant i1 0 5 | 
-------------------------------------------------------------------------------- /comgr/src/wave32.ll: -------------------------------------------------------------------------------- 1 | target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" 2 | target triple = "amdgcn-amd-amdhsa" 3 | 4 | @__zluda_ptx_impl__COMPILATION_MODE = linkonce_odr local_unnamed_addr addrspace(4) constant i8 1, align 1 5 | -------------------------------------------------------------------------------- /comgr/src/wave32_on_wave64.ll: -------------------------------------------------------------------------------- 1 | target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" 2 | target triple = "amdgcn-amd-amdhsa" 3 | 4 | @__zluda_ptx_impl__COMPILATION_MODE = linkonce_odr local_unnamed_addr addrspace(4) constant i8 2, align 1 5 | -------------------------------------------------------------------------------- /comgr/src/windows.ll: -------------------------------------------------------------------------------- 1 | target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" 2 | target triple = "amdgcn-amd-amdhsa" 3 | 4 | @__zluda_ptx_impl__IS_WINDOWS = linkonce_odr local_unnamed_addr addrspace(4) constant i1 1 5 | -------------------------------------------------------------------------------- /cuda_base/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cuda_base" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | proc-macro = true 9 | 10 | [dependencies] 11 | 
quote = "1.0" 12 | syn = { version = "1.0.93", features = ["full", "visit", "visit-mut"] } 13 | proc-macro2 = "1.0" 14 | rustc-hash = "1.1" 15 | -------------------------------------------------------------------------------- /cuda_base/README: -------------------------------------------------------------------------------- 1 | bindgen build/wrapper.h -o src/cuda.rs --no-partialeq "CUDA_HOST_NODE_PARAMS_st" --no-partialeq "CUDA_HOST_NODE_PARAMS_v2_st" --with-derive-eq --allowlist-function="^cu.*" --allowlist-var="^CU.*" --default-enum-style=newtype --no-layout-tests --no-doc-comments --new-type-alias "^CUdevice_v\d+$|^CUdeviceptr_v\d+$" --must-use-type "cudaError_enum" -- -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\include" 2 | -------------------------------------------------------------------------------- /cuda_base/build/wrapper.h: -------------------------------------------------------------------------------- 1 | #define __CUDA_API_VERSION_INTERNAL 2 | #ifdef _WIN32 3 | #include 4 | #endif 5 | #include 6 | #include 7 | #include -------------------------------------------------------------------------------- /cuda_types/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cuda_types" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | cuda_base = { path = "../cuda_base" } 9 | -------------------------------------------------------------------------------- /cuda_types/src/lib.rs: -------------------------------------------------------------------------------- 1 | use cuda_base::cuda_type_declarations; 2 | 3 | cuda_type_declarations!(); 4 | 5 | impl From<CUresult> for Result<(), CUresult> { 6 | fn from(value: CUresult) -> Self { 7 | match value { 8 | CUresult::CUDA_SUCCESS => Ok(()), 9 | err => Err(err), 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /detours-sys/build/wrapper.h: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | -------------------------------------------------------------------------------- /ext/detours/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 14 | -------------------------------------------------------------------------------- /ext/detours/.github/codeql/codeql-config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: "Detours CodeQL Config" 3 | 4 | queries: 5 | - uses: security-and-quality 6 | - uses: security-extended 7 | -------------------------------------------------------------------------------- /ext/detours/.gitignore: -------------------------------------------------------------------------------- 1 | # C extensions 2 | *.so 3 | 4 | # Unit test / coverage reports 5 | .coverage 6 | .tox 7 | nosetests.xml 8 | 9 | # Translations 10 | *.mo 11 | 12 | # Mr Developer 13 | .mr.developer.cfg 14 | .project 15 | .pydevproject 16 | 17 | # vim 18 | *~ 19 | *.swp 20 | 21 | # Visual Studio build 22 | *.ipch 23 | .vs/ 24 | output/ 25 | include/ 26 | *.exp 27 | *.pdb 28 | *.lib 29 | *.dll 30 | *.exe 31 | obj.* 32 | *.ipdb 33 | *.iobj 34 | *.tlog 35 | *.log 36 | *.obj 37 | *.user 38 | *.recipe 39 | /bin.* 40 | *.vcxproj.FileListAbsolute.txt 41 | *.vcxprojAssemblyReference.cache 42 | -------------------------------------------------------------------------------- /ext/detours/samples/comeasy/wrotei.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for wrotei.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "wrotei" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "wrotei" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours COM Easy Sample" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/cping/cping.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/ext/detours/samples/cping/cping.dat -------------------------------------------------------------------------------- /ext/detours/samples/cping/iping.idl: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Module: iping.idl (cping.exe - COM Ping) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | import "objidl.idl"; 10 | import "oaidl.idl"; 11 | import "oleidl.idl"; 12 | 13 | 14 | [object, uuid(decdbeef-d1ac-11d1-96bc-00aa00573fb0), pointer_default(unique)] 15 | interface IPing : IUnknown 16 | { 17 | HRESULT Ping(void); 18 | HRESULT PingToServer([in] LPSTR pszString); 19 | HRESULT PingToClient([out] LPSTR *ppszString); 20 | HRESULT PingToClientSize([in] ULONG cbOut); 21 | }; 22 | // 23 | ///////////////////////////////////////////////////////////////// End of File. 
24 | -------------------------------------------------------------------------------- /ext/detours/samples/disas/unk.cpp: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (unk.cpp of disas.exe) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | static int value = 0; 11 | 12 | extern "C" void TestCodes() 13 | { 14 | value++; 15 | } 16 | -------------------------------------------------------------------------------- /ext/detours/samples/dtest/dtarge.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for dtarge.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "dtarge" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "dtarge" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Test Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/dynamic_alloc/x64.asm: -------------------------------------------------------------------------------- 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2 | ;; 3 | ;; Detours Test Program 4 | ;; 5 | ;; Microsoft Research Detours Package 6 | ;; 7 | ;; Copyright (c) Microsoft Corporation. All rights reserved. 
8 | ;; 9 | PUBLIC CodeTemplate 10 | PUBLIC CodeTemplate_End 11 | 12 | _TEXT SEGMENT 13 | 14 | CodeTemplate PROC 15 | nop 16 | nop 17 | mov rax, 0deadbeef00000000h 18 | nop 19 | ret 20 | CodeTemplate_End:: 21 | CodeTemplate ENDP 22 | 23 | _TEXT ENDS 24 | 25 | END 26 | -------------------------------------------------------------------------------- /ext/detours/samples/dynamic_alloc/x86.asm: -------------------------------------------------------------------------------- 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2 | ;; 3 | ;; Detours Test Program 4 | ;; 5 | ;; Microsoft Research Detours Package 6 | ;; 7 | ;; Copyright (c) Microsoft Corporation. All rights reserved. 8 | ;; 9 | .386 10 | .model flat,C 11 | 12 | PUBLIC CodeTemplate 13 | PUBLIC CodeTemplate_End 14 | 15 | _TEXT SEGMENT 16 | 17 | CodeTemplate PROC 18 | nop 19 | nop 20 | nop 21 | mov eax, 0deadbeefh 22 | nop 23 | nop 24 | nop 25 | ret 26 | CodeTemplate_End:: 27 | CodeTemplate ENDP 28 | 29 | _TEXT ENDS 30 | 31 | END 32 | -------------------------------------------------------------------------------- /ext/detours/samples/echo/echofx.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for echofx.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "echofx" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "echofx" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Echo Interception Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/echo/echonul.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // 3 | // 4 | #include <windows.h> 5 | 6 | int WINAPI Echo(PCSTR pszMsg) 7 | { 8 | int sum = 0; 9 | while (*pszMsg) { 10 | sum = sum + *pszMsg++; 11 | } 12 | return sum; 13 | } 14 | 15 | int main() 16 | { 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /ext/detours/samples/echo/main.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // 3 | // 4 | #include <windows.h> 5 | 6 | int WINAPI Echo(PCSTR pszMsg); 7 | 8 | extern "C" int __stdcall mainCRTStartup(HINSTANCE hInstance, 9 | HINSTANCE hPrevInstance, 10 | LPSTR lpCmdLine, 11 | int nCmdShow 12 | ) 13 | { 14 | (void)hInstance; 15 | (void)hPrevInstance; 16 | (void)lpCmdLine; 17 | (void)nCmdShow; 18 | 19 | Echo("Hello World"); 20 | Echo("Goodbye World"); 21 | 22 | return 0x99; 23 | } 24 | 25 | -------------------------------------------------------------------------------- /ext/detours/samples/excep/firstexc.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (firstexc.h of firstexc.exe) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | 10 | #pragma once 11 | #ifndef _FIRSTEXC_H_ 12 | #define _FIRSTEXC_H_ 13 | 14 | /////////////////////////////////////////////// First Chance Exception Filter. 15 | // 16 | LPTOP_LEVEL_EXCEPTION_FILTER WINAPI 17 | DetourFirstChanceExceptionFilter(LPTOP_LEVEL_EXCEPTION_FILTER lpTopLevelFilter); 18 | 19 | #endif // _FIRSTEXC_H_ 20 | // 21 | //////////////////////////////////////////////////////////////// End of File. 22 | -------------------------------------------------------------------------------- /ext/detours/samples/findfunc/extend.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for extend.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "extend" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "extend" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Dynamic Interception Test Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/findfunc/target.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detour Test Program (target.h of target.dll) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | #pragma once 10 | 11 | DWORD WINAPI Target(DWORD dwCount); 12 | 13 | // 14 | ///////////////////////////////////////////////////////////////// End of File. 
15 | -------------------------------------------------------------------------------- /ext/detours/samples/findfunc/target.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for target.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "target" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "target" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Test Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/opengl/ogldet.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for ogldet.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "ogldet" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "ogldet" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Open GL Test Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/opengl/testogl.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // File: testogl.cpp 4 | // Module: testogl.exe (oglsimple.dll) 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | int __cdecl main() 12 | { 13 | printf("testogl.exe: Starting\n"); 14 | fflush(stdout); 15 | 16 | glFinish(); 17 | 18 | printf("testogl.exe: done\n"); 19 | fflush(stdout); 20 | 21 | return 0; 22 | } 23 | // 24 | ///////////////////////////////////////////////////////////////// End of File. 25 | -------------------------------------------------------------------------------- /ext/detours/samples/simple/simple.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for simple.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "simple" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "simple" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Test Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/simple/sleep5.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (sleep5.cpp of sleep5.exe) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | int __cdecl main(int argc, char ** argv) 15 | { 16 | if (argc == 2) { 17 | Sleep(atoi(argv[1]) * 1000); 18 | } 19 | else { 20 | printf("sleep5.exe: Starting.\n"); 21 | 22 | Sleep(5000); 23 | 24 | printf("sleep5.exe: Done sleeping.\n"); 25 | } 26 | return 0; 27 | } 28 | // 29 | ///////////////////////////////////////////////////////////////// End of File. 30 | -------------------------------------------------------------------------------- /ext/detours/samples/slept/dslept.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for dslept.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "dslept" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "dslept" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Sleep Interception Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/slept/slept.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detour Test Program (slept.h of slept.dll) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | #pragma once 10 | 11 | DWORD WINAPI UntimedSleepEx(DWORD dwMilliseconds, BOOL bAlertable); 12 | DWORD WINAPI TimedSleepEx(DWORD dwMilliseconds, BOOL bAlertable); 13 | DWORD WINAPI GetSleptTicks(VOID); 14 | DWORD WINAPI TestTicks(VOID); 15 | DWORD WINAPI TestTicksEx(DWORD Add); 16 | 17 | // 18 | ///////////////////////////////////////////////////////////////// End of File. 19 | -------------------------------------------------------------------------------- /ext/detours/samples/slept/slept.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for slept.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "sleep" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "sleep" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Sleep Test Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/talloc/tdll1x.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (tdll1x.cpp of talloc.exe/tdll1x.dll) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | //////////////////////////////////////////////////////////////////// DLL Stuff 11 | // 12 | __declspec(dllexport) unsigned long __stdcall Dll1Function(unsigned long Value) 13 | { 14 | return Value + 1; 15 | } 16 | 17 | ///////////////////////////////////////////////////////////////// End of File. 18 | -------------------------------------------------------------------------------- /ext/detours/samples/talloc/tdll2x.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (tdll2x.cpp of talloc.exe/tdll2x.dll) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | //////////////////////////////////////////////////////////////////// DLL Stuff 11 | // 12 | __declspec(dllexport) unsigned long __stdcall Dll2Function(unsigned long Value) 13 | { 14 | return Value + 1; 15 | } 16 | 17 | ///////////////////////////////////////////////////////////////// End of File. 
18 | -------------------------------------------------------------------------------- /ext/detours/samples/talloc/tdll3x.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (tdll3x.cpp of talloc.exe/tdll3x.dll) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | //////////////////////////////////////////////////////////////////// DLL Stuff 11 | // 12 | __declspec(dllexport) unsigned long __stdcall Dll3Function(unsigned long Value) 13 | { 14 | return Value + 1; 15 | } 16 | 17 | ///////////////////////////////////////////////////////////////// End of File. 18 | -------------------------------------------------------------------------------- /ext/detours/samples/talloc/tdll4x.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (tdll4x.cpp of talloc.exe/tdll4x.dll) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | //////////////////////////////////////////////////////////////////// DLL Stuff 11 | // 12 | __declspec(dllexport) unsigned long __stdcall Dll4Function(unsigned long Value) 13 | { 14 | return Value + 1; 15 | } 16 | 17 | ///////////////////////////////////////////////////////////////// End of File. 
18 | -------------------------------------------------------------------------------- /ext/detours/samples/talloc/tdll5x.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (tdll5x.cpp of talloc.exe/tdll5x.dll) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | //////////////////////////////////////////////////////////////////// DLL Stuff 11 | // 12 | __declspec(dllexport) unsigned long __stdcall Dll5Function(unsigned long Value) 13 | { 14 | return Value + 1; 15 | } 16 | 17 | ///////////////////////////////////////////////////////////////// End of File. 18 | -------------------------------------------------------------------------------- /ext/detours/samples/talloc/tdll6x.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (tdll6x.cpp of talloc.exe/tdll6x.dll) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | //////////////////////////////////////////////////////////////////// DLL Stuff 11 | // 12 | __declspec(dllexport) unsigned long __stdcall Dll6Function(unsigned long Value) 13 | { 14 | return Value + 1; 15 | } 16 | 17 | ///////////////////////////////////////////////////////////////// End of File. 
18 | -------------------------------------------------------------------------------- /ext/detours/samples/talloc/tdll7x.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (tdll7x.cpp of talloc.exe/tdll7x.dll) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | //////////////////////////////////////////////////////////////////// DLL Stuff 11 | // 12 | __declspec(dllexport) unsigned long __stdcall Dll7Function(unsigned long Value) 13 | { 14 | return Value + 1; 15 | } 16 | 17 | ///////////////////////////////////////////////////////////////// End of File. 18 | -------------------------------------------------------------------------------- /ext/detours/samples/talloc/tdll8x.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (tdll8x.cpp of talloc.exe/tdll8x.dll) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | //////////////////////////////////////////////////////////////////// DLL Stuff 11 | // 12 | __declspec(dllexport) unsigned long __stdcall Dll8Function(unsigned long Value) 13 | { 14 | return Value + 1; 15 | } 16 | 17 | ///////////////////////////////////////////////////////////////// End of File. 
18 | -------------------------------------------------------------------------------- /ext/detours/samples/talloc/tdll9x.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (tdll9x.cpp of talloc.exe/tdll9x.dll) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | //////////////////////////////////////////////////////////////////// DLL Stuff 11 | // 12 | __declspec(dllexport) unsigned long __stdcall Dll9Function(unsigned long Value) 13 | { 14 | return Value + 1; 15 | } 16 | 17 | ///////////////////////////////////////////////////////////////// End of File. 18 | -------------------------------------------------------------------------------- /ext/detours/samples/traceapi/trcapi.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for trcapi.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "trcapi" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "trcapi" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Win32 API Tracing Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/tracebld/trcbld.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for trcbld.rc. 
4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "trcbld" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "trcbld" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Build Tracing Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/tracelnk/trclnk.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for trclnk.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "trclnk" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "trclnk" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Dynamic Linking Trace Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/tracemem/trcmem.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for trcmem.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "trcmem" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "trcmem" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Memory Trace Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/tracereg/trcreg.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for trcreg.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "trcreg" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "trcreg" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Registry Trace Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/traceser/trcser.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for trcser.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "trcser" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "trcser" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours Serial Trace Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/tracessl/trcssl.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for trcssl.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "trcssl" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "trcssl" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours SSL Trace Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/tracetcp/trctcp.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for trctcp.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "trctcp" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "trctcp" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours TCP Trace Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/samples/tryman/tryman.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Detours Test Program (tryman.cpp of tryman.exe) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | #include 11 | 12 | extern int WINAPI Test3264(int arg); 13 | 14 | int __cdecl main(int argc, char ** argv) 15 | { 16 | (void)argv; 17 | int ret = 0; 18 | 19 | ret = Test3264(argc); 20 | return ret == 0 ? ret : 0; 21 | } 22 | // 23 | ///////////////////////////////////////////////////////////////// End of File. 24 | -------------------------------------------------------------------------------- /ext/detours/samples/tryman/tstman.rc: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Version information for tstman.rc. 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | 10 | #include "detver.h" 11 | 12 | #define VER_INTERNALNAME_STR "tstman" DETOURS_STRINGIFY(DETOURS_BITS) 13 | #define VER_ORIGINALFILENAME_STR "tstman" DETOURS_STRINGIFY(DETOURS_BITS) ".dll" 14 | #define VER_FILEDESCRIPTION_STR "Detours 32/64-bit Test Module" 15 | #define VER_COMPANYNAME_STR "Microsoft Corporation" 16 | 17 | #include "common.ver" 18 | -------------------------------------------------------------------------------- /ext/detours/src/detver.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Common version parameters. 4 | // 5 | // Microsoft Research Detours Package, Version 4.0.1 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | 10 | #define _USING_V110_SDK71_ 1 11 | #include "winver.h" 12 | #if 0 13 | #include 14 | #include 15 | #else 16 | #ifndef DETOURS_STRINGIFY 17 | #define DETOURS_STRINGIFY_(x) #x 18 | #define DETOURS_STRINGIFY(x) DETOURS_STRINGIFY_(x) 19 | #endif 20 | 21 | #define VER_FILEFLAGSMASK 0x3fL 22 | #define VER_FILEFLAGS 0x0L 23 | #define VER_FILEOS 0x00040004L 24 | #define VER_FILETYPE 0x00000002L 25 | #define VER_FILESUBTYPE 0x00000000L 26 | #endif 27 | #define VER_DETOURS_BITS DETOURS_STRINGIFY(DETOURS_BITS) 28 | -------------------------------------------------------------------------------- /ext/detours/src/disolarm.cpp: -------------------------------------------------------------------------------- 1 | #define DETOURS_ARM_OFFLINE_LIBRARY 2 | #include "disasm.cpp" 3 | -------------------------------------------------------------------------------- /ext/detours/src/disolarm64.cpp: -------------------------------------------------------------------------------- 1 | #define DETOURS_ARM64_OFFLINE_LIBRARY 2 | #include "disasm.cpp" 3 | -------------------------------------------------------------------------------- /ext/detours/src/disolia64.cpp: 
-------------------------------------------------------------------------------- 1 | #define DETOURS_IA64_OFFLINE_LIBRARY 2 | #include "disasm.cpp" 3 | -------------------------------------------------------------------------------- /ext/detours/src/disolx64.cpp: -------------------------------------------------------------------------------- 1 | #define DETOURS_X64_OFFLINE_LIBRARY 2 | #include "disasm.cpp" 3 | -------------------------------------------------------------------------------- /ext/detours/src/disolx86.cpp: -------------------------------------------------------------------------------- 1 | #define DETOURS_X86_OFFLINE_LIBRARY 2 | #include "disasm.cpp" 3 | -------------------------------------------------------------------------------- /ext/detours/tests/main.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Unit Test Main (main.cpp of unittests.exe) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 8 | // 9 | #define CATCH_CONFIG_MAIN 10 | #include "catch.hpp" 11 | -------------------------------------------------------------------------------- /ext/detours/tests/test_image_api.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Unit Tests for Detours Image API (test_image_api.cpp of unittests.exe) 4 | // 5 | // Microsoft Research Detours Package 6 | // 7 | // Copyright (c) Microsoft Corporation. All rights reserved. 
8 | // 9 | #include "catch.hpp" 10 | #include "windows.h" 11 | #include "detours.h" 12 | 13 | TEST_CASE("DetourBinaryOpen", "[image]") 14 | { 15 | SECTION("Passing INVALID_HANDLE, results in error") 16 | { 17 | auto binary = DetourBinaryOpen(INVALID_HANDLE_VALUE); 18 | REQUIRE( GetLastError() == ERROR_INVALID_HANDLE ); 19 | REQUIRE( binary == nullptr ); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo. 2 | /target 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | image: rust:buster 2 | 3 | variables: 4 | LLVM_SYS_150_FFI_WORKAROUND: "YES" 5 | 6 | before_script: 7 | - apt-get update -qq && apt-get install -qq -y lsb-release software-properties-common 8 | - wget https://apt.llvm.org/llvm.sh 9 | - chmod +x llvm.sh 10 | - ./llvm.sh 15 11 | - apt-get install libpolly-15-dev 12 | 13 | test: 14 | script: 15 | - cargo build 16 | - cargo test 17 | - cargo run --example nop-function 18 | - cargo run --example jit-function 19 | - echo "Hello, world!" 
| cargo run --example disassembler 20 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | description = "Bindings to LLVM's C API" 3 | repository = "https://gitlab.com/taricorp/llvm-sys.rs" 4 | readme = "README.md" 5 | license = "MIT" 6 | keywords = ["bindings", "llvm"] 7 | categories = ["external-ffi-bindings"] 8 | links = "llvm-15" 9 | name = "llvm-sys" 10 | version = "150.1.2" 11 | authors = [ 12 | "Peter Marheine ", 13 | ] 14 | build = "build.rs" 15 | 16 | [dependencies] 17 | libc = "0.2" 18 | 19 | [build-dependencies] 20 | cmake = "0.1" 21 | convert_case = "0.5" 22 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/appveyor.yml: -------------------------------------------------------------------------------- 1 | version: 1.0.{build} 2 | 3 | image: macos 4 | install: 5 | - sh: >- 6 | brew install llvm 7 | - sh: >- 8 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o rustup.sh && sh rustup.sh -y 9 | build: off 10 | test_script: 11 | - sh: | 12 | export PATH=/usr/local/bin:$PATH 13 | source ~/.cargo/env 14 | export LLVM_SYS_110_PREFIX=/usr/local/opt/llvm 15 | export LLVM_CONFIG_PATH=${LLVM_SYS_110_PREFIX}/bin/llvm-config 16 | 17 | cargo test 18 | cargo run --example nop-function 19 | cargo run --example jit-function 20 | echo "Hello, world!" 
| cargo run --example disassembler 21 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/build.cmake: -------------------------------------------------------------------------------- 1 | cmake_policy(SET CMP0091 NEW) 2 | set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL") 3 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/scripts/build-binaries.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | if [ $# -lt 1 ] # $# is the argument count; the LLVM version argument is required 5 | then 6 | echo "Usage: $0 <version>" >&2 7 | echo "Example: $0 3.9.1" >&2 8 | exit 1 9 | fi 10 | 11 | VERSION=$1 12 | 13 | # Dependencies (for Ubuntu): 14 | # * wget 15 | # * xz-utils 16 | # * ninja-build 17 | # * cmake 18 | # * build-essential 19 | # * python 20 | 21 | wget http://releases.llvm.org/$VERSION/llvm-$VERSION.src.tar.xz 22 | tar xJf llvm-$VERSION.src.tar.xz 23 | mkdir build llvm-$VERSION 24 | cd build 25 | cmake -G Ninja ../llvm-$VERSION.src -DLLVM_TARGETS_TO_BUILD=X86 -DCMAKE_BUILD_TYPE=MinSizeRel -DLLVM_ENABLE_ASSERTIONS=ON -DCMAKE_INSTALL_PREFIX=/usr/local/llvm-$VERSION -DCMAKE_INSTALL_UTILS 26 | cmake --build . --target install 27 | cd .. 28 | tar cJf llvm-$VERSION.linux.tar.xz /usr/local/llvm-$VERSION 29 | 30 | # Additional flags for MSVC 31 | # (CXX) /GL /Gy /Gw 32 | # (link) /LTCG /OPT:REF,ICF 33 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/bit_writer.rs: -------------------------------------------------------------------------------- 1 | //! Output of the LLVM bitcode format. 2 | 3 | use super::prelude::*; 4 | 5 | extern "C" { 6 | /// Write a module to the specified path. 7 | /// 8 | /// Returns 0 on success. 9 | pub fn LLVMWriteBitcodeToFile(M: LLVMModuleRef, Path: *const ::libc::c_char) -> ::libc::c_int; 10 | /// Write a module to an open file descriptor. 11 | /// 12 | /// Returns 0 on success. 
13 | pub fn LLVMWriteBitcodeToFD( 14 | M: LLVMModuleRef, 15 | FD: ::libc::c_int, 16 | ShouldClose: ::libc::c_int, 17 | Unbuffered: ::libc::c_int, 18 | ) -> ::libc::c_int; 19 | /// Deprecated: use LLVMWriteBitcodeToFD 20 | pub fn LLVMWriteBitcodeToFileHandle(M: LLVMModuleRef, Handle: ::libc::c_int) -> ::libc::c_int; 21 | /// Writes a module to a new memory buffer. 22 | pub fn LLVMWriteBitcodeToMemoryBuffer(M: LLVMModuleRef) -> LLVMMemoryBufferRef; 23 | } 24 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/error.rs: -------------------------------------------------------------------------------- 1 | pub const LLVMErrorSuccess: ::libc::c_int = 0; 2 | 3 | #[derive(Debug)] 4 | pub enum LLVMOpaqueError {} 5 | 6 | pub type LLVMErrorRef = *mut LLVMOpaqueError; 7 | 8 | pub type LLVMErrorTypeId = *const ::libc::c_void; 9 | 10 | extern "C" { 11 | pub fn LLVMGetErrorTypeId(Err: LLVMErrorRef) -> LLVMErrorTypeId; 12 | pub fn LLVMConsumeError(Err: LLVMErrorRef); 13 | pub fn LLVMGetErrorMessage(Err: LLVMErrorRef) -> *mut ::libc::c_char; 14 | pub fn LLVMDisposeErrorMessage(ErrMsg: *mut ::libc::c_char); 15 | pub fn LLVMGetStringErrorTypeId() -> LLVMErrorTypeId; 16 | /// Create a StringError. 17 | pub fn LLVMCreateStringError(ErrMsg: *const ::libc::c_char) -> LLVMErrorRef; 18 | } 19 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/error_handling.rs: -------------------------------------------------------------------------------- 1 | pub type LLVMFatalErrorHandler = Option<extern "C" fn(Reason: *const ::libc::c_char)>; 2 | 3 | extern "C" { 4 | /// Install a fatal error handler. 5 | /// 6 | /// LLVM will call `exit(1)` if it detects a fatal error. A callback 7 | /// registered with this function will be invoked before the program is 8 | /// exited. 9 | pub fn LLVMInstallFatalErrorHandler(Handler: LLVMFatalErrorHandler); 10 | /// Reset fatal error handling to the default. 
11 | pub fn LLVMResetFatalErrorHandler(); 12 | /// Enable LLVM's built-in stack trace code. 13 | /// 14 | /// This intercepts the OS's crash signals and prints which component 15 | /// of LLVM you were in at the time of the crash. 16 | pub fn LLVMEnablePrettyStackTrace(); 17 | } 18 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/ir_reader.rs: -------------------------------------------------------------------------------- 1 | //! The IR reader 2 | 3 | use super::prelude::*; 4 | 5 | extern "C" { 6 | /// Read LLVM IR from a memory buffer and convert it to an in-memory Module. 7 | /// 8 | /// Returns 0 on success, and an optional human-readable description of any 9 | /// errors that occurred. 10 | pub fn LLVMParseIRInContext( 11 | ContextRef: LLVMContextRef, 12 | MemBuf: LLVMMemoryBufferRef, 13 | OutM: *mut LLVMModuleRef, 14 | OutMessage: *mut *mut ::libc::c_char, 15 | ) -> LLVMBool; 16 | } 17 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/linker.rs: -------------------------------------------------------------------------------- 1 | //! The module/file/archive linker 2 | 3 | use super::prelude::*; 4 | 5 | #[repr(C)] 6 | #[derive(Debug)] 7 | pub enum LLVMLinkerMode { 8 | LLVMLinkerDestroySource = 0, 9 | #[deprecated(since = "3.7.0", note = "LLVMLinkerPreserveSource has no effect")] 10 | LLVMLinkerPreserveSource_Removed = 1, 11 | } 12 | 13 | extern "C" { 14 | /// Link the source module into the destination module. 15 | /// 16 | /// Destroys the source module, returns true on error. Use the diagnostic 17 | /// handler to get any diagnostic message. 
18 | pub fn LLVMLinkModules2(Dest: LLVMModuleRef, Src: LLVMModuleRef) -> LLVMBool; 19 | } 20 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/orc2/ee.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | extern "C" { 4 | pub fn LLVMOrcCreateRTDyldObjectLinkingLayerWithSectionMemoryManager( 5 | ES: LLVMOrcExecutionSessionRef, 6 | ) -> LLVMOrcObjectLayerRef; 7 | pub fn LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener( 8 | RTDyldObjLinkingLayer: LLVMOrcObjectLayerRef, 9 | Listener: LLVMJITEventListenerRef, 10 | ); 11 | } 12 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/support.rs: -------------------------------------------------------------------------------- 1 | use super::prelude::*; 2 | 3 | extern "C" { 4 | pub fn LLVMLoadLibraryPermanently(Filename: *const ::libc::c_char) -> LLVMBool; 5 | pub fn LLVMParseCommandLineOptions( 6 | argc: ::libc::c_int, 7 | argv: *const *const ::libc::c_char, 8 | Overview: *const ::libc::c_char, 9 | ); 10 | /// Search all previously loaded dynamic libraries for the named symbol. 11 | /// 12 | /// Returns its address if found, otherwise null. 13 | /// 14 | /// Added in LLVM 3.7. 15 | pub fn LLVMSearchForAddressOfSymbol(symbolName: *const ::libc::c_char) -> *mut ::libc::c_void; 16 | /// Permanently add the named symbol with the provided value. 17 | /// 18 | /// Symbols added this way are searched before any libraries. 19 | /// 20 | /// Added in LLVM 3.7. 
21 | pub fn LLVMAddSymbol(symbolName: *const ::libc::c_char, symbolValue: *mut ::libc::c_void); 22 | } 23 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/transforms.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | // Util 4 | extern "C" { 5 | pub fn LLVMAddLowerSwitchPass(PM: LLVMPassManagerRef ); 6 | 7 | pub fn LLVMAddPromoteMemoryToRegisterPass(PM: LLVMPassManagerRef ); 8 | } 9 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/transforms/aggressive_instcombine.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | 3 | extern "C" { 4 | pub fn LLVMAddAggressiveInstCombinerPass(PM: LLVMPassManagerRef); 5 | } 6 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/transforms/instcombine.rs: -------------------------------------------------------------------------------- 1 | use super::super::prelude::*; 2 | 3 | extern "C" { 4 | pub fn LLVMAddInstructionCombiningPass(PM: LLVMPassManagerRef); 5 | } 6 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/transforms/util.rs: -------------------------------------------------------------------------------- 1 | use super::super::prelude::*; 2 | 3 | extern "C" { 4 | pub fn LLVMAddLowerSwitchPass(PM: LLVMPassManagerRef); 5 | 6 | pub fn LLVMAddPromoteMemoryToRegisterPass(PM: LLVMPassManagerRef); 7 | 8 | pub fn LLVMAddAddDiscriminatorsPass(PM: LLVMPassManagerRef); 9 | } 10 | -------------------------------------------------------------------------------- /ext/llvm-sys.rs/src/transforms/vectorize.rs: -------------------------------------------------------------------------------- 1 | //! Vectorization transformations of LLVM IR. 
2 | 3 | use super::super::prelude::*; 4 | 5 | extern "C" { 6 | pub fn LLVMAddLoopVectorizePass(PM: LLVMPassManagerRef); 7 | pub fn LLVMAddSLPVectorizePass(PM: LLVMPassManagerRef); 8 | } 9 | -------------------------------------------------------------------------------- /hip_common/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hip_common" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | 9 | [features] 10 | rocm5 = ["hip_runtime-sys/rocm5"] 11 | 12 | [dependencies] 13 | const_format = "0.2.30" 14 | hip_runtime-sys = { path = "../hip_runtime-sys" } 15 | cuda_types = { path = "../cuda_types" } 16 | rusqlite = { version = "0.28.0", features = ["bundled", "serde_json"] } 17 | sha2 = "0.10.2" 18 | itertools = "0.10.5" 19 | capnp = "0.17.2" 20 | rustc-hash = "1.1" 21 | goblin = { version = "0.5.1", default-features = false, features = ["elf64", "elf32", "endian_fd"] } 22 | memchr = "2.5.0" 23 | libloading = "0.8" 24 | 25 | [build-dependencies] 26 | capnpc = "0.17.2" 27 | -------------------------------------------------------------------------------- /hip_common/src/raytracing.rs: -------------------------------------------------------------------------------- 1 | use rustc_hash::FxHashMap; 2 | use std::{alloc::Layout, ffi::CString}; 3 | 4 | #[derive(Clone)] 5 | pub struct VariablesBlock { 6 | pub variables: FxHashMap<CString, Variable>, 7 | pub layout: Layout, 8 | } 9 | 10 | impl VariablesBlock { 11 | pub fn empty() -> Self { 12 | Self { 13 | variables: FxHashMap::default(), 14 | layout: Layout::new::<()>(), 15 | } 16 | } 17 | } 18 | 19 | #[derive(Clone, PartialEq, Eq)] 20 | pub struct Variable { 21 | pub size: u32, 22 | pub offset: u32, 23 | pub default_value: Vec<u8>, 24 | } 25 | -------------------------------------------------------------------------------- /hip_common/src/zluda.capnp: -------------------------------------------------------------------------------- 1 | 
@0xbefb36a5417c8ae1; 2 | 3 | struct Metadata { 4 | # We can turn it into an union later: "a field can be replaced with a group 5 | # or union containing an equivalent field and some new fields" 6 | version1 @0 :Version1; 7 | } 8 | 9 | struct Version1 { 10 | kernels @0 :List(Kernel); 11 | smVersion @1 :UInt32; 12 | } 13 | 14 | struct Kernel { 15 | name @0 :Text; 16 | minGoupSize @1 :UInt32; 17 | maxGroupSize @2 :UInt32; 18 | } 19 | -------------------------------------------------------------------------------- /hip_common/src/zluda_rt6.capnp: -------------------------------------------------------------------------------- 1 | @0xb631f7be1b968e02; 2 | 3 | struct Metadata { 4 | # We can turn it into an union later: "a field can be replaced with a group 5 | # or union containing an equivalent field and some new fields" 6 | version1 @0 :Version1; 7 | } 8 | 9 | struct Version1 { 10 | attributesSize @0 :UInt32; 11 | attributesAlign @1 :UInt32; 12 | attributes @2 :List(Attribute); 13 | variablesSize @3 :UInt32; 14 | variablesAlign @4 :UInt32; 15 | variables @5 :List(GlobalVariable); 16 | isCallable @6 :Bool; 17 | } 18 | 19 | struct Attribute { 20 | name @0 :Text; 21 | offset @1 :UInt32; 22 | size @2 :UInt32; 23 | } 24 | 25 | struct GlobalVariable { 26 | name @0 :Text; 27 | offset @1 :UInt32; 28 | size @2 :UInt32; 29 | defaultValue @3 :Data; 30 | } 31 | -------------------------------------------------------------------------------- /hip_runtime-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hip_runtime-sys" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | links = "amdhip" 7 | 8 | [features] 9 | rocm5 = [] 10 | 11 | [lib] 12 | 13 | [dependencies] 14 | rustc-hash = "1.1" -------------------------------------------------------------------------------- /hip_runtime-sys/README: -------------------------------------------------------------------------------- 1 | bindgen 
$Env:HIP_PATH/include/hip/hip_runtime_api.h -o src/hip_runtime_api.rs --no-layout-tests --default-enum-style=newtype --allowlist-function "hip.*" --allowlist-type "hip.*" --no-derive-debug --must-use-type hipError_t --new-type-alias "^hipDeviceptr_t$" --allowlist-var "^hip.*$" -- -I"$Env:HIP_PATH/include" -D__HIP_PLATFORM_AMD__ -------------------------------------------------------------------------------- /hip_runtime-sys/build.rs: -------------------------------------------------------------------------------- 1 | use std::env::VarError; 2 | use std::{env, path::PathBuf}; 3 | 4 | fn main() -> Result<(), VarError> { 5 | println!("cargo:rustc-link-lib=dylib=amdhip64"); 6 | if cfg!(windows) { 7 | let env = env::var("CARGO_CFG_TARGET_ENV")?; 8 | if env == "msvc" { 9 | let mut path = PathBuf::from(env::var("HIP_PATH")?); 10 | path.push("lib"); 11 | println!("cargo:rustc-link-search=native={}", path.display()); 12 | } else { 13 | println!("cargo:rustc-link-search=native=C:\\Windows\\System32"); 14 | }; 15 | } else { 16 | println!("cargo:rustc-link-search=native=/opt/rocm/lib/"); 17 | } 18 | Ok(()) 19 | } 20 | -------------------------------------------------------------------------------- /hipblaslt-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hipblaslt-sys" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | links = "hipblaslt" 7 | 8 | [lib] -------------------------------------------------------------------------------- /hipblaslt-sys/README: -------------------------------------------------------------------------------- 1 | bindgen $Env:HIP_PATH/include/hipblaslt/hipblaslt.h -o src/hipblaslt.rs --no-layout-tests --default-enum-style=newtype --allowlist-function "^hipblasLt.*" --allowlist-type "^hipblasLt.*" --no-derive-debug --must-use-type hiprtError -- -I"$Env:HIP_PATH/include" -D__HIP_PLATFORM_AMD__ -D__HIP_PLATFORM_HCC__ -x c++ 2 | 
-------------------------------------------------------------------------------- /hipblaslt-sys/build.rs: -------------------------------------------------------------------------------- 1 | use std::env::VarError; 2 | use std::{env, path::PathBuf}; 3 | 4 | fn main() -> Result<(), VarError> { 5 | println!("cargo:rustc-link-lib=dylib=hipblaslt"); 6 | if cfg!(windows) { 7 | let env = env::var("CARGO_CFG_TARGET_ENV")?; 8 | if env == "msvc" { 9 | let mut path = PathBuf::from(env::var("HIP_PATH")?); 10 | path.push("lib"); 11 | println!("cargo:rustc-link-search=native={}", path.display()); 12 | } else { 13 | println!("cargo:rustc-link-search=native=C:\\Windows\\System32"); 14 | }; 15 | } else { 16 | println!("cargo:rustc-link-search=native=/opt/rocm/lib/"); 17 | } 18 | Ok(()) 19 | } 20 | -------------------------------------------------------------------------------- /hipblaslt-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[allow(warnings)] 2 | mod hipblaslt; 3 | pub use hipblaslt::*; 4 | 5 | impl hipblasOperation_t { 6 | pub const HIPBLAS_OP_N: hipblasOperation_t = hipblasOperation_t(111); 7 | } 8 | impl hipblasOperation_t { 9 | pub const HIPBLAS_OP_T: hipblasOperation_t = hipblasOperation_t(112); 10 | } 11 | impl hipblasOperation_t { 12 | pub const HIPBLAS_OP_C: hipblasOperation_t = hipblasOperation_t(113); 13 | } 14 | #[allow(non_camel_case_types)] 15 | #[repr(transparent)] 16 | #[derive(Copy, Clone, Hash, PartialEq, Eq)] 17 | pub struct hipblasOperation_t(pub ::std::os::raw::c_int); 18 | -------------------------------------------------------------------------------- /hipfft-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hipfft-sys" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | links = "hipfft" 7 | 8 | [lib] -------------------------------------------------------------------------------- 
/hipfft-sys/README: -------------------------------------------------------------------------------- 1 | bindgen $Env:HIP_PATH/include/hipfft/hipfft.h -o src/hipfft.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "hipfft.*" --must-use-type hipfftResult_t -- -I"$Env:HIP_PATH/include" -D__HIP_PLATFORM_AMD__ -------------------------------------------------------------------------------- /hipfft-sys/build.rs: -------------------------------------------------------------------------------- 1 | use std::env::VarError; 2 | use std::{env, path::PathBuf}; 3 | 4 | fn main() -> Result<(), VarError> { 5 | println!("cargo:rustc-link-lib=dylib=hipfft"); 6 | if cfg!(windows) { 7 | let mut path = PathBuf::from(env::var("HIP_PATH")?); 8 | path.push("lib"); 9 | println!("cargo:rustc-link-search=native={}", path.display()); 10 | } else { 11 | println!("cargo:rustc-link-search=native=/opt/rocm/lib/"); 12 | } 13 | Ok(()) 14 | } 15 | -------------------------------------------------------------------------------- /hipfft-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[allow(warnings)] 2 | mod hipfft; 3 | pub use hipfft::*; -------------------------------------------------------------------------------- /hiprt-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hiprt-sys" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | 9 | [dependencies] 10 | libloading = "0.8" 11 | 12 | [target.'cfg(windows)'.dependencies] 13 | winapi = { version = "0.3", features = ["libloaderapi", "std"] } 14 | widestring = "1.0" 15 | -------------------------------------------------------------------------------- /hiprt-sys/Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.bindgen] 2 | command = "bindgen" 3 | args = [ 4 | "include/hiprt.h", 5 | "-o", 
"src/hiprt.rs", 6 | "--rust-target", "1.64", 7 | "--no-layout-tests", 8 | "--no-derive-debug", 9 | "--default-enum-style=newtype", 10 | "--dynamic-loading", "HipRt", 11 | "--must-use-type", "hiprtError", 12 | "--allowlist-function", "hiprt.*", 13 | "--allowlist-type", "hiprt.*", 14 | "--allowlist-var", "^HIPRT.*$", 15 | "--", "-I", "include", "-x", "c++", 16 | ] -------------------------------------------------------------------------------- /hiprt-sys/lib/hiprt64.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/hiprt-sys/lib/hiprt64.dll -------------------------------------------------------------------------------- /hiprt-sys/lib/hiprt64.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/hiprt-sys/lib/hiprt64.lib -------------------------------------------------------------------------------- /hiprt-sys/lib/libhiprt64.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/hiprt-sys/lib/libhiprt64.so -------------------------------------------------------------------------------- /miopen-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "miopen-sys" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | links = "MIOpen" 7 | 8 | [lib] 9 | -------------------------------------------------------------------------------- /miopen-sys/README: -------------------------------------------------------------------------------- 1 | bindgen $Env:HIP_PATH/include/miopen/miopen.h -o src/miopen.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "miopen.*" --allowlist-var 
"MIOPEN_*" --must-use-type miopenStatus_t -- -D__HIP_PLATFORM_AMD__ -DMIOPEN_BACKEND_HIP=1 -DMIOPEN_BETA_API=1 -I"$Env:HIP_PATH/include" -x c++ -------------------------------------------------------------------------------- /miopen-sys/build.rs: -------------------------------------------------------------------------------- 1 | use std::env::VarError; 2 | use std::{env, path::PathBuf}; 3 | 4 | fn main() -> Result<(), VarError> { 5 | println!("cargo:rustc-link-lib=dylib=MIOpen"); 6 | if cfg!(windows) { 7 | let env = env::var("CARGO_CFG_TARGET_ENV")?; 8 | if env == "msvc" { 9 | let mut path = PathBuf::from(env::var("HIP_PATH")?); 10 | path.push("lib"); 11 | println!("cargo:rustc-link-search=native={}", path.display()); 12 | } else { 13 | println!("cargo:rustc-link-search=native=C:\\Windows\\System32"); 14 | }; 15 | } else { 16 | println!("cargo:rustc-link-search=native=/opt/rocm/lib/"); 17 | } 18 | Ok(()) 19 | } 20 | -------------------------------------------------------------------------------- /miopen-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(warnings)] 2 | mod extra; 3 | mod miopen; 4 | 5 | pub use miopen::*; 6 | -------------------------------------------------------------------------------- /offline_compiler/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "offline_compiler" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [[bin]] 8 | name = "zoc" 9 | path = "src/main.rs" 10 | 11 | [features] 12 | rocm5 = ["comgr/rocm5", "hip_common/rocm5", "hip_runtime-sys/rocm5", "ptx/rocm5"] 13 | 14 | [dependencies] 15 | comgr = { path = "../comgr" } 16 | hip_common = { path = "../hip_common" } 17 | hiprt-sys = { path = "../hiprt-sys" } 18 | hip_runtime-sys = { path = "../hip_runtime-sys" } 19 | ptx = { path = "../ptx" } 20 | argh = "0.1" 21 | libloading = "0.8" 22 | 23 | [package.metadata.zluda] 24 | 
debug_only = true 25 | -------------------------------------------------------------------------------- /optix_base/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optix_base" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | proc-macro = true 9 | 10 | [dependencies] 11 | quote = "1.0" 12 | syn = { version = "1.0.93", features = ["full", "visit", "visit-mut"] } 13 | proc-macro2 = "1.0" 14 | rustc-hash = "1.1" 15 | -------------------------------------------------------------------------------- /optix_base/README: -------------------------------------------------------------------------------- 1 | bindgen include/wrapper.hpp -o src/optix.rs --no-layout-tests --size_t-is-usize --default-enum-style=newtype --no-derive-debug --whitelist-type="Optix.*" --whitelist-function "optix.*" --whitelist-var "OPTIX.*" -- -I"F:\dev\OptiX SDK 7.4.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5\include 2 | bindgen include/wrapper6.hpp -o src/optix6.rs --new-type-alias=RTobject --no-layout-tests --size_t-is-usize --default-enum-style=newtype --no-derive-debug --whitelist-type="RT.*" --whitelist-function "rt.*" --whitelist-var "RT.*" -- -I"F:\dev\OptiX SDK 6.5.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5\include" "-D__int64=long long" 3 | -------------------------------------------------------------------------------- /optix_base/include/wrapper.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | -------------------------------------------------------------------------------- /optix_base/include/wrapper6.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | -------------------------------------------------------------------------------- 
/optix_dump/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optix_dump" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "optix_dump" 9 | crate-type = ["cdylib"] 10 | 11 | [dependencies] 12 | cuda_types = { path = "../cuda_types" } 13 | optix_base = { path = "../optix_base" } 14 | wmi = "0.9" 15 | winapi = { version = "0.3", features = ["libloaderapi", "std"] } 16 | lazy_static = "1.4.0" 17 | paste = "1.0.7" 18 | sha2 = "0.10.2" 19 | generic-array = "0.14.5" 20 | typenum = "1.15.0" 21 | 22 | [package.metadata.zluda] 23 | debug_only = true 24 | windows_only = true 25 | broken = true 26 | -------------------------------------------------------------------------------- /optix_dump/README: -------------------------------------------------------------------------------- 1 | bindgen include/wrapper.hpp -o src/optix.rs --no-layout-tests --size_t-is-usize --default-enum-style=newtype --no-derive-debug --whitelist-type="Optix.*" --whitelist-function "optix.*" --whitelist-var "OPTIX.*" -- -I"F:\dev\OptiX SDK 7.4.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5\include 2 | -------------------------------------------------------------------------------- /optix_dump/include/wrapper.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | -------------------------------------------------------------------------------- /optix_types/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optix_types" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | 9 | [dependencies] 10 | cuda_types = { path = "../cuda_types" } 11 | optix_base = { path = "../optix_base" } 12 | -------------------------------------------------------------------------------- /optix_types/src/lib.rs: 
-------------------------------------------------------------------------------- 1 | 2 | optix_base::optix6_type_declarations!(RTformat, RTresult); -------------------------------------------------------------------------------- /process_address_table/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "process_address_table" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | detours-sys = { path = "../detours-sys" } 9 | libloading = "0.8" 10 | 11 | [dependencies.windows] 12 | version = "0.48" 13 | features = [ 14 | "Win32_Foundation", 15 | "Win32_System_Diagnostics_Debug", 16 | "Win32_System_LibraryLoader", 17 | ] 18 | 19 | [package.metadata.zluda] 20 | debug_only = true 21 | skip_zip = true 22 | -------------------------------------------------------------------------------- /ptx/build.rs: -------------------------------------------------------------------------------- 1 | extern crate lalrpop; 2 | 3 | fn main() { 4 | lalrpop::process_root().unwrap(); 5 | } 6 | -------------------------------------------------------------------------------- /ptx/lib/.gitattributes: -------------------------------------------------------------------------------- 1 | *.cpp diff 2 | *.hpp diff 3 | -------------------------------------------------------------------------------- /ptx/lib/raytracing_callable.cpp: -------------------------------------------------------------------------------- 1 | #include "raytracing.hpp" 2 | 3 | extern "C" { 4 | __device__ void FUNCTION_NAME(); 5 | __device__ void* EXPORTED_FUNCTION = (void*)FUNCTION_NAME; 6 | static __global__ void EXPORTED_KERNEL() { } 7 | } 8 | 9 | #define extern auto hack = 10 | #define constexpr ; 11 | -------------------------------------------------------------------------------- /ptx/lib/zluda_ptx_impl.bc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/ptx/lib/zluda_ptx_impl.bc -------------------------------------------------------------------------------- /ptx/lib/zluda_rt_ptx_impl.bc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/ptx/lib/zluda_rt_ptx_impl.bc -------------------------------------------------------------------------------- /ptx/src/test/operands.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry foobar( 6 | .param .u32 foobar_param_0 7 | ) 8 | { 9 | .reg .u32 %reg<10>; 10 | .reg .u64 %reg_64; 11 | .reg .pred p; 12 | .reg .pred q; 13 | 14 | // reg 15 | ld.param.u32 %reg0, [foobar_param_0]; 16 | // reg with offset 17 | ld.param.u32 %reg1, [foobar_param_0+1]; 18 | ld.param.u32 %reg2, [foobar_param_0+-1]; 19 | // immediate - only in local 20 | ld.local.u32 %reg3, [1]; 21 | 22 | // ids 23 | add.u32 %reg0, %reg1, %reg2; 24 | // immediate 25 | add.u32 %reg0, 1, %reg2; 26 | // reg with offset 27 | add.u32 %reg0, %reg1+1, %reg2+-1; 28 | // surprisingly, setp accepts all forms 29 | setp.eq.and.u32 p, %reg1+1, %reg2+-1, 2; 30 | 31 | // vector index - only supported by mov (maybe: ld, st, tex) 32 | mov.u32 %reg0, %ntid.x; 33 | } 34 | -------------------------------------------------------------------------------- /ptx/src/test/ptx_raytracing/closest_hit.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace optix; 4 | 5 | rtBuffer output; 6 | rtDeclareVariable(rtCallableProgramId, eval, , ); 7 | 8 | RT_PROGRAM void closest_hit() 9 | { 10 | float3 result = eval(); 11 | output[0] = make_float4(result.x, result.y, result.y, 0.0); 12 | } -------------------------------------------------------------------------------- 
/ptx/src/test/spirv_build/bar_sync.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry bar_sync() 6 | { 7 | .reg .u32 temp_32; 8 | bar.sync temp_32; 9 | ret; 10 | } 11 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_build/global_extern_array.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .extern .global .b32 foobar [1]; -------------------------------------------------------------------------------- /ptx/src/test/spirv_build/noreturn.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .weak .func noreturn(.param .b64 noreturn_0) 6 | .noreturn 7 | { 8 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_build/param_func_array_0.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .func foobar( 6 | .param .b32 foobar[] 7 | ) 8 | { 9 | ret; 10 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_fail/const_ptr.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .const .b32 foobar []; -------------------------------------------------------------------------------- /ptx/src/test/spirv_fail/global_ptr.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .global .b32 foobar []; -------------------------------------------------------------------------------- /ptx/src/test/spirv_fail/local_ptr.txt: 
-------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | 6 | .visible .entry func() 7 | { 8 | 9 | .local .b32 foobar [1]; 10 | 11 | ret; 12 | } 13 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_fail/param_entry_array_0.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry foobar( 6 | .param .b32 foobar[] 7 | ) 8 | { 9 | ret; 10 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_fail/param_vector.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .func foobar( 6 | .param .b32 .v2 foobar 7 | ) 8 | { 9 | ret; 10 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_fail/shared_ptr.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | extern .shared .b32 foobar []; -------------------------------------------------------------------------------- /ptx/src/test/spirv_fail/shared_ptr2.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .extern .shared .b32 foobar1 []; 6 | 7 | .visible .func _Z4dupaPf( 8 | .param .b64 _Z4dupaPf_param_0 9 | ) 10 | { 11 | .shared .b32 foobar2 []; 12 | ret; 13 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/abs.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry abs( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | 
{ 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 temp1; 13 | .reg .s32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.b32 temp1, [in_addr]; 19 | ld.b32 temp2, [in_addr+4]; 20 | abs.s32 temp1, temp1; 21 | abs.s32 temp2, temp2; 22 | st.b32 [out_addr], temp1; 23 | st.b32 [out_addr+4], temp2; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/activemask.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry activemask( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 out_addr; 11 | .reg .b32 temp; 12 | 13 | ld.param.u64 out_addr, [output]; 14 | 15 | activemask.b32 temp; 16 | st.u32 [out_addr], temp; 17 | ret; 18 | } 19 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/add.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .entry add( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u64 temp, [in_addr]; 19 | add.u64 temp2, temp, 1; 20 | st.u64 [out_addr], temp2; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/add_global.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | 6 | .global .align 4 .f32 PI = 0f40490FDB; 7 | 8 | .visible .entry add_global( 9 | .param .u64 input, 10 | .param .u64 output 11 | ) 12 | { 13 | .reg .u64 in_addr; 14 | .reg .u64 out_addr; 15 | .reg .f32 temp; 16 
| .reg .f32 pi; 17 | 18 | ld.param.u64 in_addr, [input]; 19 | ld.param.u64 out_addr, [output]; 20 | 21 | ld.f32 temp, [in_addr]; 22 | ld.global.f32 pi, [PI]; 23 | add.f32 temp, temp, pi; 24 | st.f32 [out_addr], temp; 25 | ret; 26 | } 27 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/add_non_coherent.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_32 3 | .address_size 64 4 | 5 | .visible .entry add_non_coherent( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.global.nc.u64 temp, [in_addr]; 19 | add.u64 temp2, temp, 1; 20 | st.global.u64 [out_addr], temp2; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/add_param_ptr.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .entry add_param_ptr( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | mov.b64 in_addr, input; 16 | mov.b64 out_addr, output; 17 | 18 | ld.param.u64 in_addr, [in_addr+0]; 19 | ld.param.u64 out_addr, [out_addr+0]; 20 | 21 | ld.u64 temp, [in_addr]; 22 | add.u64 temp2, temp, 1; 23 | st.u64 [out_addr], temp2; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/add_tuning.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry add_tuning( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | .maxntid 256, 1, 1 10 | 
.minnctapersm 4 11 | { 12 | .reg .u64 in_addr; 13 | .reg .u64 out_addr; 14 | .reg .u64 temp; 15 | .reg .u64 temp2; 16 | 17 | ld.param.u64 in_addr, [input]; 18 | ld.param.u64 out_addr, [output]; 19 | 20 | ld.u64 temp, [in_addr]; 21 | add.u64 temp2, temp, 1; 22 | st.u64 [out_addr], temp2; 23 | ret; 24 | } 25 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/and.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry and( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp1; 13 | .reg .u32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u32 temp1, [in_addr]; 19 | ld.u32 temp2, [in_addr+4]; 20 | and.b32 temp1, temp1, temp2; 21 | st.u32 [out_addr], temp1; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/atom_add.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry atom_add( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .shared .align 4 .b8 shared_mem[1024]; 11 | 12 | .reg .u64 in_addr; 13 | .reg .u64 out_addr; 14 | .reg .u32 temp1; 15 | .reg .u32 temp2; 16 | 17 | ld.param.u64 in_addr, [input]; 18 | ld.param.u64 out_addr, [output]; 19 | 20 | ld.u32 temp1, [in_addr]; 21 | ld.u32 temp2, [in_addr+4]; 22 | st.shared.u32 [shared_mem], temp1; 23 | atom.shared.add.u32 temp1, [shared_mem], temp2; 24 | ld.shared.u32 temp2, [shared_mem]; 25 | st.u32 [out_addr], temp1; 26 | st.u32 [out_addr+4], temp2; 27 | ret; 28 | } 29 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/atom_add_f16.ptx: 
-------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_70 3 | .address_size 64 4 | 5 | .visible .entry atom_add_f16( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .shared .align 4 .b8 shared_mem[1024]; 11 | 12 | .reg .u64 in_addr; 13 | .reg .u64 out_addr; 14 | .reg .f16 temp; 15 | 16 | ld.param.u64 in_addr, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | ld.b16 temp, [in_addr+2]; 20 | atom.add.noftz.f16 temp, [in_addr], temp; 21 | st.b16 [out_addr], temp; 22 | ld.b16 temp, [in_addr]; 23 | st.b16 [out_addr+2], temp; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/atom_add_float.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry atom_add_float( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .shared .align 4 .b8 shared_mem[1024]; 11 | 12 | .reg .u64 in_addr; 13 | .reg .u64 out_addr; 14 | .reg .f32 temp1; 15 | .reg .f32 temp2; 16 | 17 | ld.param.u64 in_addr, [input]; 18 | ld.param.u64 out_addr, [output]; 19 | 20 | ld.f32 temp1, [in_addr]; 21 | ld.f32 temp2, [in_addr+4]; 22 | st.shared.f32 [shared_mem], temp1; 23 | atom.shared.add.f32 temp1, [shared_mem], temp2; 24 | ld.shared.f32 temp2, [shared_mem]; 25 | st.f32 [out_addr], temp1; 26 | st.f32 [out_addr+4], temp2; 27 | ret; 28 | } 29 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/atom_cas.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry atom_cas( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp1; 13 | .reg .u32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 
out_addr, [output]; 17 | 18 | ld.u32 temp1, [in_addr]; 19 | atom.cas.b32 temp1, [in_addr+4], temp1, 100; 20 | ld.u32 temp2, [in_addr+4]; 21 | st.u32 [out_addr], temp1; 22 | st.u32 [out_addr+4], temp2; 23 | ret; 24 | } 25 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/atom_inc.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry atom_inc( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp1; 13 | .reg .u32 temp2; 14 | .reg .u32 temp3; 15 | 16 | ld.param.u64 in_addr, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | atom.inc.u32 temp1, [in_addr], 101; 20 | atom.global.inc.u32 temp2, [in_addr], 101; 21 | ld.u32 temp3, [in_addr]; 22 | st.u32 [out_addr], temp1; 23 | st.u32 [out_addr+4], temp2; 24 | st.u32 [out_addr+8], temp3; 25 | ret; 26 | } 27 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/atom_ld_st.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_70 3 | .address_size 64 4 | 5 | .visible .entry atom_ld_st( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | ld.acquire.gpu.u32 temp, [in_addr]; 17 | st.release.gpu.u32 [out_addr], temp; 18 | ret; 19 | } 20 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/atom_ld_st_vec.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_70 3 | .address_size 64 4 | 5 | .visible .entry atom_ld_st_vec( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 
11 | .reg .u64 out_addr; 12 | .reg .u64 temp1; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | ld.acquire.gpu.v2.u64 {temp1, temp2}, [in_addr]; 18 | st.release.gpu.v2.u64 [out_addr], {temp1, temp2}; 19 | ret; 20 | } 21 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/atom_max_u32.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry atom_max_u32( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b32 temp1; 13 | .reg .b32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.b32 temp1, [in_addr]; 19 | st.b32 [out_addr], temp1; 20 | ld.b32 temp2, [in_addr+4]; 21 | atom.max.u32 temp1, [out_addr], temp2; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/b64tof64.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry b64tof64( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .f64 in_addr_f; 11 | .reg .b64 in_addr; 12 | .reg .u64 out_addr; 13 | 14 | .reg.u64 temp; 15 | 16 | ld.param.f64 in_addr_f, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | mov.b64 in_addr, in_addr_f; 20 | 21 | ld.u64 temp, [in_addr]; 22 | st.u64 [out_addr], temp; 23 | 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/barrier.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry barrier() 6 | { 7 | barrier.sync 0; 8 | ret; 9 | } 10 | 
-------------------------------------------------------------------------------- /ptx/src/test/spirv_run/bfe.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry bfe( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp<3>; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.u32 temp0, [in_addr]; 18 | ld.u32 temp1, [in_addr+4]; 19 | ld.u32 temp2, [in_addr+8]; 20 | bfe.u32 temp0, temp0, temp1, temp2; 21 | st.u32 [out_addr], temp0; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/bfi.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry bfi( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp<4>; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.u32 temp0, [in_addr]; 18 | ld.u32 temp1, [in_addr+4]; 19 | ld.u32 temp2, [in_addr+8]; 20 | ld.u32 temp3, [in_addr+12]; 21 | bfi.b32 temp0, temp0, temp1, temp2, temp3; 22 | st.u32 [out_addr], temp0; 23 | ret; 24 | } 25 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/bfind.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry bfind( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp<6>; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.u32 temp0, [in_addr]; 18 | ld.u32 temp1, [in_addr+4]; 19 | ld.u32 temp2, [in_addr+8]; 
20 | bfind.u32 temp3, temp0; 21 | bfind.u32 temp4, temp1; 22 | bfind.u32 temp5, temp2; 23 | st.u32 [out_addr], temp3; 24 | st.u32 [out_addr+4], temp4; 25 | st.u32 [out_addr+8], temp5; 26 | ret; 27 | } 28 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/bfind_shiftamt.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry bfind_shiftamt( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp<6>; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.u32 temp0, [in_addr]; 18 | ld.u32 temp1, [in_addr+4]; 19 | ld.u32 temp2, [in_addr+8]; 20 | bfind.shiftamt.u32 temp3, temp0; 21 | bfind.shiftamt.u32 temp4, temp1; 22 | bfind.shiftamt.u32 temp5, temp2; 23 | st.u32 [out_addr], temp3; 24 | st.u32 [out_addr+4], temp4; 25 | st.u32 [out_addr+8], temp5; 26 | ret; 27 | } 28 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/block.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry block( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u64 temp, [in_addr]; 19 | add.u64 temp2, temp, 1; 20 | { 21 | .reg .u64 temp2; 22 | add.u64 temp2, temp2, 1; 23 | } 24 | st.u64 [out_addr], temp2; 25 | ret; 26 | } 27 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/bra.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 
5 | .visible .entry bra( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u64 temp, [in_addr]; 19 | bra case1; 20 | case1: 21 | add.u64 temp2, temp, 1; 22 | bra case3; 23 | case2: 24 | add.u64 temp2, temp, 2; 25 | case3: 26 | st.u64 [out_addr], temp2; 27 | ret; 28 | } 29 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/brev.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry brev( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.b32 temp, [in_addr]; 18 | brev.b32 temp, temp; 19 | st.b32 [out_addr], temp; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/clz.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry clz( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.b32 temp, [in_addr]; 18 | clz.b32 temp, temp; 19 | st.b32 [out_addr], temp; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/constant_f32.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry constant_f32( 6 | .param .u64 input, 7 | .param 
.u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.f32 temp, [in_addr]; 18 | mul.f32 temp, temp, 0f3f000000; // 0.5 19 | st.f32 [out_addr], temp; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/constant_negative.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry constant_negative( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.s32 temp, [in_addr]; 18 | mul.lo.s32 temp, temp, -1; 19 | st.s32 [out_addr], temp; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/cos.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry cos( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.f32 temp, [in_addr]; 18 | cos.approx.f32 temp, temp; 19 | st.f32 [out_addr], temp; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/cvt_f32_f16.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry cvt_f32_f16( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f16 temp_f16; 13 | .reg .f32 temp_f32; 14 | 
15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.global.b16 temp_f16, [in_addr]; 19 | cvt.f32.f16 temp_f32, temp_f16; 20 | st.f32 [out_addr], temp_f32; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/cvt_f64_f32.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry cvt_f64_f32( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp_f32; 13 | .reg .f64 temp_f64; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.global.f32 temp_f32, [in_addr]; 19 | cvt.ftz.f64.f32 temp_f64, temp_f32; 20 | st.f64 [out_addr], temp_f64; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/cvt_rni.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry cvt_rni( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp1; 13 | .reg .f32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.f32 temp1, [in_addr]; 19 | ld.f32 temp2, [in_addr+4]; 20 | cvt.rni.f32.f32 temp1, temp1; 21 | cvt.rni.f32.f32 temp2, temp2; 22 | st.f32 [out_addr], temp1; 23 | st.f32 [out_addr+4], temp2; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/cvt_rzi.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry cvt_rzi( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 
| .reg .u64 out_addr; 12 | .reg .f32 temp1; 13 | .reg .f32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.f32 temp1, [in_addr]; 19 | ld.f32 temp2, [in_addr+4]; 20 | cvt.rzi.f32.f32 temp1, temp1; 21 | cvt.rzi.f32.f32 temp2, temp2; 22 | st.f32 [out_addr], temp1; 23 | st.f32 [out_addr+4], temp2; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/cvt_s16_s8.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry cvt_s16_s8( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b32 temp_16; 13 | .reg .b32 temp_8; 14 | 15 | // inline asm 16 | /*ptx_texBake_end*/ 17 | // inline asm 18 | 19 | ld.param.u64 in_addr, [input]; 20 | ld.param.u64 out_addr, [output]; 21 | 22 | ld.global.b32 temp_8, [in_addr]; 23 | cvt.s16.s8 temp_16, temp_8; 24 | st.b32 [out_addr], temp_16; 25 | ret; 26 | } 27 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/cvt_s32_f32.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry cvt_s32_f32( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b32 temp1; 13 | .reg .b32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.f32 temp1, [in_addr]; 19 | ld.f32 temp2, [in_addr+4]; 20 | cvt.rpi.ftz.s32.f32 temp1, temp1; 21 | cvt.rpi.ftz.s32.f32 temp2, temp2; 22 | st.global.s32 [out_addr], temp1; 23 | st.global.s32 [out_addr+4], temp2; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/cvt_s64_s32.ptx: 
-------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry cvt_s64_s32( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 r_32; 13 | .reg .s64 r_64; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.b32 r_32, [in_addr]; 19 | cvt.s64.s32 r_64, r_32; 20 | st.b64 [out_addr], r_64; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/cvt_sat_s_u.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry cvt_sat_s_u( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 input_value; 13 | .reg .u32 temp1; 14 | .reg .s32 temp2; 15 | 16 | ld.param.u64 in_addr, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | ld.s32 input_value, [in_addr]; 20 | cvt.sat.u32.s32 temp1, input_value; 21 | cvt.s32.u32 temp1, temp1; 22 | cvt.u32.s32 temp2, input_value; 23 | st.s32 [out_addr], temp1; 24 | st.s32 [out_addr+4], temp2; 25 | ret; 26 | } 27 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/cvt_u32_s16.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry cvt_u32_s16( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b16 temp_16; 13 | .reg .b32 temp_32; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.global.b16 temp_16, [in_addr]; 19 | cvt.u32.s16 temp_32, temp_16; 20 | st.b32 [out_addr], temp_32; 21 | ret; 22 | } 23 | 
-------------------------------------------------------------------------------- /ptx/src/test/spirv_run/cvta.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry cvta( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | cvta.to.global.u64 in_addr, in_addr; 18 | cvta.to.global.u64 out_addr, out_addr; 19 | 20 | ld.global.f32 temp, [in_addr]; 21 | st.global.f32 [out_addr], temp; 22 | ret; 23 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/div_approx.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry div_approx( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp1; 13 | .reg .f32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.f32 temp1, [in_addr]; 19 | ld.f32 temp2, [in_addr+4]; 20 | div.approx.f32 temp1, temp1, temp2; 21 | st.f32 [out_addr], temp1; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/dp4a.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_61 3 | .address_size 64 4 | 5 | .entry dp4a( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b32 temp0; 13 | .reg .b32 temp1; 14 | .reg .b32 temp2; 15 | 16 | ld.param.u64 in_addr, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | ld.b32 temp0, [in_addr]; 20 | ld.b32 temp1, [in_addr+4]; 21 | ld.b32 temp2, [in_addr+8]; 22 | 
dp4a.s32.s32 temp0, temp0, temp1, temp2; 23 | st.b32 [out_addr], temp0; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/ex2.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry ex2( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.f32 temp, [in_addr]; 18 | ex2.approx.f32 temp, temp; 19 | st.f32 [out_addr], temp; 20 | ld.f32 temp, [in_addr+4]; 21 | ex2.approx.f32 temp, temp; 22 | st.f32 [out_addr+4], temp; 23 | ld.f32 temp, [in_addr+8]; 24 | ex2.approx.f32 temp, temp; 25 | st.f32 [out_addr+8], temp; 26 | ld.f32 temp, [in_addr+12]; 27 | ex2.approx.f32 temp, temp; 28 | st.f32 [out_addr+12], temp; 29 | 30 | ret; 31 | } 32 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/extern_shared.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .extern .shared .b32 shared_mem []; 6 | 7 | .visible .entry extern_shared( 8 | .param .u64 input, 9 | .param .u64 output 10 | ) 11 | { 12 | .reg .u64 in_addr; 13 | .reg .u64 out_addr; 14 | .reg .u64 temp; 15 | 16 | ld.param.u64 in_addr, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | ld.global.u64 temp, [in_addr]; 20 | st.shared.u64 [shared_mem], temp; 21 | ld.shared.u64 temp, [shared_mem]; 22 | st.global.u64 [out_addr], temp; 23 | ret; 24 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/fma.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry fma( 6 
| .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp1; 13 | .reg .f32 temp2; 14 | .reg .f32 temp3; 15 | 16 | ld.param.u64 in_addr, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | ld.f32 temp1, [in_addr]; 20 | ld.f32 temp2, [in_addr+4]; 21 | ld.f32 temp3, [in_addr+8]; 22 | fma.rn.f32 temp1, temp1, temp2, temp3; 23 | st.f32 [out_addr], temp1; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/func_ptr.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .func (.reg .f32 out) foobar(.reg .f32 x, .reg .f32 y) 6 | { 7 | add.f32 out, x, y; 8 | ret; 9 | } 10 | 11 | .visible .entry func_ptr( 12 | .param .u64 input, 13 | .param .u64 output 14 | ) 15 | { 16 | .reg .u64 in_addr; 17 | .reg .u64 out_addr; 18 | .reg .u64 temp; 19 | .reg .u64 temp2; 20 | .reg .u64 f_addr; 21 | 22 | ld.param.u64 in_addr, [input]; 23 | ld.param.u64 out_addr, [output]; 24 | 25 | ld.u64 temp, [in_addr]; 26 | add.u64 temp2, temp, 1; 27 | mov.u64 f_addr, foobar; 28 | add.u64 temp2, temp2, f_addr; 29 | st.u64 [out_addr], temp2; 30 | ret; 31 | } 32 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/global_array.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .global .u32 asdas[4][2] = {{-1,2}, {3}}; 6 | .global .u64 foobar[4][2] = {{-1,2}, {3}, {asdas}}; 7 | 8 | .visible .entry global_array( 9 | .param .u64 input, 10 | .param .u64 output 11 | ) 12 | { 13 | .reg .u64 in_addr; 14 | .reg .u64 out_addr; 15 | .reg .u32 temp; 16 | 17 | mov.u64 in_addr, foobar; 18 | ld.param.u64 out_addr, [output]; 19 | 20 | ld.global.u32 temp, [in_addr]; 21 | st.global.u32 [out_addr], temp; 22 | ret; 23 | } 
-------------------------------------------------------------------------------- /ptx/src/test/spirv_run/implicit_param.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry implicit_param( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp; 13 | .param .b32 temp_param; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.global.f32 temp, [in_addr]; 19 | st.param.f32 [temp_param], temp; 20 | ld.param.f32 temp, [temp_param]; 21 | st.global.f32 [out_addr], temp; 22 | 23 | ret; 24 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/isspacep.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry isspacep( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .pred is_global; 13 | .reg .pred is_shared; 14 | 15 | .reg .u32 is_global_u32; 16 | .reg .u32 is_shared_u32; 17 | 18 | ld.param.u64 in_addr, [input]; 19 | ld.param.u64 out_addr, [output]; 20 | 21 | isspacep.global is_global, in_addr; 22 | selp.u32 is_global_u32, 1, 0, is_global; 23 | isspacep.shared is_shared, in_addr; 24 | selp.u32 is_shared_u32, 1, 0, is_shared; 25 | st.u32 [out_addr], is_global_u32; 26 | st.u32 [out_addr+4], is_shared_u32; 27 | ret; 28 | } 29 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/laneid.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry laneid( 6 | .param .u64 output 7 | ) 8 | { 9 | .reg .u64 out_addr; 10 | .reg .u32 tid; 11 | .reg .u64 tid_64; 12 | .reg .u32 
result; 13 | 14 | ld.param.u64 out_addr, [output]; 15 | 16 | mov.b32 tid, %tid.x; 17 | cvt.u64.u32 tid_64, tid; 18 | 19 | mov.b32 result, %laneid; 20 | 21 | mad.lo.u64 out_addr, tid_64, 4, out_addr; 22 | st.u32 [out_addr], result; 23 | ret; 24 | } 25 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/lanemask_lt.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry lanemask_lt( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b32 temp; 13 | .reg .b32 temp2; 14 | .reg .b32 less_lane; 15 | 16 | ld.param.u64 in_addr, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | ld.u32 temp, [in_addr]; 20 | add.u32 temp2, temp, 1; 21 | mov.u32 less_lane, %lanemask_lt; 22 | add.u32 temp2, temp2, less_lane; 23 | st.u32 [out_addr], temp2; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/ld_st.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry ld_st( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.u64 temp, [in_addr]; 18 | st.u64 [out_addr], temp; 19 | ret; 20 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/ld_st_implicit.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry ld_st_implicit( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b64 temp; 
13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | mov.b64 temp, 0x0123456789abcdef; 18 | ld.global.f32 temp, [in_addr]; 19 | st.global.f32 [out_addr], temp; 20 | ret; 21 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/ld_st_offset.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry ld_st_offset( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp1; 13 | .reg .u32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u32 temp1, [in_addr]; 19 | ld.u32 temp2, [in_addr+4]; 20 | st.u32 [out_addr], temp2; 21 | st.u32 [out_addr+4], temp1; 22 | ret; 23 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/lg2.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry lg2( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.f32 temp, [in_addr]; 18 | lg2.approx.f32 temp, temp; 19 | st.f32 [out_addr], temp; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/local_align.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry local_align( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .local .align 8 .b8 __local_depot0[8]; 11 | .reg .u64 in_addr; 12 | .reg .u64 out_addr; 13 | .reg .u64 temp; 14 | 15 | ld.param.u64 in_addr, 
[input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u64 temp, [in_addr]; 19 | st.u64 [out_addr], temp; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/madc_cc.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry madc_cc( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 dst1; 13 | .reg .s32 dst2; 14 | .reg .b32 src1; 15 | .reg .b32 src2; 16 | .reg .b32 src3; 17 | 18 | ld.param.u64 in_addr, [input]; 19 | ld.param.u64 out_addr, [output]; 20 | 21 | ld.s32 src1, [in_addr]; 22 | ld.s32 src2, [in_addr+4]; 23 | ld.b32 src3, [in_addr+8]; 24 | mad.lo.cc.s32 dst1, src1, src2, src3; 25 | madc.hi.s32 dst2, src1, src2, 3; 26 | st.s32 [out_addr], dst1; 27 | st.s32 [out_addr+4], dst2; 28 | ret; 29 | } 30 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/max.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry max( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 temp1; 13 | .reg .s32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.s32 temp1, [in_addr]; 19 | ld.s32 temp2, [in_addr+4]; 20 | max.s32 temp1, temp1, temp2; 21 | st.s32 [out_addr], temp1; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/membar.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry membar( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 
in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.u32 temp, [in_addr]; 18 | membar.sys; 19 | st.s32 [out_addr], temp; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/min.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry min( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 temp1; 13 | .reg .s32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.s32 temp1, [in_addr]; 19 | ld.s32 temp2, [in_addr+4]; 20 | min.s32 temp1, temp1, temp2; 21 | st.s32 [out_addr], temp1; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/mov.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry mov( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u64 temp, [in_addr]; 19 | mov.u64 temp2, temp; 20 | st.u64 [out_addr], temp2; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/mov_address.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry mov_address( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .local .b8 __local_depot0[8]; 11 | .reg .u64 temp; 12 | 13 | mov.u64 temp, __local_depot0; 14 | ret; 15 | } 16 | 
-------------------------------------------------------------------------------- /ptx/src/test/spirv_run/mov_vector_cast.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry mov_vector_cast( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp_wide; 13 | .reg .f32 temp1; 14 | .reg .f32 temp2; 15 | .reg .f16 temp3; 16 | .reg .f16 temp4; 17 | .reg .f16 temp5; 18 | .reg .f16 temp6; 19 | 20 | ld.param.u64 in_addr, [input]; 21 | ld.param.u64 out_addr, [output]; 22 | 23 | ld.u64 temp_wide, [in_addr]; 24 | mov.b64 {temp1, temp2}, temp_wide; 25 | mov.b64 {temp3, temp4, temp5, temp6}, temp_wide; 26 | st.f32 [out_addr], temp2; 27 | st.f32 [out_addr+4], temp1; 28 | 29 | ret; 30 | } 31 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/mul24_hi.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry mul24_hi( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp; 13 | .reg .u32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u32 temp, [in_addr]; 19 | mul24.hi.u32 temp2, temp, 9815513; 20 | st.u32 [out_addr], temp2; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/mul24_lo.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry mul24_lo( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp; 13 | .reg .u32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 
16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u32 temp, [in_addr]; 19 | mul24.lo.u32 temp2, temp, 9815513; 20 | st.u32 [out_addr], temp2; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/mul_ftz.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry mul_ftz( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp1; 13 | .reg .f32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.f32 temp1, [in_addr]; 19 | ld.f32 temp2, [in_addr+4]; 20 | mul.ftz.f32 temp1, temp1, temp2; 21 | st.f32 [out_addr], temp1; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/mul_hi.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry mul_hi( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u64 temp, [in_addr]; 19 | mul.hi.u64 temp2, temp, 2; 20 | st.u64 [out_addr], temp2; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/mul_lo.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry mul_lo( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | 
ld.u64 temp, [in_addr]; 19 | mul.lo.u64 temp2, temp, 2; 20 | st.u64 [out_addr], temp2; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/mul_non_ftz.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry mul_non_ftz( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp1; 13 | .reg .f32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.f32 temp1, [in_addr]; 19 | ld.f32 temp2, [in_addr+4]; 20 | mul.f32 temp1, temp1, temp2; 21 | st.f32 [out_addr], temp1; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/mul_wide.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry mul_wide( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 inp1; 13 | .reg .s32 inp2; 14 | .reg .s64 result; 15 | 16 | ld.param.u64 in_addr, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | ld.global.s32 inp1, [in_addr]; 20 | ld.global.s32 inp2, [in_addr+4]; 21 | mul.wide.s32 result, inp1, inp2; 22 | st.u64 [out_addr], result; 23 | ret; 24 | } 25 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/multireg.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry multireg( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr, out_addr, temp<2>; 11 | 12 | ld.param.u64 in_addr, [input]; 13 | ld.param.u64 out_addr, [output]; 14 | 15 | ld.u64 temp0, 
[in_addr]; 16 | add.u64 temp1, temp0, 1; 17 | st.u64 [out_addr], temp1; 18 | ret; 19 | } 20 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/neg.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry neg( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 temp1; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.s32 temp1, [in_addr]; 18 | neg.s32 temp1, temp1; 19 | st.s32 [out_addr], temp1; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/non_scalar_ptr_offset.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry non_scalar_ptr_offset( 6 | .param .u64 input_p, 7 | .param .u64 output_p 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 x; 13 | .reg .u32 y; 14 | 15 | ld.param.u64 in_addr, [input_p]; 16 | ld.param.u64 out_addr, [output_p]; 17 | 18 | ld.global.v2.u32 {x,y}, [in_addr+8]; 19 | add.u32 x, x, y; 20 | st.global.u32 [out_addr], x; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/not.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry not( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u64 temp, [in_addr]; 19 | not.b64 temp2, temp; 20 | st.u64 [out_addr], temp2; 21 | ret; 22 | } 23 | 
-------------------------------------------------------------------------------- /ptx/src/test/spirv_run/ntid.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry ntid( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 in_val; 13 | .reg .u32 global_count; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u32 in_val, [in_addr]; 19 | mov.u32 global_count, %ntid.x; 20 | add.u32 in_val, in_val, global_count; 21 | st.u32 [out_addr], in_val; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/or.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry or( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp1; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u64 temp1, [in_addr]; 19 | ld.u64 temp2, [in_addr+8]; 20 | or.b64 temp1, temp1, temp2; 21 | st.u64 [out_addr], temp1; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/param_ptr.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry param_ptr( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 ptr; 11 | .reg .u64 in_addr; 12 | .reg .u64 out_addr; 13 | .reg .u64 temp; 14 | .reg .u64 temp2; 15 | 16 | mov.b64 ptr, input; 17 | 18 | ld.param.u64 in_addr, [ptr]; 19 | ld.param.u64 out_addr, [output]; 20 | 21 | ld.u64 temp, [in_addr]; 22 | add.u64 temp2, temp, 1; 23 | st.u64 
[out_addr], temp2; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/popc.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry popc( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.b32 temp, [in_addr]; 18 | popc.b32 temp, temp; 19 | st.b32 [out_addr], temp; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/pred_not.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry pred_not( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | .reg .u64 temp3; 15 | .reg .pred pred; 16 | 17 | ld.param.u64 in_addr, [input]; 18 | ld.param.u64 out_addr, [output]; 19 | 20 | ld.u64 temp, [in_addr]; 21 | ld.u64 temp2, [in_addr + 8]; 22 | setp.lt.u64 pred, temp, temp2; 23 | not.pred pred, pred; 24 | @pred mov.u64 temp3, 1; 25 | @!pred mov.u64 temp3, 2; 26 | st.u64 [out_addr], temp3; 27 | ret; 28 | } 29 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/prmt.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry prmt( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp1; 13 | .reg .u32 temp2; 14 | .reg .u32 temp3; 15 | .reg .u32 temp4; 16 | 17 | ld.param.u64 in_addr, [input]; 18 | ld.param.u64 out_addr, 
[output]; 19 | 20 | ld.u32 temp1, [in_addr]; 21 | ld.u32 temp2, [in_addr+4]; 22 | prmt.b32 temp3, temp1, temp2, 30212; 23 | prmt.b32 temp4, temp1, temp2, 32268; 24 | st.u32 [out_addr], temp3; 25 | st.u32 [out_addr+4], temp4; 26 | ret; 27 | } 28 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/prmt_non_immediate.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry prmt_non_immediate( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp1; 13 | .reg .u32 temp2; 14 | .reg .u32 control; 15 | 16 | ld.param.u64 in_addr, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | ld.u32 temp1, [in_addr]; 20 | ld.u32 temp2, [in_addr+4]; 21 | mov.u32 control, 64; 22 | prmt.b32 temp2, temp1, temp2, control; 23 | st.u32 [out_addr], temp2; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/rcp.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry rcp( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.f32 temp, [in_addr]; 18 | rcp.approx.f32 temp, temp; 19 | st.f32 [out_addr], temp; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/reg_local.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry reg_local( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .local .align 8 .b8 local_x[8]; 11 | 
.reg .u64 in_addr; 12 | .reg .u64 out_addr; 13 | .reg .b64 temp; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.global.u64 temp, [in_addr]; 19 | st.u64 [local_x], temp + 1; 20 | ld.u64 temp, [local_x+0]; 21 | st.global.u64 [out_addr+0], temp; 22 | ret; 23 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/rem.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry rem( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 temp1; 13 | .reg .s32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.s32 temp1, [in_addr]; 19 | ld.s32 temp2, [in_addr+4]; 20 | rem.s32 temp1, temp1, temp2; 21 | st.s32 [out_addr], temp1; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/rsqrt.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry rsqrt( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f64 temp1; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.f64 temp1, [in_addr]; 18 | rsqrt.approx.f64 temp1, temp1; 19 | st.f64 [out_addr], temp1; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/s64_min.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry s64_min( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 out_addr; 11 | .reg .s64 min; 12 | 13 | ld.param.u64 
out_addr, [output]; 14 | mov.s64 min, -9223372036854775808; 15 | st.s64 [out_addr], min; 16 | ret; 17 | } 18 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/sad.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .entry sad( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b32 a; 13 | .reg .b32 b; 14 | .reg .b32 c; 15 | .reg .b32 result_u32; 16 | .reg .b32 result_s32; 17 | 18 | ld.param.u64 in_addr, [input]; 19 | ld.param.u64 out_addr, [output]; 20 | 21 | ld.u32 a, [in_addr]; 22 | ld.u32 b, [in_addr+4]; 23 | ld.u32 c, [in_addr+8]; 24 | sad.u32 result_u32, a, b, c; 25 | sad.s32 result_s32, a, b, c; 26 | st.b32 [out_addr], result_u32; 27 | st.b32 [out_addr+4], result_s32; 28 | ret; 29 | } 30 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/selp.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry selp( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u16 temp1; 13 | .reg .u16 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u16 temp1, [in_addr]; 19 | ld.u16 temp2, [in_addr + 2]; 20 | selp.u16 temp1, temp1, temp2, 0; 21 | st.u16 [out_addr], temp1; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/selp_true.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry selp_true( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 
12 | .reg .u16 temp1; 13 | .reg .u16 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u16 temp1, [in_addr]; 19 | ld.u16 temp2, [in_addr + 2]; 20 | selp.u16 temp1, temp1, temp2, 1; 21 | st.u16 [out_addr], temp1; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/set_f16x2.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_53 3 | .address_size 64 4 | 5 | .visible .entry set_f16x2( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b32 temp0; 13 | .reg .b32 temp1; 14 | .reg .b32 temp2; 15 | .reg .b32 temp3; 16 | .reg .f16x2 sela; 17 | 18 | ld.param.u64 in_addr, [input]; 19 | ld.param.u64 out_addr, [output]; 20 | 21 | ld.u32 temp0, [in_addr]; 22 | ld.u32 temp1, [in_addr+4]; 23 | ld.u32 temp2, [in_addr+8]; 24 | ld.u32 temp3, [in_addr+12]; 25 | set.gtu.u32.f16x2 temp0, temp0, temp1; 26 | set.eq.f16x2.f16x2 temp2, temp2, temp3; 27 | st.b32 [out_addr], temp0; 28 | st.b32 [out_addr+4], temp2; 29 | ret; 30 | } 31 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/setp.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry setp( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | .reg .u64 temp3; 15 | .reg .pred pred; 16 | 17 | ld.param.u64 in_addr, [input]; 18 | ld.param.u64 out_addr, [output]; 19 | 20 | ld.u64 temp, [in_addr]; 21 | ld.u64 temp2, [in_addr + 8]; 22 | setp.lt.u64 pred, temp, temp2; 23 | @pred mov.u64 temp3, 1; 24 | @!pred mov.u64 temp3, 2; 25 | st.u64 [out_addr], temp3; 26 | ret; 27 | } 28 | 
-------------------------------------------------------------------------------- /ptx/src/test/spirv_run/setp_bool.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry setp_bool( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 r1; 13 | .reg .f32 r2; 14 | .reg .f32 r3; 15 | .reg .pred temp; 16 | .reg .pred p1; 17 | .reg .pred p2; 18 | 19 | ld.param.u64 in_addr, [input]; 20 | ld.param.u64 out_addr, [output]; 21 | 22 | ld.f32 r1, [in_addr]; 23 | ld.f32 r2, [in_addr + 4]; 24 | ld.f32 r3, [in_addr + 8]; 25 | mov.pred temp, 0; 26 | setp.gt.and.ftz.f32 p1|p2, r1, r2, temp; 27 | @p1 mov.f32 r3, r1; 28 | @p2 mov.f32 r3, r2; 29 | st.f32 [out_addr], r3; 30 | ret; 31 | } 32 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/setp_gt.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry setp_gt( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 r1; 13 | .reg .f32 r2; 14 | .reg .f32 r3; 15 | .reg .pred pred; 16 | 17 | ld.param.u64 in_addr, [input]; 18 | ld.param.u64 out_addr, [output]; 19 | 20 | ld.f32 r1, [in_addr]; 21 | ld.f32 r2, [in_addr + 4]; 22 | setp.gt.ftz.f32 pred, r1, r2; 23 | @pred mov.f32 r3, r1; 24 | @!pred mov.f32 r3, r2; 25 | st.f32 [out_addr], r3; 26 | ret; 27 | } 28 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/setp_leu.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry setp_leu( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg 
.u64 out_addr; 12 | .reg .f32 r1; 13 | .reg .f32 r2; 14 | .reg .f32 r3; 15 | .reg .pred pred; 16 | 17 | ld.param.u64 in_addr, [input]; 18 | ld.param.u64 out_addr, [output]; 19 | 20 | ld.f32 r1, [in_addr]; 21 | ld.f32 r2, [in_addr + 4]; 22 | setp.leu.ftz.f32 pred, r1, r2; 23 | @pred mov.f32 r3, r1; 24 | @!pred mov.f32 r3, r2; 25 | st.f32 [out_addr], r3; 26 | ret; 27 | } 28 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/setp_pred2.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry setp_pred2( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 r1; 13 | .reg .f32 r2; 14 | .reg .f32 r3; 15 | .reg .pred yes; 16 | .reg .pred no; 17 | 18 | ld.param.u64 in_addr, [input]; 19 | ld.param.u64 out_addr, [output]; 20 | 21 | ld.f32 r1, [in_addr]; 22 | ld.f32 r2, [in_addr + 4]; 23 | setp.gt.ftz.f32 yes|no, r1, r2; 24 | @yes mov.f32 r3, r1; 25 | @no mov.f32 r3, r2; 26 | st.f32 [out_addr], r3; 27 | ret; 28 | } 29 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/shared_ptr_32.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | 6 | .visible .entry shared_ptr_32( 7 | .param .u64 input, 8 | .param .u64 output 9 | ) 10 | { 11 | .shared .align 4 .b8 shared_mem1[128]; 12 | 13 | .reg .u64 in_addr; 14 | .reg .u64 out_addr; 15 | .reg .u32 shared_addr; 16 | 17 | .reg .u64 temp1; 18 | .reg .u64 temp2; 19 | 20 | ld.param.u64 in_addr, [input]; 21 | ld.param.u64 out_addr, [output]; 22 | mov.u32 shared_addr, shared_mem1; 23 | 24 | ld.global.u64 temp1, [in_addr]; 25 | st.shared.u64 [shared_addr], temp1; 26 | ld.shared.u64 temp2, [shared_addr+0]; 27 | st.global.u64 [out_addr], temp2; 28 | ret; 29 | } 
-------------------------------------------------------------------------------- /ptx/src/test/spirv_run/shared_ptr_take_address.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .extern .shared .align 4 .b8 shared_mem[]; 6 | 7 | .visible .entry shared_ptr_take_address( 8 | .param .u64 input, 9 | .param .u64 output 10 | ) 11 | { 12 | .reg .u64 in_addr; 13 | .reg .u64 out_addr; 14 | .reg .u64 shared_addr; 15 | .reg .u64 temp1; 16 | .reg .u64 temp2; 17 | 18 | ld.param.u64 in_addr, [input]; 19 | ld.param.u64 out_addr, [output]; 20 | mov.u64 shared_addr, shared_mem; 21 | 22 | ld.global.u64 temp1, [in_addr]; 23 | st.shared.u64 [shared_addr], temp1; 24 | ld.shared.u64 temp2, [shared_addr]; 25 | st.global.u64 [out_addr], temp2; 26 | ret; 27 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/shared_variable.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | 6 | .visible .entry shared_variable( 7 | .param .u64 input, 8 | .param .u64 output 9 | ) 10 | { 11 | .shared .align 4 .b8 shared_mem1[128]; 12 | 13 | .reg .u64 in_addr; 14 | .reg .u64 out_addr; 15 | .reg .u64 temp1; 16 | .reg .u64 temp2; 17 | 18 | ld.param.u64 in_addr, [input]; 19 | ld.param.u64 out_addr, [output]; 20 | 21 | ld.global.u64 temp1, [in_addr]; 22 | st.shared.u64 [shared_mem1], temp1; 23 | ld.shared.u64 temp2, [shared_mem1]; 24 | st.global.u64 [out_addr], temp2; 25 | ret; 26 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/shf.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_32 3 | .address_size 64 4 | 5 | .visible .entry shf( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 
out_addr; 12 | .reg .u32 temp1; 13 | .reg .u32 temp2; 14 | .reg .u32 result; 15 | 16 | ld.param.u64 in_addr, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | ld.u32 temp1, [in_addr]; 20 | ld.u32 temp2, [in_addr+4]; 21 | shf.l.wrap.b32 result, temp1, temp2, 14; 22 | st.u32 [out_addr], result; 23 | ret; 24 | } 25 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/shfl.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry shfl( 6 | .param .u64 output 7 | ) 8 | { 9 | .reg .u64 out_addr; 10 | .reg .u32 tid; 11 | .reg .u64 tid_64; 12 | .reg .u32 result; 13 | 14 | ld.param.u64 out_addr, [output]; 15 | 16 | mov.b32 tid, %tid.x; 17 | cvt.u64.u32 tid_64, tid; 18 | shfl.sync.down.b32 result, tid, 1, 31, -1; 19 | mad.lo.u64 out_addr, tid_64, 4, out_addr; 20 | st.u32 [out_addr], result; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/shl.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry shl( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u64 temp, [in_addr]; 19 | shl.b64 temp2, temp, 2; 20 | st.u64 [out_addr], temp2; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/shl_link_hack.ptx: -------------------------------------------------------------------------------- 1 | // HACK ALERT 2 | // This test exercises a workaround for a bug in IGC where linking fails 3 | // if there is a shl/shr whose value and shift amount have different widths 4 | 5 | .version 6.5 
6 | .target sm_30 7 | .address_size 64 8 | 9 | .visible .entry shl_link_hack( 10 | .param .u64 input, 11 | .param .u64 output 12 | ) 13 | { 14 | .reg .u64 in_addr; 15 | .reg .u64 out_addr; 16 | .reg .u64 temp; 17 | .reg .u64 temp2; 18 | 19 | ld.param.u64 in_addr, [input]; 20 | ld.param.u64 out_addr, [output]; 21 | 22 | // Here only to trigger linking 23 | .reg .u32 unused; 24 | atom.inc.u32 unused, [out_addr], 2000000; 25 | 26 | ld.u64 temp, [in_addr]; 27 | shl.b64 temp2, temp, 2; 28 | st.u64 [out_addr], temp2; 29 | ret; 30 | } 31 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/shr_s32.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry shr_s32( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 temp; 13 | .reg .b32 shift_amount; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.s32 temp, [in_addr]; 19 | ld.b32 shift_amount, [in_addr+4]; 20 | shr.s32 temp, temp, shift_amount; 21 | st.s32 [out_addr], temp; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/shr_u32.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry shr_u32( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp; 13 | .reg .b32 shift_amount1; 14 | .reg .b32 shift_amount2; 15 | .reg .u32 result1; 16 | .reg .u32 result2; 17 | 18 | ld.param.u64 in_addr, [input]; 19 | ld.param.u64 out_addr, [output]; 20 | 21 | ld.u32 temp, [in_addr]; 22 | ld.b32 shift_amount1, [in_addr+4]; 23 | ld.b32 shift_amount2, [in_addr+8]; 24 | 25 | shr.u32 result1, temp, shift_amount1; 26 | 
shr.u32 result2, temp, shift_amount2; 27 | 28 | st.u32 [out_addr], result1; 29 | st.u32 [out_addr+4], result2; 30 | ret; 31 | } 32 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/sign_extend.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry sign_extend( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .s32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.s16 temp, [in_addr]; 18 | st.s32 [out_addr], temp; 19 | ret; 20 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/sin.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry sin( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.f32 temp, [in_addr]; 18 | sin.approx.f32 temp, temp; 19 | st.f32 [out_addr], temp; 20 | ret; 21 | } 22 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/sqrt.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry sqrt( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .f32 temp1; 13 | 14 | ld.param.u64 in_addr, [input]; 15 | ld.param.u64 out_addr, [output]; 16 | 17 | ld.f32 temp1, [in_addr]; 18 | sqrt.approx.f32 temp1, temp1; 19 | st.f32 [out_addr], temp1; 20 | ret; 21 | } 22 | 
-------------------------------------------------------------------------------- /ptx/src/test/spirv_run/sub.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry sub( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u64 temp, [in_addr]; 19 | sub.u64 temp2, temp, 1; 20 | st.u64 [out_addr], temp2; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/vector4.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_60 3 | .address_size 64 4 | 5 | .visible .entry vector4( 6 | .param .u64 input_p, 7 | .param .u64 output_p 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .v4 .u32 temp; 13 | .reg .u32 temp_scalar; 14 | 15 | ld.param.u64 in_addr, [input_p]; 16 | ld.param.u64 out_addr, [output_p]; 17 | 18 | ld.v4.u32 temp, [in_addr]; 19 | mov.b32 temp_scalar, temp.w; 20 | st.u32 [out_addr], temp_scalar; 21 | ret; 22 | } -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/vector_extract.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry vector_extract( 6 | .param .u64 input_p, 7 | .param .u64 output_p 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u16 temp1; 13 | .reg .u16 temp2; 14 | .reg .u16 temp3; 15 | .reg .u16 temp4; 16 | .reg .v4.u16 foo; 17 | 18 | ld.param.u64 in_addr, [input_p]; 19 | ld.param.u64 out_addr, [output_p]; 20 | 21 | ld.global.v4.u8 {temp1, temp2, temp3, temp4}, [in_addr]; 22 | mov.v4.u16 foo, {temp2, temp3, 
temp4, temp1}; 23 | mov.v4.u16 {temp3, temp4, temp1, temp2}, foo; 24 | mov.v4.u16 {temp4, temp1, temp2, temp3}, {temp3, temp4, temp1, temp2}; 25 | st.global.v4.u8 [out_addr], {temp1, temp2, temp3, temp4}; 26 | ret; 27 | } 28 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/vote_ballot.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry vote_ballot( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp1; 13 | .reg .u32 temp2; 14 | .reg .u32 temp3; 15 | .reg .u32 temp4; 16 | 17 | ld.param.u64 out_addr, [output]; 18 | 19 | vote.sync.ballot.b32 temp1, 1, 1; 20 | vote.sync.ballot.b32 temp2, 0, 0xffffff; 21 | vote.sync.ballot.b32 temp3, 1, 2; 22 | vote.sync.ballot.b32 temp4, 1, 3; 23 | 24 | st.u32 [out_addr+0], temp1; 25 | st.u32 [out_addr+4], temp2; 26 | st.u32 [out_addr+8], temp3; 27 | st.u32 [out_addr+12], temp4; 28 | ret; 29 | } 30 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/vshr.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry vshr( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u32 temp1; 13 | .reg .u32 temp2; 14 | .reg .u32 temp3; 15 | .reg .u32 temp4; 16 | 17 | ld.param.u64 in_addr, [input]; 18 | ld.param.u64 out_addr, [output]; 19 | ld.b32 temp2, [in_addr]; 20 | ld.b32 temp3, [in_addr+4]; 21 | ld.b32 temp4, [in_addr+8]; 22 | 23 | vshr.u32.u32.u32.clamp.add temp1, temp2, temp3, temp4; 24 | 25 | st.u32 [out_addr], temp1; 26 | ret; 27 | } 28 | -------------------------------------------------------------------------------- /ptx/src/test/spirv_run/xor.ptx: 
-------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry xor( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .b32 temp1; 13 | .reg .b32 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.b32 temp1, [in_addr]; 19 | ld.b32 temp2, [in_addr+4]; 20 | xor.b32 temp1, temp1, temp2; 21 | st.b32 [out_addr], temp1; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /rocblas-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rocblas-sys" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | links = "rocblas" 7 | 8 | [lib] 9 | -------------------------------------------------------------------------------- /rocblas-sys/README: -------------------------------------------------------------------------------- 1 | bindgen $Env:HIP_PATH/include/rocblas/rocblas.h -o src/rocblas.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "rocblas_.*" --allowlist-var "ROCBLAS_*" --must-use-type rocblas_status -- -I"$Env:HIP_PATH/include" -------------------------------------------------------------------------------- /rocblas-sys/build.rs: -------------------------------------------------------------------------------- 1 | use std::env::VarError; 2 | use std::{env, path::PathBuf}; 3 | 4 | fn main() -> Result<(), VarError> { 5 | println!("cargo:rustc-link-lib=dylib=rocblas"); 6 | if cfg!(windows) { 7 | let mut path = PathBuf::from(env::var("HIP_PATH")?); 8 | path.push("lib"); 9 | println!("cargo:rustc-link-search=native={}", path.display()); 10 | } else { 11 | println!("cargo:rustc-link-search=native=/opt/rocm/lib/"); 12 | } 13 | Ok(()) 14 | } 15 | 
-------------------------------------------------------------------------------- /rocblas-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(warnings)] 2 | mod rocblas; 3 | pub use rocblas::*; -------------------------------------------------------------------------------- /rocm_smi-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rocm_smi-sys" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | links = "rocm_smi64" 7 | 8 | [lib] 9 | -------------------------------------------------------------------------------- /rocm_smi-sys/README: -------------------------------------------------------------------------------- 1 | bindgen /opt/rocm/include/rocm_smi/rocm_smi.h -o src/rocm_smi.rs --no-layout-tests --size_t-is-usize --default-enum-style=newtype --no-derive-debug --allowlist-function "rsmi_.*" --allowlist-var "RSMI.*" --must-use-type rsmi_status_t -- -I/opt/rocm/include -------------------------------------------------------------------------------- /rocm_smi-sys/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rustc-link-lib=dylib=rocm_smi64"); 3 | println!("cargo:rustc-link-search=native=/opt/rocm/lib/"); 4 | } 5 | -------------------------------------------------------------------------------- /rocm_smi-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[allow(non_camel_case_types)] 2 | #[allow(non_snake_case)] 3 | #[allow(non_upper_case_globals)] 4 | mod rocm_smi; 5 | pub use rocm_smi::*; 6 | -------------------------------------------------------------------------------- /rocsolver-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rocsolver-sys" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | 
edition = "2018" 6 | links = "rocsolver" 7 | 8 | [lib] 9 | -------------------------------------------------------------------------------- /rocsolver-sys/README: -------------------------------------------------------------------------------- 1 | bindgen $Env:HIP_PATH/include/rocsolver/rocsolver.h -o src/rocsolver.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "rocsolver_.*" --allowlist-var "ROCSOLVER_*" --must-use-type rocblas_status -- -I"$Env:HIP_PATH/include" -------------------------------------------------------------------------------- /rocsolver-sys/build.rs: -------------------------------------------------------------------------------- 1 | use std::env::VarError; 2 | use std::{env, path::PathBuf}; 3 | 4 | fn main() -> Result<(), VarError> { 5 | println!("cargo:rustc-link-lib=dylib=rocsolver"); 6 | if cfg!(windows) { 7 | let mut path = PathBuf::from(env::var("HIP_PATH")?); 8 | path.push("lib"); 9 | println!("cargo:rustc-link-search=native={}", path.display()); 10 | } else { 11 | println!("cargo:rustc-link-search=native=/opt/rocm/lib/"); 12 | } 13 | Ok(()) 14 | } 15 | -------------------------------------------------------------------------------- /rocsolver-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(warnings)] 2 | mod rocsolver; 3 | pub use rocsolver::*; -------------------------------------------------------------------------------- /rocsparse-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rocsparse-sys" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | links = "rocsparse" 7 | 8 | [lib] 9 | -------------------------------------------------------------------------------- /rocsparse-sys/README: -------------------------------------------------------------------------------- 1 | bindgen $Env:HIP_PATH/include/rocsparse/rocsparse.h -o 
src/rocsparse.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "rocsparse_.*" --allowlist-var "ROCSPARSE_*" --must-use-type rocsparse_status -- -I"$Env:HIP_PATH/include" -------------------------------------------------------------------------------- /rocsparse-sys/build.rs: -------------------------------------------------------------------------------- 1 | use std::env::VarError; 2 | use std::{env, path::PathBuf}; 3 | 4 | fn main() -> Result<(), VarError> { 5 | println!("cargo:rustc-link-lib=dylib=rocsparse"); 6 | if cfg!(windows) { 7 | let mut path = PathBuf::from(env::var("HIP_PATH")?); 8 | path.push("lib"); 9 | println!("cargo:rustc-link-search=native={}", path.display()); 10 | } else { 11 | println!("cargo:rustc-link-search=native=/opt/rocm/lib/"); 12 | } 13 | Ok(()) 14 | } 15 | -------------------------------------------------------------------------------- /rocsparse-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(warnings)] 2 | mod rocsparse; 3 | pub use rocsparse::*; -------------------------------------------------------------------------------- /xtask/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "xtask" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | argh = "0.1" 9 | # cargo_metadata v0.18 requires rust 1.70 or higher 10 | cargo_metadata = "=0.17.0" 11 | # cargo-platform is a dependency of cargo_metadata; version 0.1.6 requires rust 1.70 or higher 12 | cargo-platform = "=0.1.5" 13 | serde = "1.0.193" 14 | serde_json = "1.0.108" 15 | time = { version = "=0.3.36", features = ["local-offset"] } 16 | 17 | [target.'cfg(windows)'.dependencies] 18 | zip = { version = "0.6.6", features = ["deflate", "time"], default-features = false } 19 | 20 | [target.'cfg(unix)'.dependencies] 21 | flate2 = { version = "1.0.28", features = ["cloudflare_zlib"], default-features = 
false } 22 | tar = "0.4" -------------------------------------------------------------------------------- /zluda/README: -------------------------------------------------------------------------------- 1 | bindgen /usr/local/cuda/include/cuda.h -o cuda.rs --whitelist-function="^cu.*" --size_t-is-usize --default-enum-style=newtype --no-layout-tests --no-doc-comments --no-derive-debug --new-type-alias "^CUdevice$|^CUdeviceptr$" 2 | sed -i -e 's/extern "C" {//g' -e 's/-> CUresult;/-> CUresult { impl_::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' cuda.rs 3 | rustfmt cuda.rs -------------------------------------------------------------------------------- /zluda/build.rs: -------------------------------------------------------------------------------- 1 | use vergen::{Config, vergen}; 2 | 3 | fn main() { 4 | vergen(Config::default()).unwrap() 5 | } -------------------------------------------------------------------------------- /zluda/src/impl/empty_module.ptx: -------------------------------------------------------------------------------- 1 | .version 1.0 2 | .target sm_10 3 | .address_size 64 -------------------------------------------------------------------------------- /zluda/src/impl/os_unix.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::c_void; 2 | 3 | pub unsafe fn heap_create() -> *mut c_void { 4 | usize::MAX as *mut _ 5 | } 6 | 7 | #[cfg(test)] 8 | pub unsafe fn load_cuda() -> *mut c_void { 9 | use libc; 10 | use std::ffi::CStr; 11 | 12 | let result = libc::dlopen( 13 | b"/usr/lib/x86_64-linux-gnu/libcuda.so.1\0".as_ptr() as _, 14 | libc::RTLD_LOCAL | libc::RTLD_LAZY, 15 | ); 16 | if result == std::ptr::null_mut() { 17 | panic!("{}", CStr::from_ptr(libc::dlerror()).to_string_lossy()); 18 | } 19 | result 20 | } 21 | 22 | #[cfg(test)] 23 | pub unsafe fn get_proc_address(handle: *mut c_void, func: &[u8]) -> *mut c_void { 24 | use libc; 25 | libc::dlsym(handle, func.as_ptr() as 
*const _) 26 | } 27 | -------------------------------------------------------------------------------- /zluda/src/impl/os_win.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::c_void; 2 | 3 | use winapi::um::{heapapi::HeapCreate, winnt::HEAP_NO_SERIALIZE}; 4 | 5 | pub unsafe fn heap_create() -> *mut c_void { 6 | HeapCreate(HEAP_NO_SERIALIZE, 0, 0) 7 | } 8 | -------------------------------------------------------------------------------- /zluda/src/impl/surfref.rs: -------------------------------------------------------------------------------- 1 | use crate::{hip_call_cuda, r#impl::hipfix}; 2 | use cuda_types::{CUarray, CUresult}; 3 | use hip_runtime_sys::*; 4 | use std::ptr; 5 | 6 | pub(crate) unsafe fn set_array( 7 | surfref: *mut textureReference, 8 | array: CUarray, 9 | _flags: u32, 10 | ) -> Result<(), CUresult> { 11 | if array == ptr::null_mut() { 12 | return Err(CUresult::CUDA_ERROR_INVALID_VALUE); 13 | } 14 | let array = hipfix::array::get(array); 15 | let array = array.as_mut().unwrap(); 16 | hip_call_cuda!(hipTexRefSetFormat( 17 | surfref, 18 | array.Format, 19 | array.NumChannels as i32, 20 | )); 21 | hip_call_cuda!(hipTexRefSetArray(surfref, array, HIP_TRSA_OVERRIDE_FORMAT)); 22 | Ok(()) 23 | } 24 | -------------------------------------------------------------------------------- /zluda/tests/bfi.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry kernel_bfi( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .#TYPE# a; 13 | .reg .#TYPE# b; 14 | .reg .b32 c; 15 | .reg .b32 d; 16 | .reg .#TYPE# f; 17 | 18 | ld.param.u64 in_addr, [input]; 19 | ld.param.u64 out_addr, [output]; 20 | 21 | ld.#TYPE# a, [in_addr]; 22 | add.u64 in_addr, in_addr, #WIDTH#; 23 | ld.#TYPE# b, [in_addr]; 24 | add.u64 in_addr, in_addr, #WIDTH#; 25 | ld.b32 
c, [in_addr]; 26 | add.u64 in_addr, in_addr, #WIDTH#; 27 | ld.b32 d, [in_addr]; 28 | 29 | bfi.#TYPE# f,a,b,c,d; 30 | 31 | st.#TYPE# [out_addr], f; 32 | 33 | ret; 34 | } 35 | -------------------------------------------------------------------------------- /zluda/tests/context_double_destroy_fails.rs: -------------------------------------------------------------------------------- 1 | use crate::common::CudaDriverFns; 2 | use cuda_types::*; 3 | use std::ptr; 4 | 5 | mod common; 6 | 7 | cuda_driver_test!(double_destroy_fails); 8 | 9 | unsafe fn double_destroy_fails(cuda: T) { 10 | assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS); 11 | let mut ctx = ptr::null_mut(); 12 | assert_eq!( 13 | cuda.cuCtxCreate_v2(&mut ctx, 0, CUdevice_v1(0)), 14 | CUresult::CUDA_SUCCESS 15 | ); 16 | assert_eq!(cuda.cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); 17 | let destroy_result = cuda.cuCtxDestroy_v2(ctx); 18 | // original CUDA impl returns randomly one or the other 19 | assert!( 20 | destroy_result == CUresult::CUDA_ERROR_INVALID_CONTEXT 21 | || destroy_result == CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED 22 | ); 23 | } 24 | -------------------------------------------------------------------------------- /zluda/tests/context_empty_pop_fails.rs: -------------------------------------------------------------------------------- 1 | use crate::common::CudaDriverFns; 2 | use cuda_types::*; 3 | use std::ptr; 4 | 5 | mod common; 6 | 7 | cuda_driver_test!(empty_pop_fails); 8 | 9 | unsafe fn empty_pop_fails(cuda: T) { 10 | assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS); 11 | let mut ctx = ptr::null_mut(); 12 | assert_eq!( 13 | cuda.cuCtxPopCurrent_v2(&mut ctx), 14 | CUresult::CUDA_ERROR_INVALID_CONTEXT 15 | ); 16 | } 17 | -------------------------------------------------------------------------------- /zluda/tests/context_no_current_on_init.rs: -------------------------------------------------------------------------------- 1 | use crate::common::CudaDriverFns; 2 | use cuda_types::*; 3 | 
use std::ptr; 4 | 5 | mod common; 6 | 7 | cuda_driver_test!(no_current_on_init); 8 | 9 | unsafe fn no_current_on_init(cuda: T) { 10 | assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS); 11 | let mut ctx = 1 as _; 12 | assert_eq!(cuda.cuCtxGetCurrent(&mut ctx), CUresult::CUDA_SUCCESS); 13 | assert_eq!(ctx, ptr::null_mut()); 14 | } 15 | -------------------------------------------------------------------------------- /zluda/tests/context_push_invalid_should_crash.rs: -------------------------------------------------------------------------------- 1 | use crate::common::CudaDriverFns; 2 | use cuda_types::*; 3 | 4 | mod common; 5 | 6 | cuda_driver_test!(context_push_invalid_should_crash); 7 | 8 | // This test is supposed to segfault on NV runtime, but this is impossible 9 | // to express easily in Rust right now on Windows 10 | unsafe fn context_push_invalid_should_crash(cuda: T) { 11 | assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS); 12 | let mut fake_ctx = vec![0usize; 32]; 13 | let result = cuda.cuCtxPushCurrent_v2(fake_ctx.as_mut_ptr() as _); 14 | assert_eq!(result, CUresult::CUDA_ERROR_INVALID_CONTEXT); 15 | } 16 | -------------------------------------------------------------------------------- /zluda/tests/function_version.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_35 3 | .address_size 64 4 | 5 | .entry foobar() { ret; } 6 | -------------------------------------------------------------------------------- /zluda/tests/kernel_args_align.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry add( 6 | .param .u32 value_arg, 7 | .param .align 8 .b8 input[8], 8 | .param .u64 output 9 | ) 10 | { 11 | .reg .u64 in_addr; 12 | .reg .u64 out_addr; 13 | .reg .u32 value; 14 | .reg .u32 temp; 15 | .reg .u32 temp2; 16 | 17 | ld.param.u32 value, [value_arg]; 18 | ld.param.u64 in_addr, [input]; 
19 | ld.param.u64 out_addr, [output]; 20 | 21 | ld.u32 temp, [in_addr]; 22 | add.u32 temp2, temp, value; 23 | st.u32 [out_addr], temp2; 24 | ret; 25 | } 26 | -------------------------------------------------------------------------------- /zluda/tests/kernel_extra.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry add( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | { 10 | .reg .u64 in_addr; 11 | .reg .u64 out_addr; 12 | .reg .u64 temp; 13 | .reg .u64 temp2; 14 | 15 | ld.param.u64 in_addr, [input]; 16 | ld.param.u64 out_addr, [output]; 17 | 18 | ld.u64 temp, [in_addr]; 19 | add.u64 temp2, temp, 1; 20 | st.u64 [out_addr], temp2; 21 | ret; 22 | } 23 | -------------------------------------------------------------------------------- /zluda/tests/kernel_sust.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .global .surfref image; 6 | 7 | .visible .entry sust( 8 | .param .b32 input_x, 9 | .param .b32 input_y, 10 | .param .b32 input_z, 11 | .param .b64 image_bindless_param, 12 | #PARAM_VALUES# 13 | ) 14 | { 15 | .reg .b32 coord_x; 16 | .reg .b32 coord_y; 17 | .reg .b32 coord_z; 18 | .reg .b32 coord_depth; 19 | .reg .u64 image_bindless; 20 | 21 | ld.param.b32 coord_x, [input_x]; 22 | ld.param.b32 coord_y, [input_y]; 23 | ld.param.b32 coord_z, [input_z]; 24 | ld.param.u64 image_bindless, [image_bindless_param]; 25 | mov.b32 coord_depth, coord_z; 26 | 27 | #REG_VALUES# 28 | 29 | sust.b.#GEOMETRY##FORMAT#.trap [#IMAGE_SRC#, #COORDINATES#], #VALUES#; 30 | ret; 31 | } 32 | -------------------------------------------------------------------------------- /zluda/tests/kernel_texref_1d.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .global .texref image; 6 | 7 
| .visible .entry texref_1d( 8 | .param .s32 input_x, 9 | .param .u64 output 10 | ) 11 | { 12 | .reg .u64 out_addr; 13 | .reg .u64 temp; 14 | .reg .u64 temp2; 15 | .reg .s32 x; 16 | .reg .f32 r; 17 | .reg .f32 g; 18 | .reg .f32 b; 19 | .reg .f32 a; 20 | 21 | ld.param.s32 x, [input_x]; 22 | ld.param.u64 out_addr, [output]; 23 | 24 | tex.1d.v4.f32.s32 {r, g, b, a}, [image, {x}]; 25 | st.b32 [out_addr], a; 26 | st.b32 [out_addr+4], b; 27 | st.b32 [out_addr+8], g; 28 | st.b32 [out_addr+12], r; 29 | ret; 30 | } 31 | -------------------------------------------------------------------------------- /zluda/tests/kernel_texref_2d.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .global .texref image; 6 | 7 | .visible .entry texref( 8 | .param .f32 input_x, 9 | .param .f32 input_y, 10 | .param .u64 output 11 | ) 12 | { 13 | .reg .u64 out_addr; 14 | .reg .u64 temp; 15 | .reg .u64 temp2; 16 | .reg .f32 x; 17 | .reg .f32 y; 18 | .reg .s32 r; 19 | .reg .s32 g; 20 | .reg .s32 b; 21 | .reg .s32 a; 22 | 23 | ld.param.f32 x, [input_x]; 24 | ld.param.f32 y, [input_y]; 25 | ld.param.u64 out_addr, [output]; 26 | 27 | tex.2d.v4.s32.f32 {r, g, b, a}, [image, {x, y}]; 28 | st.b32 [out_addr], a; 29 | st.b32 [out_addr+4], b; 30 | st.b32 [out_addr+8], g; 31 | st.b32 [out_addr+12], r; 32 | ret; 33 | } 34 | -------------------------------------------------------------------------------- /zluda/tests/kernel_unused_global.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .global .align 4 .b8 global_buffer[4] = {202, 29, 180, 50}; 6 | 7 | .visible .entry kernel( 8 | .param .u64 input 9 | ) 10 | { 11 | ret; 12 | } 13 | -------------------------------------------------------------------------------- /zluda/tests/llama.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/zluda/tests/llama.bin -------------------------------------------------------------------------------- /zluda/tests/maxntid.ptx: -------------------------------------------------------------------------------- 1 | .version 6.5 2 | .target sm_30 3 | .address_size 64 4 | 5 | .visible .entry add( 6 | .param .u64 input, 7 | .param .u64 output 8 | ) 9 | .maxntid 32, 1, 1 10 | { 11 | .reg .u64 in_addr; 12 | .reg .u64 out_addr; 13 | .reg .u64 temp; 14 | .reg .u64 temp2; 15 | 16 | ld.param.u64 in_addr, [input]; 17 | ld.param.u64 out_addr, [output]; 18 | 19 | ld.u64 temp, [in_addr]; 20 | add.u64 temp2, temp, 1; 21 | st.u64 [out_addr], temp2; 22 | ret; 23 | } 24 | -------------------------------------------------------------------------------- /zluda/tests/stream_can_destroy.rs: -------------------------------------------------------------------------------- 1 | use crate::common::CudaDriverFns; 2 | use cuda_types::*; 3 | use std::ptr; 4 | 5 | mod common; 6 | 7 | cuda_driver_test!(can_destroy_stream); 8 | 9 | unsafe fn can_destroy_stream(cuda: T) { 10 | assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS); 11 | let mut ctx = ptr::null_mut(); 12 | assert_eq!( 13 | cuda.cuCtxCreate_v2(&mut ctx, 0, CUdevice_v1(0)), 14 | CUresult::CUDA_SUCCESS 15 | ); 16 | let mut stream = ptr::null_mut(); 17 | assert_eq!(cuda.cuStreamCreate(&mut stream, 0), CUresult::CUDA_SUCCESS); 18 | assert_eq!(cuda.cuStreamDestroy_v2(stream), CUresult::CUDA_SUCCESS); 19 | // Cleanup 20 | assert_eq!(cuda.cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); 21 | } 22 | -------------------------------------------------------------------------------- /zluda/tests/stream_cant_destroy_default.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{CudaDriverFns, CU_STREAM_LEGACY}; 2 | use cuda_types::*; 3 | use std::ptr; 4 | 5 | mod common; 6 | 7 | 
cuda_driver_test!(cant_destroy_default_stream); 8 | 9 | unsafe fn cant_destroy_default_stream(cuda: T) { 10 | assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS); 11 | let mut ctx = ptr::null_mut(); 12 | assert_eq!( 13 | cuda.cuCtxCreate_v2(&mut ctx, 0, CUdevice_v1(0)), 14 | CUresult::CUDA_SUCCESS 15 | ); 16 | assert_ne!( 17 | cuda.cuStreamDestroy_v2(CU_STREAM_LEGACY as *mut _), 18 | CUresult::CUDA_SUCCESS 19 | ); 20 | // Cleanup 21 | assert_eq!(cuda.cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); 22 | } 23 | -------------------------------------------------------------------------------- /zluda_api/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_api" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "nvapi64" 9 | crate-type = ["cdylib"] 10 | 11 | [dependencies] 12 | # winapi = { version = "0.3", features = ["d3d12", "std"] } 13 | libloading = "0.8" 14 | once_cell = "1.18.0" 15 | cuda_types = { path = "../cuda_types" } 16 | 17 | [dependencies.windows] 18 | version = "0.48" 19 | features = [ 20 | "Win32_Foundation", 21 | "Win32_Graphics_Direct3D11", 22 | "Win32_Graphics_Direct3D12", 23 | "Win32_Graphics_Dxgi_Common", 24 | ] 25 | 26 | [package.metadata.zluda] 27 | debug_only = true 28 | windows_only = true 29 | skip_zip = true 30 | -------------------------------------------------------------------------------- /zluda_api/README: -------------------------------------------------------------------------------- 1 | bindgen "src\nvapi_wrapper.h" --allowlist-var="[nN][vV].*" --allowlist-type="[nN][vV].*" --blocklist-type="[dD].*" --allowlist-function="$^" --no-derive-debug --default-enum-style=newtype --no-layout-tests --no-doc-comments -o src/nvapi.rs -- -I"C:\dev\nvapi\R530-developer-2" -x c++ 2 | -------------------------------------------------------------------------------- /zluda_api/src/nvapi_wrapper.h: 
-------------------------------------------------------------------------------- 1 | #define __in 2 | #define __out 3 | #define __inout 4 | #define __in_opt 5 | #define __out_opt 6 | #define __inout_opt 7 | #define __in_ecount(x) 8 | #define __inout_ecount_part_opt(x,y) 9 | #define __out_ecount_full_opt(x) 10 | #define __inout_ecount_full(x) 11 | #include 12 | #include 13 | #include 14 | #include -------------------------------------------------------------------------------- /zluda_blas/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_blas" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2021" 6 | 7 | [lib] 8 | name = "cublas" 9 | crate-type = ["cdylib"] 10 | 11 | [features] 12 | rocm5 = ["hip_common/rocm5", "zluda_dark_api/rocm5"] 13 | 14 | [dependencies] 15 | rocblas-sys = { path = "../rocblas-sys" } 16 | rocsolver-sys = { path = "../rocsolver-sys" } 17 | hip_common = { path = "../hip_common" } 18 | zluda_dark_api = { path = "../zluda_dark_api" } 19 | cuda_types = { path = "../cuda_types" } 20 | 21 | [package.metadata.zluda] 22 | linux_names = ["libcublas.so.10", "libcublas.so.11"] 23 | dump_names = ["libcublas.so"] 24 | -------------------------------------------------------------------------------- /zluda_blas/README: -------------------------------------------------------------------------------- 1 | bindgen /usr/local/cuda/targets/x86_64-linux/include/cublas.h -o src/cublas.rs --allowlist-function="^cublas.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I/usr/local/cuda/targets/x86_64-linux/include -x c++ 2 | sed -i -e 's/extern "C" {//g' -e 's/-> cublasStatus_t;/-> cublasStatus_t { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/cublas.rs 3 | bindgen /usr/local/cuda/targets/x86_64-linux/include/cublasXt.h -o src/cublasxt.rs --allowlist-function="^cublasXt.*" --default-enum-style=newtype --no-layout-tests 
--no-derive-debug -- -I/usr/local/cuda/targets/x86_64-linux/include 4 | sed -i -e 's/extern "C" {//g' -e 's/-> cublasStatus_t;/-> cublasStatus_t { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/cublasxt.rs -------------------------------------------------------------------------------- /zluda_blas/build/wrapper.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include -------------------------------------------------------------------------------- /zluda_blaslt/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_blaslt" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2021" 6 | 7 | [lib] 8 | name = "cublasLt" 9 | crate-type = ["cdylib"] 10 | 11 | [features] 12 | rocm5 = ["hip_common/rocm5", "zluda_dark_api/rocm5"] 13 | 14 | [dependencies] 15 | hipblaslt-sys = { path = "../hipblaslt-sys" } 16 | cuda_types = { path = "../cuda_types" } 17 | hip_common = { path = "../hip_common" } 18 | zluda_dark_api = { path = "../zluda_dark_api" } 19 | 20 | [package.metadata.zluda] 21 | windows_nightly = true 22 | linux_names = ["libcublasLt.so.11"] 23 | dump_names = ["libcublasLt.so"] 24 | -------------------------------------------------------------------------------- /zluda_blaslt/README: -------------------------------------------------------------------------------- 1 | bindgen /usr/local/cuda/targets/x86_64-linux/include/cublasLt.h -o src/cublaslt.rs --allowlist-function="^cublasLt.*" --blocklist-function="^cublasLtLoggerSetFile$" --allowlist-type="cu.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I/usr/local/cuda/targets/x86_64-linux/include 2 | sed -i -e 's/extern "C" {//g' -e 's/-> cublasStatus_t;/-> cublasStatus_t { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/cublaslt.rs 
-------------------------------------------------------------------------------- /zluda_blaslt/src/decl.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! decl { 3 | ($name:ident) => { 4 | #[no_mangle] 5 | pub extern "system" fn $name() -> cublasStatus_t { 6 | cublasStatus_t::CUBLAS_STATUS_SUCCESS 7 | } 8 | }; 9 | } 10 | -------------------------------------------------------------------------------- /zluda_ccl/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_ccl" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "nccl" 9 | crate-type = ["cdylib"] 10 | 11 | [dependencies] 12 | 13 | [package.metadata.zluda] 14 | linux_names = ["libnccl.so.2"] 15 | dump_names = ["libnccl.so"] 16 | -------------------------------------------------------------------------------- /zluda_ccl/README: -------------------------------------------------------------------------------- 1 | bindgen /usr/include/nccl.h -o src/nccl.rs --allowlist-function="^p?nccl.*" --must-use-type "ncclResult_t" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I/usr/local/cuda/targets/x86_64-linux/include 2 | sed -i -e 's/extern "C" {//g' -e 's/-> ncclResult_t;/-> ncclResult_t { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "C" fn /g' src/nccl.rs -------------------------------------------------------------------------------- /zluda_ccl/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[allow(warnings)] 2 | mod nccl; 3 | pub use nccl::*; 4 | 5 | #[cfg(debug_assertions)] 6 | pub(crate) fn unsupported() -> ncclResult_t { 7 | unimplemented!() 8 | } 9 | 10 | #[cfg(not(debug_assertions))] 11 | pub(crate) fn unsupported() -> ncclResult_t { 12 | ncclResult_t::ncclInternalError 13 | } 14 | 
-------------------------------------------------------------------------------- /zluda_dark_api/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_dark_api" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | 9 | [features] 10 | rocm5 = ["hip_common/rocm5"] 11 | 12 | [dependencies] 13 | cuda_types = { path = "../cuda_types" } 14 | hip_common = { path = "../hip_common" } 15 | bitflags = "2.4" 16 | either = "1.9" 17 | bit-vec = "0.6.3" 18 | paste = "1.0" 19 | lz4-sys = "1.9" 20 | cloudflare-zlib = "0.2.10" 21 | thread-id = "4.1.0" 22 | # we don't need elf32, but goblin has a bug where elf64 does not build without elf32 23 | goblin = { version = "0.5.1", default-features = false, features = ["elf64", "elf32"] } 24 | -------------------------------------------------------------------------------- /zluda_dnn/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_dnn" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2021" 6 | 7 | [lib] 8 | name = "cudnn" 9 | crate-type = ["cdylib"] 10 | 11 | [features] 12 | rocm5 = ["hip_runtime-sys/rocm5"] 13 | 14 | [dependencies] 15 | cuda_types = { path = "../cuda_types" } 16 | hip_common = { path = "../hip_common" } 17 | miopen-sys = { path = "../miopen-sys" } 18 | hip_runtime-sys = { path = "../hip_runtime-sys" } 19 | zluda_dark_api = { path = "../zluda_dark_api" } 20 | lazy_static = "1.4.0" 21 | 22 | [package.metadata.zluda] 23 | linux_only = true # windows_nightly = true 24 | linux_names = ["libcudnn.so.9"] 25 | dump_names = ["libcudnn.so"] 26 | -------------------------------------------------------------------------------- /zluda_dnn/README: -------------------------------------------------------------------------------- 1 | bindgen /usr/local/cuda/targets/x86_64-linux/include/cudnn_v9.h -o src/cudnn.rs 
--allowlist-function="^cudnn.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I/usr/local/cuda/targets/x86_64-linux/include 2 | sed -i -e 's/extern "C" {//g' -e 's/-> cudnnStatus_t;/-> cudnnStatus_t { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/cudnn.rs -------------------------------------------------------------------------------- /zluda_dump/README.md: -------------------------------------------------------------------------------- 1 | grep -E '^cu.*' log.txt | sed 's/([^)]*)//g' | sort | uniq > uniq_host.txt 2 | cat *.log | grep "^Unrecognized s" | grep -Eo '`([^`]*)`' | sed -E 's/^`((@\w+ )?[^[:space:]]*).*`/\1/' | sort | uniq > uniq_statements.txt 3 | cat *.log | grep "^Unrecognized d" | grep -Eo '`([^`]*)`' | sed -E 's/^`([^`]*)`/\1/' | sort | uniq > uniq_directives.txt -------------------------------------------------------------------------------- /zluda_dump/src/events_to_csv.py: -------------------------------------------------------------------------------- 1 | # Convert event trace json to csv 2 | import json 3 | import csv 4 | import sys 5 | 6 | def main(p): 7 | with open(p, 'rb') as f: 8 | event_text = f.read() 9 | try: 10 | event_trace = json.loads(event_text) 11 | except json.JSONDecodeError: 12 | event_text = bytearray(event_text) 13 | event_text.append(ord(']')) 14 | event_trace = json.loads(event_text) 15 | with open(f'{p}.csv', 'w', newline='') as csvfile: 16 | writer = csv.writer(csvfile, dialect='excel') 17 | writer.writerow(['name', 'cat', 'ts', 'dur', 'pid', 'tid']) 18 | for e in event_trace: 19 | if e['ph'] != 'X': 20 | continue 21 | writer.writerow([e['name'], e['cat'], e['ts'], e['dur'], e['pid'], e['tid']]) 22 | 23 | if __name__ == "__main__": 24 | main(sys.argv[1]) 25 | -------------------------------------------------------------------------------- /zluda_fft/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = 
"zluda_fft" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "cufft" 9 | crate-type = ["cdylib"] 10 | 11 | [features] 12 | rocm5 = ["hip_common/rocm5", "zluda_dark_api/rocm5"] 13 | 14 | [dependencies] 15 | hipfft-sys = { path = "../hipfft-sys" } 16 | hip_common = { path = "../hip_common" } 17 | cuda_types = { path = "../cuda_types" } 18 | zluda_dark_api = { path = "../zluda_dark_api" } 19 | slab = "0.4" 20 | lazy_static = "1.4.0" 21 | 22 | [package.metadata.zluda] 23 | linux_names = ["libcufft.so.10"] 24 | dump_names = ["libcufft.so"] 25 | -------------------------------------------------------------------------------- /zluda_fft/README: -------------------------------------------------------------------------------- 1 | bindgen /usr/local/cuda/targets/x86_64-linux/include/cufft.h -o src/cufft.rs --allowlist-function="^cufft.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I/usr/local/cuda/targets/x86_64-linux/include 2 | bindgen /usr/local/cuda/targets/x86_64-linux/include/cufftXt.h --allowlist-function="^cufftXt.*" --blocklist-type="^cufft[^XB].*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -o src/cufftxt.rs -- -I/usr/local/cuda/targets/x86_64-linux/include 3 | sed -i -e 's/extern "C" {//g' -e 's/-> cufftResult;/-> cufftResult { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/cufft.rs 4 | sed -i -e 's/extern "C" {//g' -e 's/-> cufftResult;/-> cufftResult { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/cufftxt.rs -------------------------------------------------------------------------------- /zluda_fftw/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_fftw" 3 | version = "0.0.0" 4 | authors = ["Seunghoon Lee "] 5 | edition = "2021" 6 | 7 | [lib] 8 | name = "cufftw" 9 | crate-type = ["cdylib"] 10 | 11 | [dependencies] 12 | 13 
| [package.metadata.zluda] 14 | linux_names = ["libcufftw.so.10"] 15 | dump_names = ["libcufftw.so"] 16 | -------------------------------------------------------------------------------- /zluda_fftw/README: -------------------------------------------------------------------------------- 1 | bindgen /usr/local/cuda/targets/x86_64-linux/include/cufftw.h -o src/cufftw.rs --allowlist-function="^fftw.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I/usr/local/cuda/targets/x86_64-linux/include 2 | sed -i -e 's/extern "C" {//g' -e 's/-> fftw_plan;/-> fftw_plan { unimplemented!()/g' -e 's/-> fftwf_plan;/-> fftwf_plan { unimplemented!()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/cufftw.rs -------------------------------------------------------------------------------- /zluda_fftw/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[allow(warnings)] 2 | mod cufftw; 3 | pub use cufftw::*; 4 | -------------------------------------------------------------------------------- /zluda_inject/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_inject" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [[bin]] 8 | name = "zluda" 9 | path = "src/main.rs" 10 | 11 | [target.'cfg(windows)'.dependencies] 12 | winapi = { version = "0.3.9", features = ["jobapi", "jobapi2", "processenv", "processthreadsapi", "synchapi", "winbase", "std"] } 13 | tempfile = "3" 14 | argh = "0.1" 15 | detours-sys = { path = "../detours-sys" } 16 | 17 | [dev-dependencies] 18 | # all of those are used in integration tests 19 | zluda_redirect = { path = "../zluda_redirect" } 20 | zluda_dump = { path = "../zluda_dump" } 21 | zluda_ml = { path = "../zluda_ml" } 22 | 23 | [build-dependencies] 24 | embed-manifest = "1.3.1" 25 | 26 | [package.metadata.zluda] 27 | windows_only = true 28 | 
-------------------------------------------------------------------------------- /zluda_inject/src/main.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | #[cfg(target_os = "windows")] 3 | mod win; 4 | #[cfg(target_os = "windows")] 5 | mod bin; 6 | 7 | #[cfg(target_os = "windows")] 8 | fn main() -> Result<(), Box> { 9 | bin::main_impl() 10 | } 11 | 12 | #[cfg(not(target_os = "windows"))] 13 | fn main() {} 14 | -------------------------------------------------------------------------------- /zluda_inject/tests/helpers/direct_cuinit.rs: -------------------------------------------------------------------------------- 1 | #![crate_type = "bin"] 2 | 3 | extern "system" { 4 | fn cuInit(flags: u32) -> u32; 5 | } 6 | 7 | fn main() { 8 | unsafe { cuInit(0) }; 9 | } 10 | -------------------------------------------------------------------------------- /zluda_inject/tests/helpers/do_cuinit.rs: -------------------------------------------------------------------------------- 1 | #![crate_type = "cdylib"] 2 | 3 | extern "system" { 4 | fn cuInit(flags: u32) -> u32; 5 | } 6 | 7 | #[no_mangle] 8 | unsafe extern "system" fn do_cuinit(flags: u32) -> u32 { 9 | cuInit(flags) 10 | } 11 | -------------------------------------------------------------------------------- /zluda_inject/tests/helpers/do_cuinit_early.rs: -------------------------------------------------------------------------------- 1 | #![crate_type = "bin"] 2 | 3 | #[link(name = "do_cuinit")] 4 | extern "system" { 5 | fn do_cuinit(flags: u32) -> u32; 6 | } 7 | 8 | fn main() { 9 | unsafe { do_cuinit(0) }; 10 | } 11 | -------------------------------------------------------------------------------- /zluda_inject/tests/helpers/do_cuinit_late.rs: -------------------------------------------------------------------------------- 1 | #![crate_type = "bin"] 2 | 3 | use std::ffi::c_void; 4 | use std::mem; 5 | use std::env; 6 | use std::path::PathBuf; 7 | use std::ffi::CString; 8 | 
9 | extern "system" { 10 | fn LoadLibraryA(lpFileName: *const i8) -> *mut c_void; 11 | fn GetProcAddress(hModule: *mut c_void, lpProcName: *const u8) -> *mut c_void; 12 | } 13 | 14 | fn main() { 15 | let current_exe = env::current_exe().unwrap(); 16 | let mut dll = PathBuf::from(current_exe.parent().unwrap()); 17 | dll.push("do_cuinit.dll"); 18 | let dll_cstring = CString::new(dll.to_str().unwrap()).unwrap(); 19 | let nvcuda = unsafe { LoadLibraryA(dll_cstring.as_ptr()) }; 20 | let cu_init = unsafe { GetProcAddress(nvcuda, b"do_cuinit\0".as_ptr()) }; 21 | let cu_init = unsafe { mem::transmute::<_, unsafe extern "system" fn(u32) -> u32>(cu_init) }; 22 | unsafe { cu_init(0) }; 23 | } 24 | -------------------------------------------------------------------------------- /zluda_inject/tests/helpers/do_cuinit_late_clr.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/zluda_inject/tests/helpers/do_cuinit_late_clr.exe -------------------------------------------------------------------------------- /zluda_inject/tests/helpers/indirect_cuinit.rs: -------------------------------------------------------------------------------- 1 | #![crate_type = "bin"] 2 | 3 | use std::ffi::c_void; 4 | use std::mem; 5 | 6 | extern "system" { 7 | fn LoadLibraryA(lpFileName: *const u8) -> *mut c_void; 8 | fn GetProcAddress(hModule: *mut c_void, lpProcName: *const u8) -> *mut c_void; 9 | } 10 | 11 | fn main() { 12 | let nvcuda = unsafe { LoadLibraryA(b"C:\\Windows\\System32\\nvcuda.dll\0".as_ptr()) }; 13 | let cu_init = unsafe { GetProcAddress(nvcuda, b"cuInit\0".as_ptr()) }; 14 | let cu_init = unsafe { mem::transmute::<_, unsafe extern "system" fn(u32) -> u32>(cu_init) }; 15 | unsafe { cu_init(0) }; 16 | } 17 | -------------------------------------------------------------------------------- /zluda_inject/tests/helpers/nvcuda.lib: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/zluda_inject/tests/helpers/nvcuda.lib -------------------------------------------------------------------------------- /zluda_inject/tests/helpers/query_exe.rs: -------------------------------------------------------------------------------- 1 | #![crate_type = "bin"] 2 | 3 | use std::io; 4 | use std::process::Command; 5 | 6 | fn main() -> io::Result<()> { 7 | let status = Command::new("query.exe").arg("session").status()?; 8 | // App returns 1 on my machine 9 | assert_eq!(status.code(), Some(1)); 10 | Ok(()) 11 | } 12 | -------------------------------------------------------------------------------- /zluda_inject/tests/helpers/subprocess.rs: -------------------------------------------------------------------------------- 1 | #![crate_type = "bin"] 2 | 3 | use std::io; 4 | use std::process::Command; 5 | 6 | fn main() -> io::Result<()> { 7 | let status = Command::new("direct_cuinit.exe").status()?; 8 | assert!(status.success()); 9 | Ok(()) 10 | } 11 | -------------------------------------------------------------------------------- /zluda_lib/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_lib" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "nvcuda" 9 | crate-type = ["cdylib"] 10 | 11 | [features] 12 | rocm5 = ["zluda/rocm5"] 13 | nightly = ["zluda/nightly"] 14 | 15 | [dependencies] 16 | zluda = { path = "../zluda" } 17 | 18 | [package.metadata.zluda] 19 | linux_names = ["libcuda.so", "libcuda.so.1"] 20 | skip_dump_link = true 21 | -------------------------------------------------------------------------------- /zluda_lib/README.md: -------------------------------------------------------------------------------- 1 | This project exists solely as a workaround, to make 
sure that the ZLUDA-created CUDA driver does not clash with the real CUDA driver when running unit tests -------------------------------------------------------------------------------- /zluda_lib/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub extern crate zluda; 2 | 3 | pub use zluda::cuda::*; 4 | 5 | // For some reason, on Linux the linker strips out all our re-exports, 6 | // there's probably a cleaner solution, but for now just exporting 7 | // the function below stops it from doing so 8 | #[no_mangle] 9 | fn _zluda_very_bad_linker_hack() { 10 | let _ = unsafe { cuInit(0) }; 11 | } 12 | -------------------------------------------------------------------------------- /zluda_llvm/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_llvm" 3 | version = "0.0.0" 4 | edition = "2018" 5 | 6 | [dependencies] 7 | bitflags = "2.4" 8 | llvm-sys = { path = "../ext/llvm-sys.rs" } 9 | 10 | [build-dependencies] 11 | cc = "1.0.69" 12 | -------------------------------------------------------------------------------- /zluda_llvm/README.md: -------------------------------------------------------------------------------- 1 | The LLVM-C interface has decent coverage, but it does not expose everything, 2 | hence this project to fill in the gaps. 
3 | Compilation order: 4 | * CMake generate llvm-project 5 | * Compile llvm-config and build subset of LLVM components 6 | * Link llvm-sys Rust wrapper with LLVM components 7 | * Compile C++ code in zluda_llvm 8 | * Link zluda_llvm Rust + zluda_llvm C++ + llvm-sys 9 | -------------------------------------------------------------------------------- /zluda_llvm/build.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | 3 | fn main() { 4 | println!("cargo:rerun-if-changed=src/lib.cpp"); 5 | println!("cargo:rerun-if-changed=src/lib.rs"); 6 | let llvm_cxxflags = env::var("DEP_LLVM_15_CXXFLAGS").unwrap(); 7 | let mut cc = cc::Build::new(); 8 | for flag in llvm_cxxflags.split_ascii_whitespace() { 9 | cc.flag(flag); 10 | } 11 | cc.shared_flag(true) 12 | .file("src/lib.cpp") 13 | .compile("llvm_zluda_cpp"); 14 | // rustc-link-lib and rustc-link-search are already set by cc 15 | } 16 | -------------------------------------------------------------------------------- /zluda_ml/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_ml" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "nvml" 9 | crate-type = ["cdylib"] 10 | 11 | [target.'cfg(windows)'.dependencies] 12 | atiadlxx-sys = { path = "../atiadlxx-sys" } 13 | 14 | [target.'cfg(unix)'.dependencies] 15 | rocm_smi-sys = { path = "../rocm_smi-sys" } 16 | 17 | [package.metadata.zluda] 18 | linux_names = ["libnvidia-ml.so", "libnvidia-ml.so.1"] 19 | -------------------------------------------------------------------------------- /zluda_ml/README: -------------------------------------------------------------------------------- 1 | bindgen /usr/local/cuda-12/include/nvml.h --no-derive-debug --allowlist-var="^NVML.*" --allowlist-function="^nvml.*" --default-enum-style=newtype --no-layout-tests --no-doc-comments -o src/nvml.rs -- 
-DNVML_NO_UNVERSIONED_FUNC_DEFS 2 | sed -i -e 's/extern "C" {//g' -e 's/-> nvmlReturn_t;/-> nvmlReturn_t { crate::r#impl::unimplemented()/g' -e 's/pub fn /#[no_mangle] pub extern "C" fn /g' src/nvml.rs 3 | rustfmt src/nvml.rs -------------------------------------------------------------------------------- /zluda_ml/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | #[cfg_attr(unix, path = "unix.rs")] 3 | #[cfg_attr(windows, path = "windows.rs")] 4 | pub mod r#impl; 5 | #[allow(warnings)] 6 | mod nvml; 7 | -------------------------------------------------------------------------------- /zluda_redirect/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_redirect" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | crate-type = ["cdylib"] 9 | 10 | [target.'cfg(windows)'.dependencies] 11 | detours-sys = { path = "../detours-sys" } 12 | wchar = "0.6" 13 | winapi = { version = "0.3", features = ["winuser", "sysinfoapi", "memoryapi", "processthreadsapi", "winbase", "winnt", "winerror", "libloaderapi", "tlhelp32", "handleapi", "std"] } 14 | memchr = "2.5.0" 15 | 16 | [package.metadata.zluda] 17 | windows_only = true 18 | -------------------------------------------------------------------------------- /zluda_rt/bin/liboptix.so.6.5.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/zluda_rt/bin/liboptix.so.6.5.0 -------------------------------------------------------------------------------- /zluda_rt/bin/optix.6.5.0.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lshqqytiger/ZLUDA/5e717459179dc272b7d7d23391f0fad66c7459cf/zluda_rt/bin/optix.6.5.0.dll 
-------------------------------------------------------------------------------- /zluda_rt/build.rs: -------------------------------------------------------------------------------- 1 | use vergen::{Config, vergen}; 2 | 3 | fn main() { 4 | vergen(Config::default()).unwrap() 5 | } -------------------------------------------------------------------------------- /zluda_rt/src/tests/alloca_bug.cu: -------------------------------------------------------------------------------- 1 | // nvcc alloca_bug.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc 2 | #include 3 | #include 4 | 5 | rtDeclareVariable(rtCallableProgramId, sysBRDFEval, , ); 6 | rtBuffer sysMaterialParameters; 7 | 8 | RT_PROGRAM void closest_hit() 9 | { 10 | float3 mat = sysMaterialParameters[0]; 11 | 12 | if (mat.x != 0) 13 | { 14 | const float3 texColor = make_float3(0, 0,0); 15 | mat = make_float3(powf(texColor.x, 2.2f), 0,0); 16 | } 17 | float3 prd2; 18 | float3 f = sysBRDFEval(mat, prd2); 19 | 20 | if (prd2.x > 0.0f) 21 | prd2 *= f; 22 | } 23 | -------------------------------------------------------------------------------- /zluda_rt/src/tests/buffer_id.cu: -------------------------------------------------------------------------------- 1 | // nvcc buffer_id.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace optix; 7 | 8 | rtBuffer > buffers; 9 | 10 | RT_PROGRAM void start() { 11 | buffers[0][2] = 0x0118378c; 12 | buffers[0][1] = buffers[0].size(); 13 | } 14 | -------------------------------------------------------------------------------- /zluda_rt/src/tests/buffer_id_call.cu: -------------------------------------------------------------------------------- 1 | // nvcc buffer_id_call.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace optix; 7 | 8 | rtBuffer > buffers; 9 | 10 | __noinline__ 11 | __device__ void start2() { 12 | buffers[0][2] = 0x0118378c; 13 | 
buffers[0][1] = buffers[0].size(); 14 | } 15 | 16 | __noinline__ 17 | __device__ void start1() { 18 | start2(); 19 | } 20 | 21 | RT_PROGRAM void start() { 22 | start1(); 23 | } 24 | -------------------------------------------------------------------------------- /zluda_rt/src/tests/buffer_id_callable.cu: -------------------------------------------------------------------------------- 1 | // nvcc buffer_id_callable.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace optix; 7 | 8 | rtBuffer > buffers; 9 | rtDeclareVariable(rtCallableProgramId, program,,); 10 | 11 | RT_CALLABLE_PROGRAM void callable() { 12 | buffers[0][2] = 0x0118378c; 13 | buffers[0][1] = buffers[0].size(); 14 | } 15 | 16 | RT_PROGRAM void start() { 17 | program(); 18 | } 19 | -------------------------------------------------------------------------------- /zluda_rt/src/tests/buffer_mipmap.cu: -------------------------------------------------------------------------------- 1 | // nvcc buffer_mipmap.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace optix; 7 | 8 | rtDeclareVariable( uint, texture_id, , ); 9 | rtBuffer output_buffer; 10 | 11 | RT_PROGRAM void start() { 12 | bool isResident; 13 | uint4 val0 = rtTex2DLodLoadOrRequest( texture_id, 0, 0, 0, isResident ); 14 | output_buffer[0] = make_uint2(val0.x, val0.y); 15 | uint4 val1 = rtTex2DLodLoadOrRequest( texture_id, 0, 0, 1000, isResident ); 16 | output_buffer[1] = make_uint2(val1.x, val1.y); 17 | } 18 | -------------------------------------------------------------------------------- /zluda_rt/src/tests/callable_programs.cu: -------------------------------------------------------------------------------- 1 | // nvcc callable_programs.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace optix; 7 | 8 | rtDeclareVariable(unsigned int, value, , ); 9 | rtBuffer 
output_buffer; 10 | 11 | typedef rtCallableProgramId int_operator; 12 | rtDeclareVariable(int_operator, add_fn,,); 13 | rtDeclareVariable(int_operator, mult_fn,,); 14 | 15 | RT_CALLABLE_PROGRAM unsigned int add_value(unsigned int input) { 16 | return input + value; 17 | } 18 | 19 | RT_CALLABLE_PROGRAM unsigned int multiply_value(unsigned int input) { 20 | return input * value; 21 | } 22 | 23 | RT_PROGRAM void start() { 24 | unsigned int x = value; 25 | x = add_fn(x); 26 | x = mult_fn(x); 27 | output_buffer[0] = x; 28 | } 29 | -------------------------------------------------------------------------------- /zluda_rt/src/tests/default_variable.cu: -------------------------------------------------------------------------------- 1 | // nvcc default_variable.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace optix; 7 | 8 | rtBuffer var_buffer; 9 | rtDeclareVariable(unsigned int, x, , ) = 55; 10 | 11 | RT_PROGRAM void start() { 12 | var_buffer[0] = x; 13 | } 14 | -------------------------------------------------------------------------------- /zluda_rt/src/tests/exception_subfunc.cu: -------------------------------------------------------------------------------- 1 | // nvcc exception_subfunc.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace optix; 7 | 8 | rtBuffer var_buffer; 9 | rtDeclareVariable(rtObject, bvh, , ); 10 | rtDeclareVariable(uint2, launch_index, rtLaunchIndex, ); 11 | 12 | __device__ __noinline__ void trace() { 13 | Ray ray = make_Ray(make_float3(float(launch_index.x), 0, -1), make_float3(0,0,1), 0, 0.0, RT_DEFAULT_MAX); 14 | char unused = 0; 15 | rtTrace(bvh, ray, unused); 16 | } 17 | 18 | RT_PROGRAM void start() { 19 | trace(); 20 | } 21 | 22 | RT_PROGRAM void throw_() { 23 | rtThrow(RT_EXCEPTION_USER); 24 | } 25 | 26 | RT_PROGRAM void exception() { 27 | var_buffer[0] = rtGetExceptionCode(); 28 | } 29 | 
-------------------------------------------------------------------------------- /zluda_rt/src/tests/oob.cu: -------------------------------------------------------------------------------- 1 | // nvcc oob.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace optix; 7 | 8 | rtBuffer index_; 9 | rtBuffer input; 10 | rtBuffer output; 11 | 12 | RT_PROGRAM void start() { 13 | output[0] = input[index_[0]]; 14 | output[1] = input[index_[1]]; 15 | output[2] = *((unsigned int *)rt_buffer_get_id(0, 1, 4, 10,10,0,0)); 16 | } 17 | -------------------------------------------------------------------------------- /zluda_rtc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_rtc" 3 | version = "0.0.0" 4 | authors = ["Seunghoon Lee "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "nvrtc" 9 | crate-type = ["cdylib"] 10 | 11 | [dependencies] 12 | libloading = "0.8" 13 | lazy_static = "1.4" 14 | 15 | [package.metadata.zluda] 16 | linux_names = ["libnvrtc.so.10", "libnvrtc.so.11"] 17 | dump_names = ["libnvrtc.so"] 18 | -------------------------------------------------------------------------------- /zluda_rtc/README: -------------------------------------------------------------------------------- 1 | bindgen include/nvrtc.h -o src/nvrtc.rs --allowlist-function="^nvrtc.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug --dynamic-loading LibNvrtc --dynamic-link-require-all -- -Iinclude 2 | sed -i -e 's/extern "C" {//g' -e 's/-> nvrtcResult;/-> nvrtcResult { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/nvrtc.rs 3 | rustfmt src/nvrtc.rs -------------------------------------------------------------------------------- /zluda_runtime/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_runtime" 3 | version = "0.0.0" 4 | authors = 
["Seunghoon Lee "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "cudart" 9 | crate-type = ["cdylib"] 10 | 11 | [features] 12 | rocm5 = ["hip_common/rocm5", "hip_runtime-sys/rocm5", "zluda_dark_api/rocm5"] 13 | 14 | [dependencies] 15 | cuda_types = { path = "../cuda_types" } 16 | hip_common = { path = "../hip_common" } 17 | hip_runtime-sys = { path = "../hip_runtime-sys" } 18 | zluda_dark_api = { path = "../zluda_dark_api" } 19 | rustc-hash = "1.1" 20 | lazy_static = "1.4.0" 21 | 22 | [package.metadata.zluda] 23 | broken = true 24 | linux_names = ["libcudart.so.10", "libcudart.so.11"] 25 | dump_names = ["libcudart.so"] 26 | -------------------------------------------------------------------------------- /zluda_runtime/README: -------------------------------------------------------------------------------- 1 | bindgen $CUDA_PATH/include/cuda_runtime.h -o src/cudart.rs --allowlist-function="^cuda.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I"" 2 | sed -i -e 's/extern "C" {//g' -e 's/-> cudaError_t;/-> cudaError_t { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/cudart.rs 3 | bindgen $CUDA_PATH/include/cuda_profiler_api.h -o src/profiler.rs --allowlist-function="^cuda.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I"" 4 | sed -i -e 's/extern "C" {//g' -e 's/-> cudaError_t;/-> cudaError_t { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/profiler.rs -------------------------------------------------------------------------------- /zluda_runtime/src/decl.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! 
decl { 3 | ($name:ident) => { 4 | #[no_mangle] 5 | pub extern "system" fn $name() -> cudaError_t { 6 | unimplemented!() 7 | } 8 | }; 9 | } 10 | -------------------------------------------------------------------------------- /zluda_runtime/src/extra.rs: -------------------------------------------------------------------------------- 1 | impl cudaOutputMode { 2 | #[doc = "< Output mode Key-Value pair format."] 3 | pub const cudaKeyValuePair: cudaOutputMode = cudaOutputMode(0); 4 | } 5 | impl cudaOutputMode { 6 | #[doc = "< Output mode Comma separated values format."] 7 | pub const cudaCSV: cudaOutputMode = cudaOutputMode(1); 8 | } 9 | #[repr(transparent)] 10 | #[doc = " CUDA Profiler Output modes"] 11 | #[derive(Copy, Clone, Hash, PartialEq, Eq)] 12 | pub struct cudaOutputMode(pub ::std::os::raw::c_int); 13 | #[doc = " CUDA output file modes"] 14 | pub use self::cudaOutputMode as cudaOutputMode_t; 15 | -------------------------------------------------------------------------------- /zluda_sparse/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zluda_sparse" 3 | version = "0.0.0" 4 | authors = ["Andrzej Janik "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "cusparse" 9 | crate-type = ["cdylib"] 10 | 11 | [features] 12 | rocm5 = ["hip_common/rocm5", "hip_runtime-sys/rocm5", "zluda_dark_api/rocm5"] 13 | 14 | [dependencies] 15 | rocsparse-sys = { path = "../rocsparse-sys" } 16 | hip_common = { path = "../hip_common" } 17 | hip_runtime-sys = { path = "../hip_runtime-sys" } 18 | zluda_dark_api = { path = "../zluda_dark_api" } 19 | cuda_types = { path = "../cuda_types" } 20 | 21 | [package.metadata.zluda] 22 | linux_names = ["libcusparse.so.11"] 23 | dump_names = ["libcusparse.so"] 24 | -------------------------------------------------------------------------------- /zluda_sparse/README: -------------------------------------------------------------------------------- 1 | bindgen 
/usr/local/cuda-11/targets/x86_64-linux/include/cusparse_v2.h -o src/cusparse11.rs --allowlist-function="^cusparse.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I/usr/local/cuda-11/targets/x86_64-linux/include 2 | sed -i -e 's/extern "C" {//g' -e 's/-> cusparseStatus_t;/-> cusparseStatus_t { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/cusparse.rs --------------------------------------------------------------------------------