├── INSTALL.md ├── LICENSE.TXT ├── README.md ├── bin ├── cloc.sh ├── copyright ├── intrinsics.map ├── kfd_check_installation.sh ├── mygpu ├── mymcpu ├── printhsail ├── rocm_tools ├── snack.sh ├── snackhsail.sh ├── snk_genw.sh └── snk_genwhsail.sh ├── examples ├── README.md ├── hsa │ ├── vector_copy │ │ ├── Makefile │ │ ├── obj │ │ │ └── .gitignore │ │ ├── vector_copy.c │ │ └── vector_copy.cl │ ├── vector_copy_async │ │ ├── Makefile │ │ ├── obj │ │ │ └── .gitignore │ │ ├── vector_copy.c │ │ └── vector_copy.cl │ └── vector_copy_codeobject_rocm │ │ ├── Makefile │ │ ├── README.md │ │ ├── vector_copy_codeobject.cl │ │ └── vector_copy_codeobject.cpp ├── mathdemo_hsaillib │ ├── mathdemo_hsaillib.h │ └── mathdemo_hsaillib.hsail ├── rocm_tools │ ├── README │ ├── ubuntu-carrizo-102015 │ ├── ubuntu-kaveri-060816 │ ├── ubuntu-kaveri-081616 │ ├── ubuntu-kaveri-081916 │ ├── ubuntu-kaveri-090215 │ ├── ubuntu-kaveri-092415 │ ├── ubuntu-kaveri-101615 │ └── ubuntu-kaveri-102015 └── snack │ ├── README │ ├── async_vecsum │ ├── buildrun.sh │ ├── sumKernel.cl │ └── vecsum.cpp │ ├── csquares │ ├── CSquares.cl │ ├── CSquares.cpp │ └── buildrun.sh │ ├── fortran │ ├── HelloWorld.f │ ├── buildrun.sh │ └── hw.cl │ ├── helloworld │ ├── HelloWorld.c │ ├── HelloWorld.cpp │ ├── HelloWorld.f │ ├── buildrun.sh │ └── hw.cl │ ├── matmul │ ├── buildrun.sh │ ├── matmul.c │ ├── matmul.h │ └── matmulKernels.cl │ ├── multiple_cl_files │ ├── HelloWorld.c │ ├── buildrun.sh │ ├── hw.cl │ └── hw2.cl │ ├── test_hsail_lib │ ├── buildrun.sh │ ├── test_hsail_lib.cl │ └── test_hsail_lib.cpp │ └── vector_copy │ ├── VectorCopy.cpp │ ├── buildrun.sh │ └── vector_copy.cl ├── libamdgcn ├── .old_make_method │ ├── Makefile │ ├── SOURCES │ └── create_makefile_include.sh ├── CMakeLists.txt ├── README ├── README-rsync_libclc.sh ├── libclc │ ├── CREDITS.TXT │ ├── LICENSE.TXT │ ├── README.TXT │ ├── amdgcn │ │ └── lib │ │ │ ├── OVERRIDES │ │ │ ├── SOURCES │ │ │ ├── synchronization │ │ │ └── barrier_impl.ll │ │ │ └── workitem │ 
│ │ ├── get_group_id.ll │ │ │ └── get_local_id.ll │ ├── amdgpu │ │ └── lib │ │ │ ├── OVERRIDES │ │ │ ├── SOURCES │ │ │ ├── atomic │ │ │ └── atomic.cl │ │ │ ├── image │ │ │ ├── get_image_attributes_impl.ll │ │ │ ├── get_image_channel_data_type.cl │ │ │ ├── get_image_channel_order.cl │ │ │ ├── get_image_depth.cl │ │ │ ├── get_image_height.cl │ │ │ ├── get_image_width.cl │ │ │ ├── read_image_impl.ll │ │ │ ├── read_imagef.cl │ │ │ ├── read_imagei.cl │ │ │ ├── read_imageui.cl │ │ │ ├── write_image_impl.ll │ │ │ ├── write_imagef.cl │ │ │ ├── write_imagei.cl │ │ │ └── write_imageui.cl │ │ │ ├── math │ │ │ ├── ldexp.cl │ │ │ ├── nextafter.cl │ │ │ └── sqrt.cl │ │ │ ├── synchronization │ │ │ └── barrier.cl │ │ │ └── workitem │ │ │ ├── get_global_size.ll │ │ │ ├── get_local_size.ll │ │ │ ├── get_num_groups.ll │ │ │ └── get_work_dim.ll │ ├── generic │ │ ├── include │ │ │ ├── clc │ │ │ │ ├── as_type.h │ │ │ │ ├── async │ │ │ │ │ ├── async_work_group_copy.h │ │ │ │ │ ├── async_work_group_copy.inc │ │ │ │ │ ├── async_work_group_strided_copy.h │ │ │ │ │ ├── async_work_group_strided_copy.inc │ │ │ │ │ ├── gentype.inc │ │ │ │ │ ├── prefetch.h │ │ │ │ │ ├── prefetch.inc │ │ │ │ │ └── wait_group_events.h │ │ │ │ ├── atomic │ │ │ │ │ ├── atomic_add.h │ │ │ │ │ ├── atomic_and.h │ │ │ │ │ ├── atomic_cmpxchg.h │ │ │ │ │ ├── atomic_dec.h │ │ │ │ │ ├── atomic_decl.inc │ │ │ │ │ ├── atomic_inc.h │ │ │ │ │ ├── atomic_max.h │ │ │ │ │ ├── atomic_min.h │ │ │ │ │ ├── atomic_or.h │ │ │ │ │ ├── atomic_sub.h │ │ │ │ │ ├── atomic_xchg.h │ │ │ │ │ └── atomic_xor.h │ │ │ │ ├── cl_khr_global_int32_base_atomics │ │ │ │ │ ├── atom_add.h │ │ │ │ │ ├── atom_cmpxchg.h │ │ │ │ │ ├── atom_dec.h │ │ │ │ │ ├── atom_inc.h │ │ │ │ │ ├── atom_sub.h │ │ │ │ │ └── atom_xchg.h │ │ │ │ ├── cl_khr_global_int32_extended_atomics │ │ │ │ │ ├── atom_and.h │ │ │ │ │ ├── atom_max.h │ │ │ │ │ ├── atom_min.h │ │ │ │ │ ├── atom_or.h │ │ │ │ │ └── atom_xor.h │ │ │ │ ├── cl_khr_local_int32_base_atomics │ │ │ │ │ ├── atom_add.h │ 
│ │ │ │ ├── atom_cmpxchg.h │ │ │ │ │ ├── atom_dec.h │ │ │ │ │ ├── atom_inc.h │ │ │ │ │ ├── atom_sub.h │ │ │ │ │ └── atom_xchg.h │ │ │ │ ├── cl_khr_local_int32_extended_atomics │ │ │ │ │ ├── atom_and.h │ │ │ │ │ ├── atom_max.h │ │ │ │ │ ├── atom_min.h │ │ │ │ │ ├── atom_or.h │ │ │ │ │ └── atom_xor.h │ │ │ │ ├── clc.h │ │ │ │ ├── clcfunc.h │ │ │ │ ├── clctypes.h │ │ │ │ ├── clcversion.h │ │ │ │ ├── common │ │ │ │ │ ├── degrees.h │ │ │ │ │ ├── degrees.inc │ │ │ │ │ ├── mix.h │ │ │ │ │ ├── mix.inc │ │ │ │ │ ├── radians.h │ │ │ │ │ ├── radians.inc │ │ │ │ │ ├── sign.h │ │ │ │ │ ├── smoothstep.h │ │ │ │ │ ├── smoothstep.inc │ │ │ │ │ ├── step.h │ │ │ │ │ └── step.inc │ │ │ │ ├── convert.h │ │ │ │ ├── float │ │ │ │ │ └── definitions.h │ │ │ │ ├── geometric │ │ │ │ │ ├── cross.h │ │ │ │ │ ├── distance.h │ │ │ │ │ ├── distance.inc │ │ │ │ │ ├── dot.h │ │ │ │ │ ├── dot.inc │ │ │ │ │ ├── fast_distance.h │ │ │ │ │ ├── fast_distance.inc │ │ │ │ │ ├── fast_length.h │ │ │ │ │ ├── fast_length.inc │ │ │ │ │ ├── fast_normalize.h │ │ │ │ │ ├── fast_normalize.inc │ │ │ │ │ ├── floatn.inc │ │ │ │ │ ├── length.h │ │ │ │ │ ├── length.inc │ │ │ │ │ ├── normalize.h │ │ │ │ │ └── normalize.inc │ │ │ │ ├── image │ │ │ │ │ ├── image.h │ │ │ │ │ └── image_defines.h │ │ │ │ ├── integer │ │ │ │ │ ├── abs.h │ │ │ │ │ ├── abs.inc │ │ │ │ │ ├── abs_diff.h │ │ │ │ │ ├── abs_diff.inc │ │ │ │ │ ├── add_sat.h │ │ │ │ │ ├── add_sat.inc │ │ │ │ │ ├── clz.h │ │ │ │ │ ├── clz.inc │ │ │ │ │ ├── definitions.h │ │ │ │ │ ├── gentype.inc │ │ │ │ │ ├── hadd.h │ │ │ │ │ ├── hadd.inc │ │ │ │ │ ├── integer-gentype.inc │ │ │ │ │ ├── mad24.h │ │ │ │ │ ├── mad24.inc │ │ │ │ │ ├── mad_hi.h │ │ │ │ │ ├── mad_sat.h │ │ │ │ │ ├── mad_sat.inc │ │ │ │ │ ├── mul24.h │ │ │ │ │ ├── mul24.inc │ │ │ │ │ ├── mul_hi.h │ │ │ │ │ ├── mul_hi.inc │ │ │ │ │ ├── rhadd.h │ │ │ │ │ ├── rhadd.inc │ │ │ │ │ ├── rotate.h │ │ │ │ │ ├── rotate.inc │ │ │ │ │ ├── sub_sat.h │ │ │ │ │ ├── sub_sat.inc │ │ │ │ │ └── upsample.h │ │ │ │ ├── math │ │ │ 
│ │ ├── acos.h │ │ │ │ │ ├── acos.inc │ │ │ │ │ ├── acosh.h │ │ │ │ │ ├── acosh.inc │ │ │ │ │ ├── acospi.h │ │ │ │ │ ├── acospi.inc │ │ │ │ │ ├── asin.h │ │ │ │ │ ├── asin.inc │ │ │ │ │ ├── asinh.h │ │ │ │ │ ├── asinh.inc │ │ │ │ │ ├── asinpi.h │ │ │ │ │ ├── asinpi.inc │ │ │ │ │ ├── atan.h │ │ │ │ │ ├── atan.inc │ │ │ │ │ ├── atan2.h │ │ │ │ │ ├── atan2.inc │ │ │ │ │ ├── atan2pi.h │ │ │ │ │ ├── atan2pi.inc │ │ │ │ │ ├── atanh.h │ │ │ │ │ ├── atanh.inc │ │ │ │ │ ├── atanpi.h │ │ │ │ │ ├── atanpi.inc │ │ │ │ │ ├── binary_decl.inc │ │ │ │ │ ├── binary_intrin.inc │ │ │ │ │ ├── ceil.h │ │ │ │ │ ├── clc_nextafter.h │ │ │ │ │ ├── copysign.h │ │ │ │ │ ├── copysign.inc │ │ │ │ │ ├── cos.h │ │ │ │ │ ├── cos.inc │ │ │ │ │ ├── cospi.h │ │ │ │ │ ├── cospi.inc │ │ │ │ │ ├── erf.h │ │ │ │ │ ├── erfc.h │ │ │ │ │ ├── exp.h │ │ │ │ │ ├── exp10.h │ │ │ │ │ ├── exp2.h │ │ │ │ │ ├── exp2.inc │ │ │ │ │ ├── fabs.h │ │ │ │ │ ├── fdim.h │ │ │ │ │ ├── fdim.inc │ │ │ │ │ ├── floor.h │ │ │ │ │ ├── fma.h │ │ │ │ │ ├── fmax.h │ │ │ │ │ ├── fmin.h │ │ │ │ │ ├── fmod.h │ │ │ │ │ ├── fmod.inc │ │ │ │ │ ├── fract.h │ │ │ │ │ ├── fract.inc │ │ │ │ │ ├── frexp.h │ │ │ │ │ ├── frexp.inc │ │ │ │ │ ├── gentype.inc │ │ │ │ │ ├── half_rsqrt.h │ │ │ │ │ ├── half_sqrt.h │ │ │ │ │ ├── hypot.h │ │ │ │ │ ├── hypot.inc │ │ │ │ │ ├── ilogb.h │ │ │ │ │ ├── ilogb.inc │ │ │ │ │ ├── ldexp.h │ │ │ │ │ ├── ldexp.inc │ │ │ │ │ ├── log.h │ │ │ │ │ ├── log.inc │ │ │ │ │ ├── log10.h │ │ │ │ │ ├── log1p.h │ │ │ │ │ ├── log1p.inc │ │ │ │ │ ├── log2.h │ │ │ │ │ ├── log2.inc │ │ │ │ │ ├── mad.h │ │ │ │ │ ├── mad.inc │ │ │ │ │ ├── modf.h │ │ │ │ │ ├── modf.inc │ │ │ │ │ ├── native_cos.h │ │ │ │ │ ├── native_divide.h │ │ │ │ │ ├── native_exp.h │ │ │ │ │ ├── native_exp10.h │ │ │ │ │ ├── native_exp2.h │ │ │ │ │ ├── native_log.h │ │ │ │ │ ├── native_log.inc │ │ │ │ │ ├── native_log2.h │ │ │ │ │ ├── native_log2.inc │ │ │ │ │ ├── native_powr.h │ │ │ │ │ ├── native_sin.h │ │ │ │ │ ├── native_sqrt.h │ │ │ │ │ ├── nextafter.h │ │ │ │ │ 
├── pow.h │ │ │ │ │ ├── pown.h │ │ │ │ │ ├── rint.h │ │ │ │ │ ├── round.h │ │ │ │ │ ├── rsqrt.h │ │ │ │ │ ├── sin.h │ │ │ │ │ ├── sin.inc │ │ │ │ │ ├── sincos.h │ │ │ │ │ ├── sincos.inc │ │ │ │ │ ├── sinpi.h │ │ │ │ │ ├── sinpi.inc │ │ │ │ │ ├── sqrt.h │ │ │ │ │ ├── sqrt.inc │ │ │ │ │ ├── tan.h │ │ │ │ │ ├── tan.inc │ │ │ │ │ ├── tanh.h │ │ │ │ │ ├── tanh.inc │ │ │ │ │ ├── ternary_intrin.inc │ │ │ │ │ ├── trunc.h │ │ │ │ │ ├── unary_decl.inc │ │ │ │ │ └── unary_intrin.inc │ │ │ │ ├── relational │ │ │ │ │ ├── all.h │ │ │ │ │ ├── any.h │ │ │ │ │ ├── binary_decl.inc │ │ │ │ │ ├── bitselect.h │ │ │ │ │ ├── bitselect.inc │ │ │ │ │ ├── floatn.inc │ │ │ │ │ ├── isequal.h │ │ │ │ │ ├── isfinite.h │ │ │ │ │ ├── isgreater.h │ │ │ │ │ ├── isgreaterequal.h │ │ │ │ │ ├── isinf.h │ │ │ │ │ ├── isless.h │ │ │ │ │ ├── islessequal.h │ │ │ │ │ ├── islessgreater.h │ │ │ │ │ ├── isnan.h │ │ │ │ │ ├── isnormal.h │ │ │ │ │ ├── isnotequal.h │ │ │ │ │ ├── isordered.h │ │ │ │ │ ├── isunordered.h │ │ │ │ │ ├── select.h │ │ │ │ │ ├── signbit.h │ │ │ │ │ └── unary_decl.inc │ │ │ │ ├── shared │ │ │ │ │ ├── clamp.h │ │ │ │ │ ├── clamp.inc │ │ │ │ │ ├── max.h │ │ │ │ │ ├── max.inc │ │ │ │ │ ├── min.h │ │ │ │ │ ├── min.inc │ │ │ │ │ ├── vload.h │ │ │ │ │ └── vstore.h │ │ │ │ ├── synchronization │ │ │ │ │ ├── barrier.h │ │ │ │ │ └── cl_mem_fence_flags.h │ │ │ │ └── workitem │ │ │ │ │ ├── get_global_id.h │ │ │ │ │ ├── get_global_size.h │ │ │ │ │ ├── get_group_id.h │ │ │ │ │ ├── get_local_id.h │ │ │ │ │ ├── get_local_size.h │ │ │ │ │ ├── get_num_groups.h │ │ │ │ │ └── get_work_dim.h │ │ │ ├── config.h │ │ │ └── math │ │ │ │ ├── clc_ldexp.h │ │ │ │ ├── clc_nextafter.h │ │ │ │ ├── clc_sqrt.h │ │ │ │ └── clc_sqrt.inc │ │ └── lib │ │ │ ├── SOURCES │ │ │ ├── async │ │ │ ├── async_work_group_copy.cl │ │ │ ├── async_work_group_copy.inc │ │ │ ├── async_work_group_strided_copy.cl │ │ │ ├── async_work_group_strided_copy.inc │ │ │ ├── prefetch.cl │ │ │ ├── prefetch.inc │ │ │ └── wait_group_events.cl │ │ │ ├── 
atomic │ │ │ ├── atomic_impl.ll │ │ │ └── atomic_xchg.cl │ │ │ ├── cl_khr_global_int32_base_atomics │ │ │ ├── atom_add.cl │ │ │ ├── atom_cmpxchg.cl │ │ │ ├── atom_dec.cl │ │ │ ├── atom_inc.cl │ │ │ ├── atom_sub.cl │ │ │ └── atom_xchg.cl │ │ │ ├── cl_khr_global_int32_extended_atomics │ │ │ ├── atom_and.cl │ │ │ ├── atom_max.cl │ │ │ ├── atom_min.cl │ │ │ ├── atom_or.cl │ │ │ └── atom_xor.cl │ │ │ ├── cl_khr_local_int32_base_atomics │ │ │ ├── atom_add.cl │ │ │ ├── atom_cmpxchg.cl │ │ │ ├── atom_dec.cl │ │ │ ├── atom_inc.cl │ │ │ ├── atom_sub.cl │ │ │ └── atom_xchg.cl │ │ │ ├── cl_khr_local_int32_extended_atomics │ │ │ ├── atom_and.cl │ │ │ ├── atom_max.cl │ │ │ ├── atom_min.cl │ │ │ ├── atom_or.cl │ │ │ └── atom_xor.cl │ │ │ ├── clcmacro.h │ │ │ ├── common │ │ │ ├── degrees.cl │ │ │ ├── mix.cl │ │ │ ├── mix.inc │ │ │ ├── radians.cl │ │ │ ├── sign.cl │ │ │ ├── smoothstep.cl │ │ │ └── step.cl │ │ │ ├── convert.cl │ │ │ ├── gen_convert.py │ │ │ ├── geometric │ │ │ ├── cross.cl │ │ │ ├── distance.cl │ │ │ ├── distance.inc │ │ │ ├── dot.cl │ │ │ ├── fast_distance.cl │ │ │ ├── fast_distance.inc │ │ │ ├── fast_length.cl │ │ │ ├── fast_normalize.cl │ │ │ ├── fast_normalize.inc │ │ │ ├── length.cl │ │ │ └── normalize.cl │ │ │ ├── image │ │ │ └── get_image_dim.cl │ │ │ ├── integer │ │ │ ├── abs.cl │ │ │ ├── abs.inc │ │ │ ├── abs_diff.cl │ │ │ ├── abs_diff.inc │ │ │ ├── add_sat.cl │ │ │ ├── add_sat_if.ll │ │ │ ├── add_sat_impl.ll │ │ │ ├── clz.cl │ │ │ ├── clz_if.ll │ │ │ ├── clz_impl.ll │ │ │ ├── hadd.cl │ │ │ ├── hadd.inc │ │ │ ├── mad24.cl │ │ │ ├── mad24.inc │ │ │ ├── mad_sat.cl │ │ │ ├── mul24.cl │ │ │ ├── mul24.inc │ │ │ ├── mul_hi.cl │ │ │ ├── rhadd.cl │ │ │ ├── rhadd.inc │ │ │ ├── rotate.cl │ │ │ ├── rotate.inc │ │ │ ├── sub_sat.cl │ │ │ ├── sub_sat_if.ll │ │ │ ├── sub_sat_impl.ll │ │ │ └── upsample.cl │ │ │ ├── math │ │ │ ├── acos.cl │ │ │ ├── acos.inc │ │ │ ├── acosh.cl │ │ │ ├── acospi.cl │ │ │ ├── asin.cl │ │ │ ├── asin.inc │ │ │ ├── asinh.cl │ │ │ ├── asinpi.cl │ │ 
│ ├── atan.cl │ │ │ ├── atan2.cl │ │ │ ├── atan2pi.cl │ │ │ ├── atanh.cl │ │ │ ├── atanpi.cl │ │ │ ├── binary_impl.inc │ │ │ ├── clc_ldexp.cl │ │ │ ├── clc_nextafter.cl │ │ │ ├── clc_sqrt.cl │ │ │ ├── clc_sqrt_impl.inc │ │ │ ├── copysign.cl │ │ │ ├── cos.cl │ │ │ ├── cospi.cl │ │ │ ├── ep_log.cl │ │ │ ├── ep_log.h │ │ │ ├── erf.cl │ │ │ ├── erfc.cl │ │ │ ├── exp.cl │ │ │ ├── exp10.cl │ │ │ ├── exp10.inc │ │ │ ├── exp2.cl │ │ │ ├── exp_helper.cl │ │ │ ├── exp_helper.h │ │ │ ├── fdim.cl │ │ │ ├── fdim.inc │ │ │ ├── fmax.cl │ │ │ ├── fmax.inc │ │ │ ├── fmin.cl │ │ │ ├── fmin.inc │ │ │ ├── fmod.cl │ │ │ ├── fract.cl │ │ │ ├── fract.inc │ │ │ ├── frexp.cl │ │ │ ├── frexp.inc │ │ │ ├── half_rsqrt.cl │ │ │ ├── half_rsqrt.inc │ │ │ ├── half_sqrt.cl │ │ │ ├── half_sqrt.inc │ │ │ ├── hypot.cl │ │ │ ├── hypot.inc │ │ │ ├── ilogb.cl │ │ │ ├── ldexp.cl │ │ │ ├── ldexp.inc │ │ │ ├── log.cl │ │ │ ├── log10.cl │ │ │ ├── log10.inc │ │ │ ├── log1p.cl │ │ │ ├── log2.cl │ │ │ ├── log_base.h │ │ │ ├── mad.cl │ │ │ ├── mad.inc │ │ │ ├── math.h │ │ │ ├── modf.cl │ │ │ ├── modf.inc │ │ │ ├── native_log.cl │ │ │ ├── native_log.inc │ │ │ ├── native_log2.cl │ │ │ ├── native_log2.inc │ │ │ ├── nextafter.cl │ │ │ ├── pown.cl │ │ │ ├── sin.cl │ │ │ ├── sincos.cl │ │ │ ├── sincos.inc │ │ │ ├── sincosD_piby4.h │ │ │ ├── sincos_helpers.cl │ │ │ ├── sincos_helpers.h │ │ │ ├── sincospiF_piby4.h │ │ │ ├── sinpi.cl │ │ │ ├── sqrt.cl │ │ │ ├── tables.cl │ │ │ ├── tables.h │ │ │ ├── tan.cl │ │ │ ├── tan.inc │ │ │ └── tanh.cl │ │ │ ├── relational │ │ │ ├── all.cl │ │ │ ├── any.cl │ │ │ ├── bitselect.cl │ │ │ ├── bitselect.inc │ │ │ ├── isequal.cl │ │ │ ├── isfinite.cl │ │ │ ├── isgreater.cl │ │ │ ├── isgreaterequal.cl │ │ │ ├── isinf.cl │ │ │ ├── isless.cl │ │ │ ├── islessequal.cl │ │ │ ├── islessgreater.cl │ │ │ ├── isnan.cl │ │ │ ├── isnormal.cl │ │ │ ├── isnotequal.cl │ │ │ ├── isordered.cl │ │ │ ├── isunordered.cl │ │ │ ├── relational.h │ │ │ └── signbit.cl │ │ │ ├── shared │ │ │ ├── clamp.cl │ │ │ 
├── clamp.inc │ │ │ ├── max.cl │ │ │ ├── max.inc │ │ │ ├── min.cl │ │ │ ├── min.inc │ │ │ ├── vload.cl │ │ │ └── vstore.cl │ │ │ ├── subnormal_config.cl │ │ │ ├── subnormal_disable.ll │ │ │ ├── subnormal_helper_func.ll │ │ │ ├── subnormal_use_default.ll │ │ │ └── workitem │ │ │ ├── get_global_id.cl │ │ │ └── get_global_size.cl │ └── utils │ │ └── prepare-builtins.cpp └── libclc_overrides │ ├── README │ ├── include │ └── clc │ │ ├── clc.h │ │ ├── clctypes.h │ │ └── workitem │ │ ├── get_global_id.h │ │ ├── get_group_id.h │ │ ├── get_local_id.h │ │ ├── get_local_size.h │ │ ├── get_num_groups.h │ │ ├── get_reserved_device_mem.h │ │ └── get_work_dim.h │ └── lib │ └── workitem │ ├── get_dispatch_ptr.ll │ ├── get_global_id.cl │ ├── get_global_size.ll │ ├── get_group_id.cl │ ├── get_group_id_ll.ll │ ├── get_local_id.cl │ ├── get_local_id_ll.ll │ ├── get_local_size.cl │ ├── get_local_size_ll.ll │ ├── get_num_groups.cl │ ├── get_num_groups_ll.ll │ ├── get_reserved_device_mem.cl │ ├── get_reserved_device_mem.ll │ ├── get_work_dim.cl │ ├── get_work_dim_ll.ll │ └── overloadables └── packages ├── fedora ├── libatmi-runtime-0.3-3.x86_64.rpm └── mymcpu-0.1-4.noarch.rpm └── ubuntu ├── hlc_3.2-4_amd64.deb ├── libatmi-runtime_0.3-4_amd64.deb └── mymcpu_0.1-4_all.deb /bin/intrinsics.map: -------------------------------------------------------------------------------- 1 | llvm.nvvm.barrier0 llvm.amdgcn.s.barrier 2 | llvm.nvvm.read.ptx.sreg.ctaid.x llvm.amdgcn.workgroup.id.x 3 | llvm.nvvm.read.ptx.sreg.tid.x llvm.amdgcn.workitem.id.x 4 | llvm.nvvm.read.ptx.sreg.nctaid.x llvm_amdgcn_read_num_groups_x 5 | llvm.nvvm.read.ptx.sreg.ntid.x llvm_amdgcn_read_local_size_x 6 | -------------------------------------------------------------------------------- /bin/mygpu: -------------------------------------------------------------------------------- 1 | mymcpu -------------------------------------------------------------------------------- /examples/hsa/vector_copy/Makefile: 
-------------------------------------------------------------------------------- 1 | TEST_NAME=vector_copy 2 | LFLAGS= -g -Wl,--unresolved-symbols=ignore-in-shared-libs 3 | INCS = -I /opt/rocm/include 4 | C_FILES := $(wildcard *.c) 5 | OBJ_FILES := $(addprefix obj/, $(notdir $(C_FILES:.c=.o))) 6 | 7 | all: $(TEST_NAME) $(TEST_NAME).brig 8 | 9 | $(TEST_NAME): $(OBJ_FILES) $(COMMON_OBJ_FILES) 10 | $(CC) $(LFLAGS) $(OBJ_FILES) -L/opt/rocm/lib -lhsa-runtime64 -o $(TEST_NAME) 11 | 12 | $(TEST_NAME).brig : 13 | cloc.sh -brig $(TEST_NAME).cl 14 | 15 | obj/%.o: %.c 16 | $(CC) -c $(CFLAGS) $(INCS) -o $@ $< -std=c99 17 | 18 | clean: 19 | rm -rf obj/*o *.brig $(TEST_NAME) 20 | 21 | 22 | test: 23 | ./$(TEST_NAME) 24 | 25 | 26 | -------------------------------------------------------------------------------- /examples/hsa/vector_copy/obj/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /examples/hsa/vector_copy/vector_copy.cl: -------------------------------------------------------------------------------- 1 | __kernel void vector_copy(__global int *in, __global int *out) { 2 | int id = get_global_id(0); 3 | out[id] = in[id]; 4 | } 5 | -------------------------------------------------------------------------------- /examples/hsa/vector_copy_async/Makefile: -------------------------------------------------------------------------------- 1 | 2 | TEST_NAME=vector_copy 3 | LFLAGS= -g -Wl,--unresolved-symbols=ignore-in-shared-libs 4 | INCS = -I /opt/rocm/include 5 | C_FILES := $(wildcard *.c) 6 | OBJ_FILES := $(addprefix obj/, $(notdir $(C_FILES:.c=.o))) 7 | 8 | all: $(TEST_NAME) $(TEST_NAME).brig 9 | 10 | $(TEST_NAME): $(OBJ_FILES) $(COMMON_OBJ_FILES) 11 | $(CC) $(LFLAGS) $(OBJ_FILES) -L/opt/rocm/lib -lhsa-runtime64 -o $(TEST_NAME) 12 | 13 | $(TEST_NAME).brig : 14 | 
cloc.sh -brig $(TEST_NAME).cl 15 | 16 | obj/%.o: %.c 17 | $(CC) -c $(CFLAGS) $(INCS) -o $@ $< -std=c99 18 | 19 | clean: 20 | rm -rf obj/*o *.brig $(TEST_NAME) 21 | 22 | 23 | test: 24 | ./$(TEST_NAME) 25 | 26 | -------------------------------------------------------------------------------- /examples/hsa/vector_copy_async/obj/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /examples/hsa/vector_copy_async/vector_copy.cl: -------------------------------------------------------------------------------- 1 | __kernel void vector_copy(__global int *in, __global int *out, int offset) { 2 | int id = get_global_id(0); 3 | out[id + offset] = in[id +offset]; 4 | } 5 | -------------------------------------------------------------------------------- /examples/hsa/vector_copy_codeobject_rocm/Makefile: -------------------------------------------------------------------------------- 1 | 2 | TEST_NAME=vector_copy_codeobject 3 | CL_FILE=vector_copy_codeobject 4 | #LFLAGS= -g -Wl,--unresolved-symbols=ignore-in-shared-libs 5 | LFLAGS = 6 | CFLAGS = -std=c++11 7 | 8 | CXX_FILES := $(wildcard *.cpp) 9 | OBJ_FILES := $(addprefix obj/, $(notdir $(CXX_FILES:.cpp=.o))) 10 | 11 | all: $(TEST_NAME) $(CL_FILE).hsaco 12 | 13 | $(TEST_NAME): $(OBJ_FILES) $(COMMON_OBJ_FILES) 14 | $(CXX) $(LFLAGS) $(OBJ_FILES) -L/opt/rocm/lib -lhsa-runtime64 -o $(TEST_NAME) 15 | 16 | $(CL_FILE).hsaco : 17 | cloc.sh $(CL_FILE).cl 18 | 19 | obj/%.o: %.cpp 20 | mkdir -p obj 21 | $(CXX) -I/opt/rocm/include -c $(CFLAGS) -o $@ $< 22 | 23 | clean: 24 | rm -rf obj/*o *.hsaco $(TEST_NAME) 25 | 26 | 27 | test: 28 | ./$(TEST_NAME) 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/hsa/vector_copy_codeobject_rocm/README.md: 
-------------------------------------------------------------------------------- 1 | vector_copy_codeobject sample modified to run on the ROCm platform, 2 | so the code can run on either an APU or a dGPU. The changes include: 3 | 4 | - use the kernarg region for kernel arguments. 5 | - find a device memory region and use it to allocate the in_d and out_d memory regions. 6 | Also add commands to explicitly copy in -> in_d before running the kernel, and 7 | copy out_d -> out after the kernel finishes. 8 | - remove the memory registration code. 9 | -------------------------------------------------------------------------------- /examples/hsa/vector_copy_codeobject_rocm/vector_copy_codeobject.cl: -------------------------------------------------------------------------------- 1 | __kernel void vector_copy(__global int *in, __global int *out) { 2 | int id = get_global_id(0); 3 | out[id] = in[id]; 4 | } 5 | -------------------------------------------------------------------------------- /examples/mathdemo_hsaillib/mathdemo_hsaillib.h: -------------------------------------------------------------------------------- 1 | float __sin(float in); 2 | float __cos(float in); 3 | float __exp(float in); 4 | -------------------------------------------------------------------------------- /examples/rocm_tools/README: -------------------------------------------------------------------------------- 1 | 2 | This directory contains output from the rocm_tools script for comparison purposes. 3 | Use the name of the file to describe your environment and date run. 
4 | -------------------------------------------------------------------------------- /examples/snack/async_vecsum/buildrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compile accelerated functions 4 | echo 5 | if [ -f sumKernel.o ] ; then rm sumKernel.o ; fi 6 | echo snack.sh -c sumKernel.cl 7 | snack.sh -c sumKernel.cl 8 | 9 | echo 10 | if [ -f vecsum ] ; then rm vecsum ; fi 11 | echo g++ -O3 -o vecsum sumKernel.o vecsum.cpp -L /opt/rocm/lib -lhsa-runtime64 12 | g++ -O3 -o vecsum sumKernel.o vecsum.cpp -L /opt/rocm/lib -lhsa-runtime64 13 | 14 | # Execute 15 | echo 16 | echo ./vecsum 17 | ./vecsum | tee vecsum.out 18 | -------------------------------------------------------------------------------- /examples/snack/async_vecsum/sumKernel.cl: -------------------------------------------------------------------------------- 1 | #define INT_TYPE int 2 | __kernel void sum8192Kernel(__global const INT_TYPE * x, __global INT_TYPE * result) { 3 | __local INT_TYPE buffer[512]; 4 | int gid=get_local_id(0); 5 | buffer[gid] = x[gid] + x[gid+512] + x[gid+1024] + x[gid+1536] + 6 | x[gid+2048] + x[gid+2560] + x[gid+3072] + x[gid+3584] + 7 | x[gid+4096] + x[gid+4608] + x[gid+5120] + x[gid+5632] + 8 | x[gid+6144] + x[gid+6656] + x[gid+7168] + x[gid+7680] ; 9 | barrier(CLK_LOCAL_MEM_FENCE); 10 | if(gid<256) buffer[gid] = buffer[gid]+buffer[gid+256]; 11 | barrier(CLK_LOCAL_MEM_FENCE); 12 | if(gid<128) buffer[gid] = buffer[gid]+buffer[gid+128]; 13 | barrier(CLK_LOCAL_MEM_FENCE); 14 | if(gid<64) buffer[gid] = buffer[gid]+buffer[gid+64]; 15 | barrier(CLK_LOCAL_MEM_FENCE); 16 | if(gid<32) buffer[gid] = buffer[gid]+buffer[gid+32]; 17 | if(gid<16) buffer[gid] = buffer[gid]+buffer[gid+16]; 18 | if(gid<8) buffer[gid] = buffer[gid]+buffer[gid+8]; 19 | if(gid<4) buffer[gid] = buffer[gid]+buffer[gid+4]; 20 | if(gid<2) buffer[gid] = buffer[gid]+buffer[gid+2]; 21 | if(gid == 0) result[0] = buffer[0] + buffer[1]; 22 | } 23 | 
-------------------------------------------------------------------------------- /examples/snack/csquares/CSquares.cl: -------------------------------------------------------------------------------- 1 | __kernel void csquares(__global float *out, __global float *in) { 2 | int i = get_global_id(0); 3 | out[i] = in[i] * in[i]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /examples/snack/csquares/buildrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compile accelerated functions 4 | echo 5 | if [ -f CSquares.o ] ; then rm CSquares.o ; fi 6 | echo snack.sh -c CSquares.cl 7 | snack.sh -c CSquares.cl 8 | 9 | # Compile Main and link to accelerated functions in CSquares.o 10 | echo 11 | if [ -f CSquares ] ; then rm CSquares ; fi 12 | echo "g++ -o CSquares CSquares.o CSquares.cpp -L/opt/rocm/lib -lhsa-runtime64 " 13 | g++ -o CSquares CSquares.o CSquares.cpp -L/opt/rocm/lib -lhsa-runtime64 14 | 15 | # Execute 16 | echo 17 | echo ./CSquares 18 | ./CSquares 19 | -------------------------------------------------------------------------------- /examples/snack/fortran/HelloWorld.f: -------------------------------------------------------------------------------- 1 | PROGRAM helloworld 2 | C cloc -c -fort will generate launch_params.f so you can set dimensions 3 | INCLUDE 'launch_params.f' 4 | 5 | INTEGER :: i,lmsg 6 | CHARACTER(LEN=*), PARAMETER :: msg = "Hello HSA World" 7 | CHARACTER inputg(*), secret(*), output(*) 8 | pointer(s_ptr,secret) 9 | pointer(o_ptr,output) 10 | pointer(i_ptr,inputg) 11 | C Malloc memory to use in GPU functions 12 | #ifdef GLOBALMALLOC 13 | s_ptr = malloc_global(64) 14 | o_ptr = malloc_global(64) 15 | i_ptr = malloc_global(64) 16 | #else 17 | s_ptr = malloc(64) 18 | o_ptr = malloc(64) 19 | i_ptr = malloc(64) 20 | #endif 21 | 22 | lmsg=LEN(msg) 23 | DO i=1,lmsg 24 | inputg(i:i) = msg(i:i) 25 | END DO 26 | 27 | C Initialize the grid 
dimensions defined in the launch_params.f file 28 | lparm%ndim=1 29 | lparm%gdims(1)=lmsg 30 | lparm%ldims(1)=1 31 | 32 | C Call the GPU functions 33 | C Must use HSA global register memory 34 | CALL encode(inputg,secret,lparm); 35 | PRINT*, "Coded message :",secret(1:lmsg) 36 | CALL decode(secret,output,lparm); 37 | PRINT*, "Decoded message:",output(1:lmsg) 38 | CALL SNACK_Stop(); 39 | END 40 | -------------------------------------------------------------------------------- /examples/snack/fortran/buildrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # First compile the accelerated functions to create hw.o 4 | # Tell cloc to use Fortran names for external references 5 | echo snack.sh -fort -c hw.cl 6 | snack.sh -fort -c hw.cl 7 | 8 | # Compile the main Fortran program and link to hw.o 9 | mymcpu=`mymcpu` 10 | mymcpu=${mymcpu:-fiji} 11 | if [ "$mymcpu" == "fiji" ] ; then 12 | malloc_trigger="-DGLOBALMALLOC" 13 | else 14 | malloc_trigger="" 15 | fi 16 | echo f95 -cpp -fcray-pointer $malloc_trigger -o HelloWorld hw.o HelloWorld.f -L/opt/rocm/lib -lhsa-runtime64 17 | f95 -cpp -fcray-pointer $malloc_trigger -o HelloWorld hw.o HelloWorld.f -L/opt/rocm/lib -lhsa-runtime64 18 | 19 | echo ./HelloWorld 20 | ./HelloWorld 21 | -------------------------------------------------------------------------------- /examples/snack/fortran/hw.cl: -------------------------------------------------------------------------------- 1 | __kernel void decode(__global char* in, __global char* out) { 2 | int num = get_global_id(0); 3 | out[num] = in[num] + 1; 4 | } 5 | __kernel void encode(__global char*in, __global char* out) { 6 | int num = get_global_id(0); 7 | out[num] = in[num] - 1; 8 | } 9 | 10 | __kernel void super_encode(__global char*in, __global char* out) { 11 | int num = get_global_id(0); 12 | out[num] = in[num] - 2; 13 | } 14 | __kernel void super_decode(__global char*in, __global char* out) { 15 | int num = 
get_global_id(0); 16 | out[num] = in[num] + 2; 17 | } 18 | -------------------------------------------------------------------------------- /examples/snack/helloworld/HelloWorld.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "hw.h" 5 | 6 | int main(int argc, char* argv[]) { 7 | char* input_const = "Gdkkn\x1FGR@\x1FVnqkc"; 8 | size_t strlength = strlen(input_const); 9 | char *input = (char*) malloc_global(strlength + 1); 10 | char *output = (char*) malloc_global(strlength + 1); 11 | char *secode = (char*) malloc_global(strlength + 1); 12 | char *output2 = (char*) malloc_global(strlength + 1); 13 | strncpy(input,input_const,strlength); 14 | 15 | SNK_INIT_LPARM(lparm,strlength); 16 | decode(input,output,lparm); 17 | output[strlength] = '\0'; 18 | printf("Decoded :%s\n",output); 19 | /* Show we can call multiple functions in the .cl file */ 20 | super_encode(output,secode,lparm); 21 | printf("Super encoded :%s\n",secode); 22 | super_decode(secode,output2,lparm); 23 | printf("Super decoded :%s\n",output2); 24 | /* Show we can call same function multiple times */ 25 | decode(secode,output,lparm); 26 | decode(output,output2,lparm); 27 | printf("Decoded twice :%s\n",output2); 28 | free_global(input); 29 | free_global(output); 30 | free_global(secode); 31 | free_global(output2); 32 | SNACK_Stop(); 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /examples/snack/helloworld/HelloWorld.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | using namespace std; 5 | #include "hw.h" 6 | int main(int argc, char* argv[]) { 7 | const char* input_const = "Gdkkn\x1FGR@\x1FVnqkc"; 8 | size_t strlength = strlen(input_const); 9 | char *output = (char*) malloc_global(strlength + 1); 10 | char *input = (char*) malloc_global(strlength + 1); 11 | 
strncpy(input,input_const,strlength); 12 | SNK_INIT_LPARM(lparm,strlength); 13 | decode(input,output,lparm); 14 | output[strlength] = '\0'; 15 | cout << output << endl; 16 | free_global(input); 17 | free_global(output); 18 | SNACK_Stop(); 19 | return 0; 20 | 21 | } 22 | -------------------------------------------------------------------------------- /examples/snack/helloworld/HelloWorld.f: -------------------------------------------------------------------------------- 1 | PROGRAM helloworld 2 | C cloc -c -fort will generate launch_params.f so you can set dimensions 3 | INCLUDE 'launch_params.f' 4 | 5 | INTEGER :: i,lmsg 6 | CHARACTER(LEN=*), PARAMETER :: msg = "Hello HSA World" 7 | CHARACTER inputg(*), secret(*), output(*) 8 | pointer(s_ptr,secret) 9 | pointer(o_ptr,output) 10 | pointer(i_ptr,inputg) 11 | C Malloc memory to use in GPU functions 12 | #ifdef GLOBALMALLOC 13 | s_ptr = malloc_global(64) 14 | o_ptr = malloc_global(64) 15 | i_ptr = malloc_global(64) 16 | #else 17 | s_ptr = malloc(64) 18 | o_ptr = malloc(64) 19 | i_ptr = malloc(64) 20 | #endif 21 | 22 | lmsg=LEN(msg) 23 | DO i=1,lmsg 24 | inputg(i:i) = msg(i:i) 25 | END DO 26 | 27 | C Initialize the grid dimensions defined in the launch_params.f file 28 | lparm%ndim=1 29 | lparm%gdims(1)=lmsg 30 | lparm%ldims(1)=1 31 | 32 | C Call the GPU functions 33 | C Must use HSA global register memory 34 | CALL encode(inputg,secret,lparm); 35 | PRINT*, "Coded message :",secret(1:lmsg) 36 | CALL decode(secret,output,lparm); 37 | PRINT*, "Decoded message:",output(1:lmsg) 38 | CALL SNACK_Stop(); 39 | END 40 | -------------------------------------------------------------------------------- /examples/snack/helloworld/hw.cl: -------------------------------------------------------------------------------- 1 | __kernel void decode(__global const char* in, __global char* out) { 2 | int num = get_global_id(0); 3 | out[num] = in[num] + 1; 4 | } 5 | __kernel void encode(__global const char*in, __global char* out) { 6 | int num 
= get_global_id(0); 7 | out[num] = in[num] - 1; 8 | } 9 | 10 | __kernel void super_encode(__global const char*in, __global char* out) { 11 | int num = get_global_id(0); 12 | out[num] = in[num] - 2; 13 | } 14 | __kernel void super_decode(__global const char*in, __global char* out) { 15 | int num = get_global_id(0); 16 | out[num] = in[num] + 2; 17 | } 18 | -------------------------------------------------------------------------------- /examples/snack/matmul/buildrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # For this test case we need libbsd-dev for the random number generator 4 | # sudo apt-get install libbsd-dev 5 | 6 | snkcmd=snack.sh # could also use snackhsail.sh 7 | 8 | # Compile accelerated functions 9 | echo 10 | if [ -f matmulKernels.o ] ; then rm matmulKernels.o ; fi 11 | echo "$snkcmd -v -c matmulKernels.cl " 12 | $snkcmd -v -c matmulKernels.cl 13 | 14 | # Compile Main .c and link to accelerated functions in matmulKernels.o 15 | echo 16 | if [ -f matmul ] ; then rm matmul ; fi 17 | echo gcc -O3 -o matmul matmulKernels.o matmul.c -L/opt/rocm/lib -lhsa-runtime64 -lbsd 18 | gcc -O3 -o matmul matmulKernels.o matmul.c -L/opt/rocm/lib -lhsa-runtime64 -lbsd 19 | 20 | # Execute the application 21 | echo 22 | # Make sure parci 23 | #./matmul 5 6 7 24 | #./matmul 2000 2000 2000 25 | ./matmul 2048 2048 2048 26 | -------------------------------------------------------------------------------- /examples/snack/matmul/matmul.h: -------------------------------------------------------------------------------- 1 | #define BLOCK_SIZE 16 2 | -------------------------------------------------------------------------------- /examples/snack/multiple_cl_files/buildrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This is a demo to show that cloc can work with multiple cl files. 
4 | 5 | # First compile all files with accelerated functions to create hw.o and hw2.o 6 | echo 7 | echo snack.sh -c hw.cl 8 | snack.sh -c hw.cl 9 | echo 10 | echo snack.sh -c -noglobs hw2.cl 11 | snack.sh -c -noglobs hw2.cl 12 | 13 | # Compile the main program and link to hw.o and hw2.o 14 | # Main program can be c, cpp, or fortran 15 | echo 16 | echo gcc -o HelloWorld hw.o hw2.o HelloWorld.c -L/opt/rocm/lib -lhsa-runtime64 17 | gcc -o HelloWorld hw.o hw2.o HelloWorld.c -L/opt/rocm/lib -lhsa-runtime64 18 | 19 | echo 20 | echo ./HelloWorld 21 | ./HelloWorld 22 | -------------------------------------------------------------------------------- /examples/snack/multiple_cl_files/hw.cl: -------------------------------------------------------------------------------- 1 | __kernel void decode(__global const char* in, __global char* out) { 2 | int num = get_global_id(0); 3 | out[num] = in[num] + 1; 4 | } 5 | __kernel void encode(__global const char*in, __global char* out) { 6 | int num = get_global_id(0); 7 | out[num] = in[num] - 1; 8 | } 9 | -------------------------------------------------------------------------------- /examples/snack/multiple_cl_files/hw2.cl: -------------------------------------------------------------------------------- 1 | __kernel void super_encode(__global const char*in, __global char* out) { 2 | int num = get_global_id(0); 3 | out[num] = in[num] - 2; 4 | } 5 | __kernel void super_decode(__global const char*in, __global char* out) { 6 | int num = get_global_id(0); 7 | out[num] = in[num] + 2; 8 | } 9 | -------------------------------------------------------------------------------- /examples/snack/test_hsail_lib/buildrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # Compile accelerated functions 5 | echo 6 | if [ -f test_hsail_lib.o ] ; then rm test_hsail_lib.o ; fi 7 | echo "snackhsail.sh -v -c -hsaillib ../../mathdemo_hsaillib/mathdemo_hsaillib.hsail test_hsail_lib.cl " 8 | snackhsail.sh -v
-c -hsaillib ../../mathdemo_hsaillib/mathdemo_hsaillib.hsail test_hsail_lib.cl 9 | 10 | # Compile Main and link to accelerated functions in test_hsail_lib.o 11 | echo 12 | if [ -f test_hsail_lib ] ; then rm test_hsail_lib ; fi 13 | 14 | g++ -o test_hsail_lib test_hsail_lib.o test_hsail_lib.cpp -L/opt/rocm/lib -lhsa-runtime64 -lm 15 | 16 | # Execute 17 | echo 18 | echo ./test_hsail_lib 19 | ./test_hsail_lib 20 | -------------------------------------------------------------------------------- /examples/snack/test_hsail_lib/test_hsail_lib.cl: -------------------------------------------------------------------------------- 1 | #include "../../mathdemo_hsaillib/mathdemo_hsaillib.h" 2 | __kernel void testkernel( __global float * outfval , __global const float * fval) { 3 | int i = get_global_id(0); 4 | outfval[i] = __sin(fval[i]); 5 | } 6 | -------------------------------------------------------------------------------- /examples/snack/test_hsail_lib/test_hsail_lib.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | using namespace std; 6 | #include "test_hsail_lib.h" 7 | static const int NUMELEMENTS = 40; 8 | int main(int argc, char *argv[]) { 9 | float *inArray = NEW_GLOBAL(float,NUMELEMENTS); 10 | float *outArray = NEW_GLOBAL(float,NUMELEMENTS); 11 | // initialize inArray 12 | for (int i=0; i" << outArray[i] << "," << sin(inArray[i]) << ", "; 19 | if ( (outArray[i]-sin(inArray[i])) > 1.0 ) passed = false; 20 | } 21 | cout << endl << (passed ? 
"PASSED" : "FAILED") << endl; 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /examples/snack/vector_copy/VectorCopy.cpp: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "string.h" 4 | #include 5 | #include "vector_copy.h" 6 | 7 | int main(int argc, char **argv) 8 | { 9 | 10 | //Setup kernel arguments 11 | int* in=(int*)malloc_global(1024*1024*4); 12 | int* out=(int*)malloc_global(1024*1024*4); 13 | memset(out, 0, 1024*1024*4); 14 | memset(in, 1, 1024*1024*4); 15 | 16 | SNK_INIT_LPARM(lparm,1024*1024); 17 | vcopy(in,out,lparm); // kernel is vcopy(a,b) with b[id] = a[id], so the source buffer goes first 18 | 19 | //Validate 20 | bool valid=true; 21 | int failIndex=0; 22 | for(int i=0; i<1024*1024; i++) { 23 | if(out[i]!=in[i]) { 24 | failIndex=i; 25 | valid=false; 26 | break; 27 | } 28 | } 29 | if(valid) 30 | printf("passed validation\n"); 31 | else 32 | printf("VALIDATION FAILED!\nBad index: %d\n", failIndex); 33 | 34 | 35 | free_global(in); 36 | free_global(out); 37 | 38 | return 0; 39 | } 40 | 41 | -------------------------------------------------------------------------------- /examples/snack/vector_copy/buildrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compile accelerated functions 4 | echo 5 | if [ -f vector_copy.o ] ; then rm vector_copy.o ; fi 6 | snack.sh -q -c vector_copy.cl 7 | 8 | # Compile Main and link to accelerated functions in vector_copy.o 9 | if [ -f VectorCopy ] ; then rm VectorCopy ; fi 10 | echo g++ -o VectorCopy vector_copy.o VectorCopy.cpp -L/opt/rocm/lib -lhsa-runtime64 11 | g++ -o VectorCopy vector_copy.o VectorCopy.cpp -L/opt/rocm/lib -lhsa-runtime64 12 | # Execute 13 | echo ./VectorCopy 14 | ./VectorCopy 15 | -------------------------------------------------------------------------------- /examples/snack/vector_copy/vector_copy.cl:
-------------------------------------------------------------------------------- 1 | __kernel void vcopy(__global int *a, __global int *b) { 2 | int id = get_global_id(0); 3 | b[id] = a[id]; 4 | } 5 | -------------------------------------------------------------------------------- /libamdgcn/README: -------------------------------------------------------------------------------- 1 | libamdgcn README file 2 | 3 | libamdgcn is a builtin device library for amdgcn. It is normally 4 | installed at /opt/rocm/libamdgcn. It is a superset of libclc. 5 | The CLOC (CL Offline Compiler) requires libamdgcn. The IBM Coral 6 | compiler (LLVM OpenMP 4.5) with support for amdgcn also requires 7 | libamdgcn. Currently the source for libamdgcn is stored in the 8 | libamdgcn directory of the cloc git repository. Eventually, 9 | libamdgcn will be replaced with a new device library from AMD. 10 | 11 | Install procedure: 12 | dpkg -i libamdgcn*.deb 13 | or 14 | rpm -i libamdgcn*.rpm 15 | 16 | Build from source procedure: 17 | - Install amdllvm 18 | - Install python 19 | - Assume cloc repository is in $HOME/git/cloc 20 | mkdir /tmp/build ; cd /tmp/build 21 | cmake -DCMAKE_INSTALL_PREFIX=/opt/rocm/libamdgcn $HOME/git/cloc/libamdgcn 22 | make 23 | sudo make install 24 | 25 | -------------------------------------------------------------------------------- /libamdgcn/README-rsync_libclc.sh: -------------------------------------------------------------------------------- 1 | 2 | # 3 | # This script copies the libclc github repository into the 4 | # libamdgcn directory and excludes items not used. 
5 | # 6 | 7 | cd "$HOME/git/cloc/libamdgcn" || exit 1 # abort so rsync never copies into the wrong directory 8 | rsync -avC --exclude ".git" --exclude "amdgcn--/" \ 9 | --exclude "generic--/" --exclude "nvptx--nvidiacl" \ 10 | --exclude "r600--/" --exclude "nvptx64--nvidiacl" \ 11 | --exclude "ptx/" --exclude "ptx-nvidiacl" \ 12 | --exclude "test/" --exclude "www/" \ 13 | --exclude "amdgcn-amdhsa/" --exclude "compile-test.sh" \ 14 | --exclude "configure.py" --exclude "Makefile" \ 15 | --exclude "libclc.pc" --exclude ".gitignore" \ 16 | --exclude "r600/" --exclude "prepare-builtins.o.d" \ 17 | --exclude "build/" $HOME/git/libclc . 18 | 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/CREDITS.TXT: -------------------------------------------------------------------------------- 1 | N: Peter Collingbourne 2 | E: peter@pcc.me.uk 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgcn/lib/OVERRIDES: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HSAFoundation/CLOC/fe5372f02adb9e92812f89026f6a9ac4f462b055/libamdgcn/libclc/amdgcn/lib/OVERRIDES -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgcn/lib/SOURCES: -------------------------------------------------------------------------------- 1 | synchronization/barrier_impl.ll 2 | workitem/get_group_id.ll 3 | workitem/get_local_id.ll 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgcn/lib/synchronization/barrier_impl.ll: -------------------------------------------------------------------------------- 1 | declare i32 @__clc_clk_local_mem_fence() #1 2 | declare i32 @__clc_clk_global_mem_fence() #1 3 | declare void @llvm.amdgcn.s.barrier() #0 4 | 5 | define void @barrier(i32 %flags) #2 { 6 | barrier_local_test: 7 | %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence() 8 | %0 = and i32 %flags,
%CLK_LOCAL_MEM_FENCE 9 | %1 = icmp ne i32 %0, 0 10 | br i1 %1, label %barrier_local, label %barrier_global_test 11 | 12 | barrier_local: 13 | call void @llvm.amdgcn.s.barrier() 14 | br label %barrier_global_test 15 | 16 | barrier_global_test: 17 | %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence() 18 | %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE 19 | %3 = icmp ne i32 %2, 0 20 | br i1 %3, label %barrier_global, label %done 21 | 22 | barrier_global: 23 | call void @llvm.amdgcn.s.barrier() 24 | br label %done 25 | 26 | done: 27 | ret void 28 | } 29 | 30 | attributes #0 = { nounwind convergent } 31 | attributes #1 = { nounwind alwaysinline } 32 | attributes #2 = { nounwind convergent alwaysinline } 33 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgcn/lib/workitem/get_group_id.ll: -------------------------------------------------------------------------------- 1 | declare i32 @llvm.amdgcn.workgroup.id.x() #0 2 | declare i32 @llvm.amdgcn.workgroup.id.y() #0 3 | declare i32 @llvm.amdgcn.workgroup.id.z() #0 4 | 5 | define i32 @get_group_id(i32 %dim) #1 { 6 | switch i32 %dim, label %default [ 7 | i32 0, label %x_dim 8 | i32 1, label %y_dim 9 | i32 2, label %z_dim 10 | ] 11 | 12 | x_dim: 13 | %x = tail call i32 @llvm.amdgcn.workgroup.id.x() 14 | ret i32 %x 15 | 16 | y_dim: 17 | %y = tail call i32 @llvm.amdgcn.workgroup.id.y() 18 | ret i32 %y 19 | 20 | z_dim: 21 | %z = tail call i32 @llvm.amdgcn.workgroup.id.z() 22 | ret i32 %z 23 | 24 | default: 25 | ret i32 0 26 | } 27 | 28 | attributes #0 = { nounwind readnone } 29 | attributes #1 = { alwaysinline norecurse nounwind readnone } 30 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgcn/lib/workitem/get_local_id.ll: -------------------------------------------------------------------------------- 1 | declare i32 @llvm.amdgcn.workitem.id.x() #0 2 | declare i32 @llvm.amdgcn.workitem.id.y() #0 3 | declare i32 
@llvm.amdgcn.workitem.id.z() #0 4 | 5 | define i32 @get_local_id(i32 %dim) #1 { 6 | switch i32 %dim, label %default [ 7 | i32 0, label %x_dim 8 | i32 1, label %y_dim 9 | i32 2, label %z_dim 10 | ] 11 | 12 | x_dim: 13 | %x = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 14 | ret i32 %x 15 | 16 | y_dim: 17 | %y = tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0 18 | ret i32 %y 19 | 20 | z_dim: 21 | %z = tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0 22 | ret i32 %z 23 | 24 | default: 25 | ret i32 0 26 | } 27 | 28 | attributes #0 = { nounwind readnone } 29 | attributes #1 = { alwaysinline norecurse nounwind readnone } 30 | 31 | !0 = !{ i32 0, i32 2048 } 32 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/OVERRIDES: -------------------------------------------------------------------------------- 1 | workitem/get_group_id.cl 2 | workitem/get_global_size.cl 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/SOURCES: -------------------------------------------------------------------------------- 1 | atomic/atomic.cl 2 | math/ldexp.cl 3 | math/nextafter.cl 4 | math/sqrt.cl 5 | workitem/get_num_groups.ll 6 | workitem/get_local_size.ll 7 | workitem/get_global_size.ll 8 | workitem/get_work_dim.ll 9 | synchronization/barrier.cl 10 | image/get_image_width.cl 11 | image/get_image_height.cl 12 | image/get_image_depth.cl 13 | image/get_image_channel_data_type.cl 14 | image/get_image_channel_order.cl 15 | image/get_image_attributes_impl.ll 16 | image/read_imagef.cl 17 | image/read_imagei.cl 18 | image/read_imageui.cl 19 | image/read_image_impl.ll 20 | image/write_imagef.cl 21 | image/write_imagei.cl 22 | image/write_imageui.cl 23 | image/write_image_impl.ll 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/image/get_image_channel_data_type.cl: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL int __clc_get_image_channel_data_type_2d(image2d_t); 4 | _CLC_DECL int __clc_get_image_channel_data_type_3d(image3d_t); 5 | 6 | _CLC_OVERLOAD _CLC_DEF int 7 | get_image_channel_data_type(image2d_t image) { 8 | return __clc_get_image_channel_data_type_2d(image); 9 | } 10 | _CLC_OVERLOAD _CLC_DEF int 11 | get_image_channel_data_type(image3d_t image) { 12 | return __clc_get_image_channel_data_type_3d(image); 13 | } 14 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/image/get_image_channel_order.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL int __clc_get_image_channel_order_2d(image2d_t); 4 | _CLC_DECL int __clc_get_image_channel_order_3d(image3d_t); 5 | 6 | _CLC_OVERLOAD _CLC_DEF int 7 | get_image_channel_order(image2d_t image) { 8 | return __clc_get_image_channel_order_2d(image); 9 | } 10 | _CLC_OVERLOAD _CLC_DEF int 11 | get_image_channel_order(image3d_t image) { 12 | return __clc_get_image_channel_order_3d(image); 13 | } 14 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/image/get_image_depth.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL int __clc_get_image_depth_3d(image3d_t); 4 | 5 | _CLC_OVERLOAD _CLC_DEF int 6 | get_image_depth(image3d_t image) { 7 | return __clc_get_image_depth_3d(image); 8 | } 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/image/get_image_height.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL int __clc_get_image_height_2d(image2d_t); 4 | _CLC_DECL int __clc_get_image_height_3d(image3d_t); 5 | 6 | _CLC_OVERLOAD _CLC_DEF int 7 | 
get_image_height(image2d_t image) { 8 | return __clc_get_image_height_2d(image); 9 | } 10 | _CLC_OVERLOAD _CLC_DEF int 11 | get_image_height(image3d_t image) { 12 | return __clc_get_image_height_3d(image); 13 | } 14 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/image/get_image_width.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL int __clc_get_image_width_2d(image2d_t); 4 | _CLC_DECL int __clc_get_image_width_3d(image3d_t); 5 | 6 | _CLC_OVERLOAD _CLC_DEF int 7 | get_image_width(image2d_t image) { 8 | return __clc_get_image_width_2d(image); 9 | } 10 | _CLC_OVERLOAD _CLC_DEF int 11 | get_image_width(image3d_t image) { 12 | return __clc_get_image_width_3d(image); 13 | } 14 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/image/read_imagef.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); 4 | 5 | _CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, 6 | int2 coord) { 7 | float2 coord_float = (float2)(coord.x, coord.y); 8 | return __clc_read_imagef_tex(image, sampler, coord_float); 9 | } 10 | 11 | _CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, 12 | float2 coord) { 13 | return __clc_read_imagef_tex(image, sampler, coord); 14 | } 15 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/image/read_imagei.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); 4 | 5 | int4 __clc_reinterpret_v4f_to_v4i(float4 v) { 6 | union { 7 | int4 v4i; 8 | float4 v4f; 9 | } res = { .v4f = v}; 10 | return res.v4i; 11 | } 12 | 13 | 
_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, 14 | int2 coord) { 15 | float2 coord_float = (float2)(coord.x, coord.y); 16 | return __clc_reinterpret_v4f_to_v4i( 17 | __clc_read_imagef_tex(image, sampler, coord_float)); 18 | } 19 | _CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, 20 | float2 coord) { 21 | return __clc_reinterpret_v4f_to_v4i( 22 | __clc_read_imagef_tex(image, sampler, coord)); 23 | } 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/image/read_imageui.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); 4 | 5 | uint4 __clc_reinterpret_v4f_to_v4ui(float4 v) { 6 | union { 7 | uint4 v4ui; 8 | float4 v4f; 9 | } res = { .v4f = v}; 10 | return res.v4ui; 11 | } 12 | 13 | _CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, 14 | int2 coord) { 15 | float2 coord_float = (float2)(coord.x, coord.y); 16 | return __clc_reinterpret_v4f_to_v4ui( 17 | __clc_read_imagef_tex(image, sampler, coord_float)); 18 | } 19 | _CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, 20 | float2 coord) { 21 | return __clc_reinterpret_v4f_to_v4ui( 22 | __clc_read_imagef_tex(image, sampler, coord)); 23 | } 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/image/write_imagef.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL void __clc_write_imagef_2d(image2d_t image, int2 coord, float4 color); 4 | 5 | _CLC_OVERLOAD _CLC_DEF void 6 | write_imagef(image2d_t image, int2 coord, float4 color) 7 | { 8 | __clc_write_imagef_2d(image, coord, color); 9 | } 10 | -------------------------------------------------------------------------------- 
/libamdgcn/libclc/amdgpu/lib/image/write_imagei.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL void __clc_write_imagei_2d(image2d_t image, int2 coord, int4 color); 4 | 5 | _CLC_OVERLOAD _CLC_DEF void 6 | write_imagei(image2d_t image, int2 coord, int4 color) 7 | { 8 | __clc_write_imagei_2d(image, coord, color); 9 | } 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/image/write_imageui.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL void __clc_write_imageui_2d(image2d_t image, int2 coord, uint4 color); 4 | 5 | _CLC_OVERLOAD _CLC_DEF void 6 | write_imageui(image2d_t image, int2 coord, uint4 color) 7 | { 8 | __clc_write_imageui_2d(image, coord, color); 9 | } 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/math/nextafter.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../lib/clcmacro.h" 3 | 4 | _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float) 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/synchronization/barrier.cl: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | _CLC_DEF int __clc_clk_local_mem_fence() { 5 | return CLK_LOCAL_MEM_FENCE; 6 | } 7 | 8 | _CLC_DEF int __clc_clk_global_mem_fence() { 9 | return CLK_GLOBAL_MEM_FENCE; 10 | } 11 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/workitem/get_global_size.ll: -------------------------------------------------------------------------------- 1 | declare i32 @llvm.r600.read.global.size.x() nounwind readnone 2 | declare i32 @llvm.r600.read.global.size.y() nounwind readnone 3 | 
declare i32 @llvm.r600.read.global.size.z() nounwind readnone 4 | 5 | define i32 @get_global_size(i32 %dim) nounwind readnone alwaysinline { 6 | switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] 7 | x_dim: 8 | %x = call i32 @llvm.r600.read.global.size.x() nounwind readnone 9 | ret i32 %x 10 | y_dim: 11 | %y = call i32 @llvm.r600.read.global.size.y() nounwind readnone 12 | ret i32 %y 13 | z_dim: 14 | %z = call i32 @llvm.r600.read.global.size.z() nounwind readnone 15 | ret i32 %z 16 | default: 17 | ret i32 0 18 | } 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/workitem/get_local_size.ll: -------------------------------------------------------------------------------- 1 | declare i32 @llvm.r600.read.local.size.x() nounwind readnone 2 | declare i32 @llvm.r600.read.local.size.y() nounwind readnone 3 | declare i32 @llvm.r600.read.local.size.z() nounwind readnone 4 | 5 | define i32 @get_local_size(i32 %dim) nounwind readnone alwaysinline { 6 | switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] 7 | x_dim: 8 | %x = call i32 @llvm.r600.read.local.size.x() nounwind readnone 9 | ret i32 %x 10 | y_dim: 11 | %y = call i32 @llvm.r600.read.local.size.y() nounwind readnone 12 | ret i32 %y 13 | z_dim: 14 | %z = call i32 @llvm.r600.read.local.size.z() nounwind readnone 15 | ret i32 %z 16 | default: 17 | ret i32 0 18 | } 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/workitem/get_num_groups.ll: -------------------------------------------------------------------------------- 1 | declare i32 @llvm.r600.read.ngroups.x() nounwind readnone 2 | declare i32 @llvm.r600.read.ngroups.y() nounwind readnone 3 | declare i32 @llvm.r600.read.ngroups.z() nounwind readnone 4 | 5 | define i32 @get_num_groups(i32 %dim) nounwind readnone alwaysinline { 6 | switch i32 %dim, label 
%default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] 7 | x_dim: 8 | %x = call i32 @llvm.r600.read.ngroups.x() nounwind readnone 9 | ret i32 %x 10 | y_dim: 11 | %y = call i32 @llvm.r600.read.ngroups.y() nounwind readnone 12 | ret i32 %y 13 | z_dim: 14 | %z = call i32 @llvm.r600.read.ngroups.z() nounwind readnone 15 | ret i32 %z 16 | default: 17 | ret i32 0 18 | } 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/amdgpu/lib/workitem/get_work_dim.ll: -------------------------------------------------------------------------------- 1 | declare i32 @llvm.AMDGPU.read.workdim() nounwind readnone 2 | 3 | define i32 @get_work_dim() nounwind readnone alwaysinline { 4 | %x = call i32 @llvm.AMDGPU.read.workdim() nounwind readnone , !range !0 5 | ret i32 %x 6 | } 7 | 8 | !0 = !{ i32 1, i32 4 } 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/async/async_work_group_copy.h: -------------------------------------------------------------------------------- 1 | #define __CLC_DST_ADDR_SPACE local 2 | #define __CLC_SRC_ADDR_SPACE global 3 | #define __CLC_BODY 4 | #include 5 | #undef __CLC_DST_ADDR_SPACE 6 | #undef __CLC_SRC_ADDR_SPACE 7 | #undef __CLC_BODY 8 | 9 | #define __CLC_DST_ADDR_SPACE global 10 | #define __CLC_SRC_ADDR_SPACE local 11 | #define __CLC_BODY 12 | #include 13 | #undef __CLC_DST_ADDR_SPACE 14 | #undef __CLC_SRC_ADDR_SPACE 15 | #undef __CLC_BODY 16 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/async/async_work_group_copy.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL event_t async_work_group_copy( 2 | __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst, 3 | const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src, 4 | size_t num_gentypes, 5 | event_t event); 6 | 
-------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/async/async_work_group_strided_copy.h: -------------------------------------------------------------------------------- 1 | #define __CLC_DST_ADDR_SPACE local 2 | #define __CLC_SRC_ADDR_SPACE global 3 | #define __CLC_BODY 4 | #include 5 | #undef __CLC_DST_ADDR_SPACE 6 | #undef __CLC_SRC_ADDR_SPACE 7 | #undef __CLC_BODY 8 | 9 | #define __CLC_DST_ADDR_SPACE global 10 | #define __CLC_SRC_ADDR_SPACE local 11 | #define __CLC_BODY 12 | #include 13 | #undef __CLC_DST_ADDR_SPACE 14 | #undef __CLC_SRC_ADDR_SPACE 15 | #undef __CLC_BODY 16 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/async/async_work_group_strided_copy.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL event_t async_work_group_strided_copy( 2 | __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst, 3 | const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src, 4 | size_t num_gentypes, 5 | size_t stride, 6 | event_t event); 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/async/prefetch.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/async/prefetch.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL void prefetch(const global __CLC_GENTYPE *p, size_t num_gentypes); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/async/wait_group_events.h: -------------------------------------------------------------------------------- 1 | void wait_group_events(int num_events, 
event_t *event_list); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_add.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_add 2 | #include 3 | #undef __CLC_FUNCTION 4 | #undef __CLC_DECLARE_ATOMIC 5 | #undef __CLC_DECLARE_ATOMIC_ADDRSPACE 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_and.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_and 2 | #include 3 | #undef __CLC_FUNCTION 4 | #undef __CLC_DECLARE_ATOMIC 5 | #undef __CLC_DECLARE_ATOMIC_ADDRSPACE 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_cmpxchg.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_cmpxchg 2 | 3 | #define __CLC_DECLARE_ATOMIC_3_ARG(ADDRSPACE, TYPE) \ 4 | _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE, TYPE); 5 | 6 | #define __CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(TYPE) \ 7 | __CLC_DECLARE_ATOMIC_3_ARG(global, TYPE) \ 8 | __CLC_DECLARE_ATOMIC_3_ARG(local, TYPE) 9 | 10 | __CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(int) 11 | __CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(uint) 12 | 13 | #undef __CLC_FUNCTION 14 | #undef __CLC_DECLARE_ATOMIC_3_ARG 15 | #undef __CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG 16 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_dec.h: -------------------------------------------------------------------------------- 1 | #define atomic_dec(p) atomic_sub(p, 1) 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_decl.inc:
-------------------------------------------------------------------------------- 1 | 2 | #define __CLC_DECLARE_ATOMIC(ADDRSPACE, TYPE) \ 3 | _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE); 4 | 5 | #define __CLC_DECLARE_ATOMIC_ADDRSPACE(TYPE) \ 6 | __CLC_DECLARE_ATOMIC(global, TYPE) \ 7 | __CLC_DECLARE_ATOMIC(local, TYPE) 8 | 9 | __CLC_DECLARE_ATOMIC_ADDRSPACE(int) 10 | __CLC_DECLARE_ATOMIC_ADDRSPACE(uint) 11 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_inc.h: -------------------------------------------------------------------------------- 1 | #define atomic_inc(p) atomic_add(p, 1) 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_max.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_max 2 | #include 3 | #undef __CLC_FUNCTION 4 | #undef __CLC_DECLARE_ATOMIC 5 | #undef __CLC_DECLARE_ATOMIC_ADDRSPACE 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_min.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_min 2 | #include 3 | #undef __CLC_FUNCTION 4 | #undef __CLC_DECLARE_ATOMIC 5 | #undef __CLC_DECLARE_ATOMIC_ADDRSPACE 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_or.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_or 2 | #include 3 | #undef __CLC_FUNCTION 4 | #undef __CLC_DECLARE_ATOMIC 5 | #undef __CLC_DECLARE_ATOMIC_ADDRSPACE 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_sub.h: 
-------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_sub 2 | #include 3 | #undef __CLC_FUNCTION 4 | #undef __CLC_DECLARE_ATOMIC 5 | #undef __CLC_DECLARE_ATOMIC_ADDRSPACE 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_xchg.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_xchg 2 | #include 3 | __CLC_DECLARE_ATOMIC_ADDRSPACE(float); 4 | #undef __CLC_FUNCTION 5 | #undef __CLC_DECLARE_ATOMIC 6 | #undef __CLC_DECLARE_ATOMIC_ADDRSPACE 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/atomic/atomic_xor.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_xor 2 | #include 3 | #undef __CLC_FUNCTION 4 | #undef __CLC_DECLARE_ATOMIC 5 | #undef __CLC_DECLARE_ATOMIC_ADDRSPACE 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL int atom_add(global int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_add(global unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(global int *p, int cmp, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(global unsigned int *p, unsigned int cmp, unsigned int val); 3 | -------------------------------------------------------------------------------- 
/libamdgcn/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_dec for global pointers (int and unsigned int); it takes only the pointer. */ _CLC_OVERLOAD _CLC_DECL int atom_dec(global int *p); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(global unsigned int *p); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_inc for global pointers (int and unsigned int); it takes only the pointer. */ _CLC_OVERLOAD _CLC_DECL int atom_inc(global int *p); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(global unsigned int *p); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_sub for global pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_sub(global int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_sub(global unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_xchg for global pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_xchg(global int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_xchg(global unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_and for global pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_and(global int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_and(global unsigned int *p, unsigned int val); 3 | --------------------------------------------------------------------------------
/libamdgcn/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_max for global pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_max(global int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_max(global unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_min for global pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_min(global int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_min(global unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_or for global pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_or(global int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_or(global unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_xor for global pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_xor(global int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_xor(global unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_add for local pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_add(local int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_add(local unsigned int *p, unsigned int val); 3 | 
-------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_cmpxchg for local pointers (int and unsigned int); it takes the pointer and two values, cmp and val. */ _CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(local int *p, int cmp, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(local unsigned int *p, unsigned int cmp, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_dec for local pointers (int and unsigned int); it takes only the pointer. */ _CLC_OVERLOAD _CLC_DECL int atom_dec(local int *p); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(local unsigned int *p); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_inc for local pointers (int and unsigned int); it takes only the pointer. */ _CLC_OVERLOAD _CLC_DECL int atom_inc(local int *p); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(local unsigned int *p); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_sub for local pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_sub(local int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_sub(local unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_xchg for local pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_xchg(local int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_xchg(local unsigned int *p, unsigned
int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_and for local pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_and(local int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_and(local unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_max for local pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_max(local int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_max(local unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_min for local pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_min(local int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_min(local unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_or for local pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_or(local int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_or(local unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h: -------------------------------------------------------------------------------- 1 | /* Declares atom_xor for local pointers (int and unsigned int), taking the pointer and a value. */ _CLC_OVERLOAD _CLC_DECL int atom_xor(local int *p, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned
int atom_xor(local unsigned int *p, unsigned int val); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/clcfunc.h: -------------------------------------------------------------------------------- 1 | /* Attribute shorthands used by the declarations in these headers: _CLC_OVERLOAD applies the overloadable attribute, _CLC_DECL expands to nothing, _CLC_DEF applies always_inline, and _CLC_INLINE applies always_inline plus the inline keyword. */ #define _CLC_OVERLOAD __attribute__((overloadable)) 2 | #define _CLC_DECL 3 | #define _CLC_DEF __attribute__((always_inline)) 4 | #define _CLC_INLINE __attribute__((always_inline)) inline 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/clcversion.h: -------------------------------------------------------------------------------- 1 | /* CLC_VERSION_1_0 and CLC_VERSION_1_1 are both defined only when __OPENCL_VERSION__ is at least 110. */ #if __OPENCL_VERSION__ >= 110 2 | #define CLC_VERSION_1_0 100 3 | #define CLC_VERSION_1_1 110 4 | #endif 5 | /* CLC_VERSION_1_2 is added when __OPENCL_VERSION__ is at least 120. */ 6 | #if __OPENCL_VERSION__ >= 120 7 | #define CLC_VERSION_1_2 120 8 | #endif 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/common/degrees.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014,2015 Advanced Micro Devices, Inc. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | /* Per-type declaration of degrees; __CLC_GENTYPE is supplied by whichever file includes this one. */ 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE degrees(__CLC_GENTYPE x); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/common/mix.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file. NOTE(review): neither the macro value nor the #include target is visible in this listing; confirm against the original header. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/common/mix.inc: -------------------------------------------------------------------------------- 1 | /* mix with all three arguments of type __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mix(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c); 2 | /* When __CLC_SCALAR is not defined, also declare the form whose third argument is __CLC_SCALAR_GENTYPE. */ 3 | #ifndef __CLC_SCALAR 4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mix(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_SCALAR_GENTYPE c); 5 | #endif 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/common/radians.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014,2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | /* Per-type declaration of radians; __CLC_GENTYPE is supplied by whichever file includes this one. */ 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE radians(__CLC_GENTYPE x); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/common/sign.h: -------------------------------------------------------------------------------- 1 | /* Sets __CLC_FUNCTION to sign and defines __CLC_BODY, processes the #include, then undefines both macros. NOTE(review): the __CLC_BODY value and the #include target are not visible in this listing; confirm against the original header. */ #define __CLC_FUNCTION sign 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_FUNCTION 5 | #undef __CLC_BODY 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/geometric/cross.h: -------------------------------------------------------------------------------- 1 | /* cross is declared only for 3- and 4-component vectors. */ _CLC_OVERLOAD _CLC_DECL float3 cross(float3 p0, float3 p1); 2 | _CLC_OVERLOAD _CLC_DECL float4 cross(float4 p0, float4 p1); 3 | /* The double3 and double4 overloads are declared only when cl_khr_fp64 is defined. */ 4 | #ifdef cl_khr_fp64 5 | _CLC_OVERLOAD _CLC_DECL double3 cross(double3 p0, double3 p1); 6 | _CLC_OVERLOAD _CLC_DECL double4 cross(double4 p0, double4 p1); 7 | #endif 8 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/geometric/distance.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/geometric/dot.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/geometric/dot.inc: -------------------------------------------------------------------------------- 1 | /* dot takes two __CLC_FLOATN arguments and returns __CLC_FLOAT. */ _CLC_OVERLOAD _CLC_DECL __CLC_FLOAT dot(__CLC_FLOATN p0, __CLC_FLOATN p1); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/geometric/fast_length.inc: -------------------------------------------------------------------------------- 1 | /* 
2 | * Copyright (c) 2014,2015 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | /* fast_length takes one __CLC_FLOATN argument and returns __CLC_FLOAT. */ 23 | _CLC_OVERLOAD _CLC_DECL __CLC_FLOAT fast_length(__CLC_FLOATN p0); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/geometric/floatn.inc: -------------------------------------------------------------------------------- 1 | /* Includes __CLC_BODY once per float width (float, float2, float3, float4) with __CLC_FLOAT set to float and __CLC_FPSIZE set to 32. The includer must define __CLC_BODY first; it is undefined at the end of this file. */ #define __CLC_FLOAT float 2 | #define __CLC_FPSIZE 32 3 | /* Scalar pass: __CLC_SCALAR is defined only around this include. */ 4 | #define __CLC_FLOATN float 5 | #define __CLC_SCALAR 6 | #include __CLC_BODY 7 | #undef __CLC_FLOATN 8 | #undef __CLC_SCALAR 9 | /* Vector passes for widths 2, 3 and 4. */ 10 | #define __CLC_FLOATN float2 11 | #include __CLC_BODY 12 | #undef __CLC_FLOATN 13 | 14 | #define __CLC_FLOATN float3 15 | #include __CLC_BODY 16 | #undef __CLC_FLOATN 17 | 18 | #define __CLC_FLOATN float4 19 | #include __CLC_BODY 20 | #undef __CLC_FLOATN 21 | 22 | #undef __CLC_FLOAT 23 | #undef __CLC_FPSIZE 24 | /* The same sequence is repeated for double (FPSIZE 64) only when __FLOAT_ONLY is not defined and cl_khr_fp64 is defined. */ 25 | #ifndef __FLOAT_ONLY 26 | #ifdef cl_khr_fp64 27 | 28 | #define __CLC_FLOAT double 29 | #define __CLC_FPSIZE 64 30 | 31 | #define __CLC_FLOATN double 32 | #define __CLC_SCALAR 33 | #include __CLC_BODY 34 | #undef __CLC_FLOATN 35 | #undef __CLC_SCALAR 36 | 37 | #define __CLC_FLOATN double2 38 | #include __CLC_BODY 39 | #undef __CLC_FLOATN 40 | 41 | #define __CLC_FLOATN double3 42 | #include __CLC_BODY 43 | #undef __CLC_FLOATN 44 | 45 | #define __CLC_FLOATN double4 46 | #include __CLC_BODY 47 | #undef __CLC_FLOATN 48 | 49 | #undef __CLC_FLOAT 50 | #undef __CLC_FPSIZE 51 | 52 | #endif 53 | #endif 54 | /* Clean up so the next includer starts without a stale __CLC_BODY. */ 55 | #undef __CLC_BODY 56 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/geometric/length.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file. NOTE(review): neither the macro value nor the #include target is visible in this listing; confirm against the original header. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/geometric/length.inc: -------------------------------------------------------------------------------- 1 | /* length takes one __CLC_FLOATN argument and returns __CLC_FLOAT. */ _CLC_OVERLOAD _CLC_DECL __CLC_FLOAT
length(__CLC_FLOATN p0); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/geometric/normalize.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/geometric/normalize.inc: -------------------------------------------------------------------------------- 1 | /* normalize returns the same __CLC_FLOATN type it is given. */ _CLC_OVERLOAD _CLC_DECL __CLC_FLOATN normalize(__CLC_FLOATN p); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/abs.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/abs.inc: -------------------------------------------------------------------------------- 1 | /* abs takes __CLC_GENTYPE and returns __CLC_U_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE abs(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/abs_diff.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/abs_diff.inc: -------------------------------------------------------------------------------- 1 | /* abs_diff takes two __CLC_GENTYPE values and returns __CLC_U_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/add_sat.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | 
-------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/add_sat.inc: -------------------------------------------------------------------------------- 1 | /* add_sat takes two __CLC_GENTYPE values and returns __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE add_sat(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/clz.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file. NOTE(review): neither the macro value nor the #include target is visible in this listing; confirm against the original header. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/clz.inc: -------------------------------------------------------------------------------- 1 | /* clz takes and returns __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clz(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/definitions.h: -------------------------------------------------------------------------------- 1 | /* Integer limit macros. The minimum values are written as (-MAX - 1) rather than as a single negative literal. CHAR_MAX and CHAR_MIN alias the SCHAR_ macros, which are defined further down; this works because macros are expanded at the point of use. */ #define CHAR_BIT 8 2 | #define INT_MAX 2147483647 3 | #define INT_MIN (-2147483647 - 1) 4 | #define LONG_MAX 0x7fffffffffffffffL 5 | #define LONG_MIN (-0x7fffffffffffffffL - 1) 6 | #define CHAR_MAX SCHAR_MAX 7 | #define CHAR_MIN SCHAR_MIN 8 | #define SCHAR_MAX 127 9 | #define SCHAR_MIN (-127 - 1) 10 | #define SHRT_MAX 32767 11 | #define SHRT_MIN (-32767 - 1) 12 | #define UCHAR_MAX 255 13 | #define USHRT_MAX 65535 14 | #define UINT_MAX 0xffffffff 15 | #define ULONG_MAX 0xffffffffffffffffUL 16 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/hadd.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/hadd.inc: 
-------------------------------------------------------------------------------- 1 | /* hadd takes two __CLC_GENTYPE values and returns __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/integer-gentype.inc: -------------------------------------------------------------------------------- 1 | /* Includes __CLC_BODY once per type with __CLC_GENTYPE set to that type: first int, int2, int3, int4, int8, int16, then the matching uint types. The includer must define __CLC_BODY first; this file does not undefine it. */ #define __CLC_GENTYPE int 2 | #include __CLC_BODY 3 | #undef __CLC_GENTYPE 4 | 5 | #define __CLC_GENTYPE int2 6 | #include __CLC_BODY 7 | #undef __CLC_GENTYPE 8 | 9 | #define __CLC_GENTYPE int3 10 | #include __CLC_BODY 11 | #undef __CLC_GENTYPE 12 | 13 | #define __CLC_GENTYPE int4 14 | #include __CLC_BODY 15 | #undef __CLC_GENTYPE 16 | 17 | #define __CLC_GENTYPE int8 18 | #include __CLC_BODY 19 | #undef __CLC_GENTYPE 20 | 21 | #define __CLC_GENTYPE int16 22 | #include __CLC_BODY 23 | #undef __CLC_GENTYPE 24 | /* Unsigned passes. */ 25 | #define __CLC_GENTYPE uint 26 | #include __CLC_BODY 27 | #undef __CLC_GENTYPE 28 | 29 | #define __CLC_GENTYPE uint2 30 | #include __CLC_BODY 31 | #undef __CLC_GENTYPE 32 | 33 | #define __CLC_GENTYPE uint3 34 | #include __CLC_BODY 35 | #undef __CLC_GENTYPE 36 | 37 | #define __CLC_GENTYPE uint4 38 | #include __CLC_BODY 39 | #undef __CLC_GENTYPE 40 | 41 | #define __CLC_GENTYPE uint8 42 | #include __CLC_BODY 43 | #undef __CLC_GENTYPE 44 | 45 | #define __CLC_GENTYPE uint16 46 | #include __CLC_BODY 47 | #undef __CLC_GENTYPE 48 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/mad24.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY, includes another file, then undefines __CLC_BODY itself. NOTE(review): neither the macro value nor the #include target is visible in this listing; confirm against the original header. */ #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/mad24.inc: -------------------------------------------------------------------------------- 1 | /* mad24 takes three __CLC_GENTYPE values and returns __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/mad_hi.h: -------------------------------------------------------------------------------- 1 | /* mad_hi is a macro, not a declared function: it expands to mul_hi(a, b) plus c, with each argument parenthesized. */ #define mad_hi(a, b, c) (mul_hi((a),(b))+(c)) 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/mad_sat.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY, includes another file, then undefines __CLC_BODY; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/mad_sat.inc: -------------------------------------------------------------------------------- 1 | /* mad_sat takes three __CLC_GENTYPE values and returns __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad_sat(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/mul24.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY, includes another file, then undefines __CLC_BODY; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/mul24.inc: -------------------------------------------------------------------------------- 1 | /* mul24 takes two __CLC_GENTYPE values and returns __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/mul_hi.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/mul_hi.inc: 
-------------------------------------------------------------------------------- 1 | /* mul_hi takes two __CLC_GENTYPE values and returns __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul_hi(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/rhadd.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file. NOTE(review): neither the macro value nor the #include target is visible in this listing; confirm against the original header. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/rhadd.inc: -------------------------------------------------------------------------------- 1 | /* rhadd takes two __CLC_GENTYPE values and returns __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/rotate.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/rotate.inc: -------------------------------------------------------------------------------- 1 | /* rotate takes two __CLC_GENTYPE values and returns __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rotate(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/sub_sat.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file; the macro value and the #include target are not visible in this listing. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/integer/sub_sat.inc: -------------------------------------------------------------------------------- 1 | /* sub_sat takes two __CLC_GENTYPE values and returns __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sub_sat(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- 
/libamdgcn/libclc/generic/include/clc/integer/upsample.h: -------------------------------------------------------------------------------- 1 | /* Declares one upsample overload: result type BGENTYPE, hi argument of type GENTYPE, lo argument of type UGENTYPE. */ #define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ 2 | _CLC_OVERLOAD _CLC_DECL BGENTYPE upsample(GENTYPE hi, UGENTYPE lo); 3 | /* Expands the declaration for the scalar type and for vector widths 2, 3, 4, 8 and 16 by pasting the width onto each type name. The last entry ends in a line continuation onto an empty line. */ 4 | #define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) \ 5 | __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ 6 | __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \ 7 | __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \ 8 | __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \ 9 | __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) \ 10 | __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16) \ 11 | 12 | /* Type table: each row is (result, hi, lo). The lo argument is the unsigned type in every row. */ #define __CLC_UPSAMPLE_TYPES() \ 13 | __CLC_UPSAMPLE_VEC(short, char, uchar) \ 14 | __CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \ 15 | __CLC_UPSAMPLE_VEC(int, short, ushort) \ 16 | __CLC_UPSAMPLE_VEC(uint, ushort, ushort) \ 17 | __CLC_UPSAMPLE_VEC(long, int, uint) \ 18 | __CLC_UPSAMPLE_VEC(ulong, uint, uint) \ 19 | 20 | __CLC_UPSAMPLE_TYPES() 21 | /* The helper macros are private to this header and are removed once the declarations are emitted. */ 22 | #undef __CLC_UPSAMPLE_TYPES 23 | #undef __CLC_UPSAMPLE_DECL 24 | #undef __CLC_UPSAMPLE_VEC 25 | 26 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/acos.h: -------------------------------------------------------------------------------- 1 | /* Defines __CLC_BODY and then includes another file. NOTE(review): neither the macro value nor the #include target is visible in this listing; confirm against the original header. */ #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/acos.inc: -------------------------------------------------------------------------------- 1 | /* acos takes and returns __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE acos(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/acosh.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | /* Defines __CLC_BODY and then includes another file. NOTE(review): neither the macro value nor the #include target is visible in this listing; confirm against the original header. */ 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/acosh.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | /* Per-type declaration of acosh; __CLC_GENTYPE is supplied by whichever file includes this one. */ 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE acosh(__CLC_GENTYPE x); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/acospi.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/acospi.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE acospi(__CLC_GENTYPE x); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/asin.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/asin.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE asin(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/asinh.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/asinh.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE asinh(__CLC_GENTYPE x); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/asinpi.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/asinpi.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE asinpi(__CLC_GENTYPE x); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/atan.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/atan.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE atan(__CLC_GENTYPE a); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/atan2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/atan2.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE atan2(__CLC_GENTYPE a, __CLC_GENTYPE b); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/atan2pi.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/atanh.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/atanh.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE atanh(__CLC_GENTYPE x); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/atanpi.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/atanpi.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | 23 | /* Declares atanpi taking and returning __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE atanpi(__CLC_GENTYPE x); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/binary_decl.inc: -------------------------------------------------------------------------------- 1 | /* Two-operand declaration template: declares __CLC_FUNCTION for (gentype, gentype) and for (gentype, scalar float). Neither __CLC_FUNCTION nor __CLC_GENTYPE is defined in this file. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b); 2 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, float b); 3 | 4 | /* The (gentype, scalar double) form is compiled only when cl_khr_fp64 is defined. */ #ifdef cl_khr_fp64 5 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, double b); 6 | #endif 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/binary_intrin.inc: -------------------------------------------------------------------------------- 1 | /* Two-operand overloads of __CLC_FUNCTION for float and its 2-, 3-, 4-, 8- and 16-wide vectors. Each __asm label is the __CLC_INTRINSIC string followed by a per-type suffix (.f32, .v2f32, ...). */ _CLC_OVERLOAD float __CLC_FUNCTION(float, float) __asm(__CLC_INTRINSIC ".f32"); 2 | _CLC_OVERLOAD float2 __CLC_FUNCTION(float2, float2) __asm(__CLC_INTRINSIC ".v2f32"); 3 | _CLC_OVERLOAD float3 __CLC_FUNCTION(float3, float3) __asm(__CLC_INTRINSIC ".v3f32"); 4 | _CLC_OVERLOAD float4 __CLC_FUNCTION(float4, float4) __asm(__CLC_INTRINSIC ".v4f32"); 5 | _CLC_OVERLOAD float8 __CLC_FUNCTION(float8, float8) __asm(__CLC_INTRINSIC ".v8f32"); 6 | _CLC_OVERLOAD float16 __CLC_FUNCTION(float16, float16) __asm(__CLC_INTRINSIC ".v16f32"); 7 | 8 | /* Matching double overloads (.f64 suffixes), compiled only when cl_khr_fp64 is defined. */ #ifdef cl_khr_fp64 9 | _CLC_OVERLOAD double __CLC_FUNCTION(double, double) __asm(__CLC_INTRINSIC ".f64"); 10 | _CLC_OVERLOAD double2 __CLC_FUNCTION(double2, double2) __asm(__CLC_INTRINSIC ".v2f64"); 11 | _CLC_OVERLOAD double3 __CLC_FUNCTION(double3, double3) __asm(__CLC_INTRINSIC ".v3f64"); 12 | _CLC_OVERLOAD double4 __CLC_FUNCTION(double4, double4) __asm(__CLC_INTRINSIC ".v4f64"); 13 | _CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8) __asm(__CLC_INTRINSIC ".v8f64"); 14 | _CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16) __asm(__CLC_INTRINSIC ".v16f64"); 15 | #endif 16 | 17 | /* Clear both parameter macros once the overloads are declared. */ #undef __CLC_FUNCTION 18 | 
#undef __CLC_INTRINSIC 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/ceil.h: -------------------------------------------------------------------------------- 1 | /* Remaps the public name ceil to __clc_ceil, then sets __CLC_FUNCTION and __CLC_INTRINSIC (llvm.ceil) ahead of the #include below. NOTE(review): the #include operand is missing in this copy, presumably an angle-bracket path lost in extraction; confirm against the original header. */ #undef ceil 2 | #define ceil __clc_ceil 3 | 4 | #define __CLC_FUNCTION __clc_ceil 5 | #define __CLC_INTRINSIC "llvm.ceil" 6 | #include 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/clc_nextafter.h: -------------------------------------------------------------------------------- 1 | /* Performs the same #include twice, first with __CLC_FUNCTION set to nextafter and then to __clc_nextafter, and clears __CLC_BODY at the end. NOTE(review): the __CLC_BODY value and both #include operands are missing in this copy; confirm against the original header. */ #define __CLC_BODY 2 | 3 | #define __CLC_FUNCTION nextafter 4 | #include 5 | #undef __CLC_FUNCTION 6 | 7 | #define __CLC_FUNCTION __clc_nextafter 8 | #include 9 | #undef __CLC_FUNCTION 10 | 11 | #undef __CLC_BODY 12 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/copysign.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/copysign.inc: -------------------------------------------------------------------------------- 1 | /* Declares copysign with two __CLC_GENTYPE operands, returning __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE copysign(__CLC_GENTYPE a, __CLC_GENTYPE b); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/cos.h: -------------------------------------------------------------------------------- 1 | /* Unlike copysign.h above, this header also clears __CLC_BODY after the #include. */ #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/cos.inc: -------------------------------------------------------------------------------- 1 | /* Declares cos taking and returning __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE cos(__CLC_GENTYPE a); 2 | 
-------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/cospi.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/cospi.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE cospi(__CLC_GENTYPE a); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/erf.h: -------------------------------------------------------------------------------- 1 | /* Remove any macro named erf before it is used as __CLC_FUNCTION below, matching how erfc.h, exp.h and exp10.h each undefine the name they declare. */ #undef erf 2 | 3 | #define __CLC_BODY 4 | #define __CLC_FUNCTION erf 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/erfc.h: -------------------------------------------------------------------------------- 1 | #undef erfc 2 | 3 | #define __CLC_BODY 4 | #define __CLC_FUNCTION erfc 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/exp.h: -------------------------------------------------------------------------------- 1 | #undef exp 2 | 3 | #define __CLC_BODY 4 | #define __CLC_FUNCTION exp 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/exp10.h: -------------------------------------------------------------------------------- 1 | #undef exp10 2 | 3 | #define __CLC_BODY 4 | #define __CLC_FUNCTION exp10 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef
__CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/exp2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/exp2.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | 23 | /* Declares exp2 taking and returning __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE exp2(__CLC_GENTYPE x); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/fabs.h: -------------------------------------------------------------------------------- 1 | /* Remaps the public name fabs to __clc_fabs, then sets __CLC_FUNCTION and __CLC_INTRINSIC (llvm.fabs) ahead of the #include below. NOTE(review): the #include operand is missing in this copy, presumably an angle-bracket path lost in extraction; confirm against the original header. The same applies to floor.h and fma.h below. */ #undef fabs 2 | #define fabs __clc_fabs 3 | 4 | #define __CLC_FUNCTION __clc_fabs 5 | #define __CLC_INTRINSIC "llvm.fabs" 6 | #include 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/fdim.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/fdim.inc: -------------------------------------------------------------------------------- 1 | /* Declares fdim with two __CLC_GENTYPE operands, returning __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fdim(__CLC_GENTYPE a, __CLC_GENTYPE b); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/floor.h: -------------------------------------------------------------------------------- 1 | /* Remaps the public name floor to __clc_floor; __CLC_INTRINSIC is the string llvm.floor. */ #undef floor 2 | #define floor __clc_floor 3 | 4 | #define __CLC_FUNCTION __clc_floor 5 | #define __CLC_INTRINSIC "llvm.floor" 6 | #include 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/fma.h: -------------------------------------------------------------------------------- 1 | /* Remaps the public name fma to __clc_fma; __CLC_INTRINSIC is the string llvm.fma. */ #undef fma 2 | #define fma __clc_fma 3 | 4 | #define __CLC_FUNCTION __clc_fma 5 | #define __CLC_INTRINSIC "llvm.fma" 6 | #include 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/fmax.h: -------------------------------------------------------------------------------- 1 | /* Sets __CLC_FUNCTION to fmax for the #include below, then clears __CLC_BODY and __CLC_FUNCTION. NOTE(review): the __CLC_BODY value and #include operand are missing in this copy; confirm against the original header. */ #define __CLC_BODY 2 | #define __CLC_FUNCTION fmax 3 | 4 | #include 5 | 6 | 
#undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/fmin.h: -------------------------------------------------------------------------------- 1 | /* Same shape as fmax.h, with __CLC_FUNCTION set to fmin. */ #define __CLC_BODY 2 | #define __CLC_FUNCTION fmin 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/fmod.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/fmod.inc: -------------------------------------------------------------------------------- 1 | /* Declares fmod with two __CLC_GENTYPE operands, returning __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fmod(__CLC_GENTYPE a, __CLC_GENTYPE b); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/fract.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/frexp.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/frexp.inc: -------------------------------------------------------------------------------- 1 | /* Three frexp overloads that differ only in the address space of the __CLC_INTN pointer iptr: global, local and private. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, global __CLC_INTN *iptr); 2 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, local __CLC_INTN *iptr); 3 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, private __CLC_INTN *iptr); 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/hypot.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/hypot.inc: -------------------------------------------------------------------------------- 1 | /* Declares hypot with two __CLC_GENTYPE operands, returning __CLC_GENTYPE. */ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hypot(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | --------------------------------------------------------------------------------
/libamdgcn/libclc/generic/include/clc/math/ilogb.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | 3 | #include 4 | 5 | #undef __CLC_BODY 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/ilogb.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_INTN ilogb(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/ldexp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014,2015 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/log.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/log.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE log(__CLC_GENTYPE a); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/log10.h: -------------------------------------------------------------------------------- 1 | #undef log10 2 | 3 | #define __CLC_BODY 4 | #define __CLC_FUNCTION log10 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/log1p.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/log1p.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE log1p(__CLC_GENTYPE a); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/log2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/log2.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE log2(__CLC_GENTYPE a); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/mad.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/mad.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/modf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/native_cos.h: -------------------------------------------------------------------------------- 1 | #define native_cos cos 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/native_divide.h: -------------------------------------------------------------------------------- 1 | #define native_divide(x, y) ((x) / (y)) 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/native_exp.h: -------------------------------------------------------------------------------- 1 | #define native_exp exp 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/native_exp10.h: -------------------------------------------------------------------------------- 1 | #define native_exp10 exp10 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/native_exp2.h: -------------------------------------------------------------------------------- 1 | #define native_exp2 exp2 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/native_log.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE native_log(__CLC_GENTYPE a); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/native_log2.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Advanced Micro Devices, Inc. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE native_log2(__CLC_GENTYPE a); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/native_powr.h: -------------------------------------------------------------------------------- 1 | #define native_powr pow 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/native_sin.h: -------------------------------------------------------------------------------- 1 | #define native_sin sin 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/native_sqrt.h: -------------------------------------------------------------------------------- 1 | #define native_sqrt sqrt 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/nextafter.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY <clc/math/binary_decl.inc> 2 | #define __CLC_FUNCTION nextafter 3 | #include <clc/math/gentype.inc> 4 | #undef __CLC_FUNCTION 5 | #undef __CLC_BODY 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/pow.h: -------------------------------------------------------------------------------- 1 | #undef pow 2 | #define pow __clc_pow 3 | 4 | #define __CLC_FUNCTION __clc_pow 5 | #define __CLC_INTRINSIC "llvm.pow" 6 | #include <clc/math/binary_intrin.inc> 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/pown.h: -------------------------------------------------------------------------------- 1 | #define _CLC_POWN_INTRINSIC "llvm.powi" 2 | 3 | #define _CLC_POWN_DECL(GENTYPE, INTTYPE) \ 4 | _CLC_OVERLOAD _CLC_DECL GENTYPE pown(GENTYPE x, INTTYPE y); 5 | 6 | #define _CLC_VECTOR_POWN_DECL(GENTYPE, INTTYPE) \ 7 | _CLC_POWN_DECL(GENTYPE##2, 
INTTYPE##2) \ 8 | _CLC_POWN_DECL(GENTYPE##3, INTTYPE##3) \ 9 | _CLC_POWN_DECL(GENTYPE##4, INTTYPE##4) \ 10 | _CLC_POWN_DECL(GENTYPE##8, INTTYPE##8) \ 11 | _CLC_POWN_DECL(GENTYPE##16, INTTYPE##16) 12 | 13 | _CLC_OVERLOAD float pown(float x, int y) __asm(_CLC_POWN_INTRINSIC ".f32"); 14 | 15 | _CLC_VECTOR_POWN_DECL(float, int) 16 | 17 | #ifdef cl_khr_fp64 18 | _CLC_OVERLOAD double pown(double x, int y) __asm(_CLC_POWN_INTRINSIC ".f64"); 19 | _CLC_VECTOR_POWN_DECL(double, int) 20 | #endif 21 | 22 | #undef _CLC_POWN_INTRINSIC 23 | #undef _CLC_POWN_DECL 24 | #undef _CLC_VECTOR_POWN_DECL 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/rint.h: -------------------------------------------------------------------------------- 1 | #undef rint 2 | #define rint __clc_rint 3 | 4 | #define __CLC_FUNCTION __clc_rint 5 | #define __CLC_INTRINSIC "llvm.rint" 6 | #include <clc/math/unary_intrin.inc> 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/round.h: -------------------------------------------------------------------------------- 1 | #undef round 2 | #define round __clc_round 3 | 4 | #define __CLC_FUNCTION __clc_round 5 | #define __CLC_INTRINSIC "llvm.round" 6 | #include <clc/math/unary_intrin.inc> 7 | 8 | #undef __CLC_FUNCTION 9 | #undef __CLC_INTRINSIC 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/rsqrt.h: -------------------------------------------------------------------------------- 1 | #define rsqrt(x) (1.f/sqrt(x)) 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/sin.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY <clc/math/sin.inc> 2 | #include <clc/math/gentype.inc> 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- 
/libamdgcn/libclc/generic/include/clc/math/sin.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sin(__CLC_GENTYPE a); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/sincos.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY <clc/math/sincos.inc> 2 | #include <clc/math/gentype.inc> 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/sincos.inc: -------------------------------------------------------------------------------- 1 | #define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \ 2 | _CLC_OVERLOAD _CLC_DECL TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval); 3 | 4 | __CLC_DECLARE_SINCOS(global, __CLC_GENTYPE) 5 | __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE) 6 | __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE) 7 | 8 | #undef __CLC_DECLARE_SINCOS 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/sinpi.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY <clc/math/sinpi.inc> 2 | #include <clc/math/gentype.inc> 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/sinpi.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sinpi(__CLC_GENTYPE a); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/sqrt.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY <clc/math/sqrt.inc> 2 | #include <clc/math/gentype.inc> 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/sqrt.inc: 
-------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sqrt(__CLC_GENTYPE a); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/tan.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/tan.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE tan(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/tanh.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | #define __CLC_BODY 24 | #include 25 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/tanh.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE tanh(__CLC_GENTYPE a); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/ternary_intrin.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD float __CLC_FUNCTION(float, float, float) __asm(__CLC_INTRINSIC ".f32"); 2 | _CLC_OVERLOAD float2 __CLC_FUNCTION(float2, float2, float2) __asm(__CLC_INTRINSIC ".v2f32"); 3 | _CLC_OVERLOAD float3 __CLC_FUNCTION(float3, float3, float3) __asm(__CLC_INTRINSIC ".v3f32"); 4 | _CLC_OVERLOAD float4 __CLC_FUNCTION(float4, float4, float4) __asm(__CLC_INTRINSIC ".v4f32"); 5 | _CLC_OVERLOAD float8 __CLC_FUNCTION(float8, float8, float8) __asm(__CLC_INTRINSIC ".v8f32"); 6 | _CLC_OVERLOAD float16 __CLC_FUNCTION(float16, float16, float16) __asm(__CLC_INTRINSIC ".v16f32"); 7 | 8 | #ifdef cl_khr_fp64 9 | _CLC_OVERLOAD double __CLC_FUNCTION(double, double, double) __asm(__CLC_INTRINSIC ".f64"); 10 | _CLC_OVERLOAD double2 __CLC_FUNCTION(double2, double2, double2) __asm(__CLC_INTRINSIC ".v2f64"); 11 | _CLC_OVERLOAD double3 __CLC_FUNCTION(double3, double3, double3) __asm(__CLC_INTRINSIC ".v3f64"); 12 | _CLC_OVERLOAD double4 __CLC_FUNCTION(double4, double4, double4) __asm(__CLC_INTRINSIC ".v4f64"); 13 | _CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8, double8) __asm(__CLC_INTRINSIC ".v8f64"); 14 | _CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16, double16) __asm(__CLC_INTRINSIC ".v16f64"); 15 | #endif 16 | 17 | #undef __CLC_FUNCTION 18 | #undef __CLC_INTRINSIC 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/trunc.h: -------------------------------------------------------------------------------- 1 | #undef trunc 2 | #define trunc __clc_trunc 3 | 4 | #define __CLC_FUNCTION __clc_trunc 5 | #define __CLC_INTRINSIC "llvm.trunc" 6 | #include 7 | 8 | #undef __CLC_FUNCTION 9 | 
#undef __CLC_INTRINSIC 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/unary_decl.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/math/unary_intrin.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD float __CLC_FUNCTION(float f) __asm(__CLC_INTRINSIC ".f32"); 2 | _CLC_OVERLOAD float2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32"); 3 | _CLC_OVERLOAD float3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32"); 4 | _CLC_OVERLOAD float4 __CLC_FUNCTION(float4 f) __asm(__CLC_INTRINSIC ".v4f32"); 5 | _CLC_OVERLOAD float8 __CLC_FUNCTION(float8 f) __asm(__CLC_INTRINSIC ".v8f32"); 6 | _CLC_OVERLOAD float16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC ".v16f32"); 7 | 8 | #ifdef cl_khr_fp64 9 | _CLC_OVERLOAD double __CLC_FUNCTION(double d) __asm(__CLC_INTRINSIC ".f64"); 10 | _CLC_OVERLOAD double2 __CLC_FUNCTION(double2 d) __asm(__CLC_INTRINSIC ".v2f64"); 11 | _CLC_OVERLOAD double3 __CLC_FUNCTION(double3 d) __asm(__CLC_INTRINSIC ".v3f64"); 12 | _CLC_OVERLOAD double4 __CLC_FUNCTION(double4 d) __asm(__CLC_INTRINSIC ".v4f64"); 13 | _CLC_OVERLOAD double8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64"); 14 | _CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64"); 15 | #endif 16 | 17 | #undef __CLC_FUNCTION 18 | #undef __CLC_INTRINSIC 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/all.h: -------------------------------------------------------------------------------- 1 | #define _CLC_ALL_DECL(TYPE) \ 2 | _CLC_OVERLOAD _CLC_DECL int all(TYPE v); 3 | 4 | #define 
_CLC_VECTOR_ALL_DECL(TYPE) \ 5 | _CLC_ALL_DECL(TYPE) \ 6 | _CLC_ALL_DECL(TYPE##2) \ 7 | _CLC_ALL_DECL(TYPE##3) \ 8 | _CLC_ALL_DECL(TYPE##4) \ 9 | _CLC_ALL_DECL(TYPE##8) \ 10 | _CLC_ALL_DECL(TYPE##16) 11 | 12 | _CLC_VECTOR_ALL_DECL(char) 13 | _CLC_VECTOR_ALL_DECL(short) 14 | _CLC_VECTOR_ALL_DECL(int) 15 | _CLC_VECTOR_ALL_DECL(long) 16 | 17 | #undef _CLC_ALL_DECL 18 | #undef _CLC_VECTOR_ALL_DECL 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/any.h: -------------------------------------------------------------------------------- 1 | 2 | #define _CLC_ANY_DECL(TYPE) \ 3 | _CLC_OVERLOAD _CLC_DECL int any(TYPE v); 4 | 5 | #define _CLC_VECTOR_ANY_DECL(TYPE) \ 6 | _CLC_ANY_DECL(TYPE) \ 7 | _CLC_ANY_DECL(TYPE##2) \ 8 | _CLC_ANY_DECL(TYPE##3) \ 9 | _CLC_ANY_DECL(TYPE##4) \ 10 | _CLC_ANY_DECL(TYPE##8) \ 11 | _CLC_ANY_DECL(TYPE##16) 12 | 13 | _CLC_VECTOR_ANY_DECL(char) 14 | _CLC_VECTOR_ANY_DECL(short) 15 | _CLC_VECTOR_ANY_DECL(int) 16 | _CLC_VECTOR_ANY_DECL(long) 17 | 18 | #undef _CLC_ANY_DECL 19 | #undef _CLC_VECTOR_ANY_DECL 20 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/binary_decl.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN a, __CLC_FLOATN b); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/isequal.h: -------------------------------------------------------------------------------- 1 | #define _CLC_ISEQUAL_DECL(TYPE, RETTYPE) \ 2 | _CLC_OVERLOAD _CLC_DECL RETTYPE isequal(TYPE x, TYPE y); 3 | 4 | #define _CLC_VECTOR_ISEQUAL_DECL(TYPE, RETTYPE) \ 5 | _CLC_ISEQUAL_DECL(TYPE##2, RETTYPE##2) \ 6 | _CLC_ISEQUAL_DECL(TYPE##3, RETTYPE##3) \ 7 | _CLC_ISEQUAL_DECL(TYPE##4, RETTYPE##4) \ 8 | _CLC_ISEQUAL_DECL(TYPE##8, RETTYPE##8) \ 9 | _CLC_ISEQUAL_DECL(TYPE##16, RETTYPE##16) 10 | 11 | 
_CLC_ISEQUAL_DECL(float, int) 12 | _CLC_VECTOR_ISEQUAL_DECL(float, int) 13 | 14 | #ifdef cl_khr_fp64 15 | _CLC_ISEQUAL_DECL(double, int) 16 | _CLC_VECTOR_ISEQUAL_DECL(double, long) 17 | #endif 18 | 19 | #undef _CLC_ISEQUAL_DECL 20 | #undef _CLC_VECTOR_ISEQUAL_DECL 21 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/isfinite.h: -------------------------------------------------------------------------------- 1 | #undef isfinite 2 | 3 | #define __CLC_FUNCTION isfinite 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/isgreater.h: -------------------------------------------------------------------------------- 1 | #undef isgreater 2 | 3 | #define __CLC_FUNCTION isgreater 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/isgreaterequal.h: -------------------------------------------------------------------------------- 1 | #undef isgreaterequal 2 | 3 | #define __CLC_FUNCTION isgreaterequal 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/isinf.h: -------------------------------------------------------------------------------- 1 | 2 | #define _CLC_ISINF_DECL(RET_TYPE, ARG_TYPE) \ 3 | _CLC_OVERLOAD _CLC_DECL RET_TYPE isinf(ARG_TYPE); 4 | 5 | #define _CLC_VECTOR_ISINF_DECL(RET_TYPE, ARG_TYPE) \ 6 | _CLC_ISINF_DECL(RET_TYPE##2, ARG_TYPE##2) \ 7 | _CLC_ISINF_DECL(RET_TYPE##3, ARG_TYPE##3) \ 8 | _CLC_ISINF_DECL(RET_TYPE##4, ARG_TYPE##4) \ 9 | _CLC_ISINF_DECL(RET_TYPE##8, 
ARG_TYPE##8) \ 10 | _CLC_ISINF_DECL(RET_TYPE##16, ARG_TYPE##16) 11 | 12 | _CLC_ISINF_DECL(int, float) 13 | _CLC_VECTOR_ISINF_DECL(int, float) 14 | 15 | #ifdef cl_khr_fp64 16 | _CLC_ISINF_DECL(int, double) 17 | _CLC_VECTOR_ISINF_DECL(long, double) 18 | #endif 19 | 20 | #undef _CLC_ISINF_DECL 21 | #undef _CLC_VECTOR_ISINF_DECL 22 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/isless.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION isless 2 | #define __CLC_BODY 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/islessequal.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION islessequal 2 | #define __CLC_BODY 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/islessgreater.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION islessgreater 2 | #define __CLC_BODY 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/isnan.h: -------------------------------------------------------------------------------- 1 | 2 | #define _CLC_ISNAN_DECL(RET_TYPE, ARG_TYPE) \ 3 | _CLC_OVERLOAD _CLC_DECL RET_TYPE isnan(ARG_TYPE); 4 | 5 | #define _CLC_VECTOR_ISNAN_DECL(RET_TYPE, ARG_TYPE) \ 6 | _CLC_ISNAN_DECL(RET_TYPE##2, ARG_TYPE##2) \ 7 | _CLC_ISNAN_DECL(RET_TYPE##3, ARG_TYPE##3) \ 8 | _CLC_ISNAN_DECL(RET_TYPE##4, ARG_TYPE##4) \ 9 | _CLC_ISNAN_DECL(RET_TYPE##8, ARG_TYPE##8) \ 10 | 
_CLC_ISNAN_DECL(RET_TYPE##16, ARG_TYPE##16) 11 | 12 | _CLC_ISNAN_DECL(int, float) 13 | _CLC_VECTOR_ISNAN_DECL(int, float) 14 | 15 | #ifdef cl_khr_fp64 16 | _CLC_ISNAN_DECL(int, double) 17 | _CLC_VECTOR_ISNAN_DECL(long, double) 18 | #endif 19 | 20 | #undef _CLC_ISNAN_DECL 21 | #undef _CLC_VECTOR_ISNAN_DECL 22 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/isnormal.h: -------------------------------------------------------------------------------- 1 | #undef isnormal 2 | 3 | #define __CLC_FUNCTION isnormal 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/isnotequal.h: -------------------------------------------------------------------------------- 1 | #undef isnotequal 2 | 3 | #define __CLC_FUNCTION isnotequal 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/isordered.h: -------------------------------------------------------------------------------- 1 | #undef isordered 2 | 3 | #define __CLC_FUNCTION isordered 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/isunordered.h: -------------------------------------------------------------------------------- 1 | #undef isunordered 2 | 3 | #define __CLC_FUNCTION isunordered 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/select.h: 
-------------------------------------------------------------------------------- 1 | #define select(a, b, c) ((c) ? (b) : (a)) 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/signbit.h: -------------------------------------------------------------------------------- 1 | #undef signbit 2 | 3 | #define __CLC_FUNCTION signbit 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/relational/unary_decl.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN x); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/shared/clamp.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | 4 | #define __CLC_BODY 5 | #include 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/shared/clamp.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z); 2 | 3 | #ifndef __CLC_SCALAR 4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z); 5 | #endif 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/shared/max.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | 4 | #define __CLC_BODY 5 | #include 6 | -------------------------------------------------------------------------------- 
/libamdgcn/libclc/generic/include/clc/shared/max.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b); 2 | 3 | #ifndef __CLC_SCALAR 4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b); 5 | #endif 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/shared/min.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | 4 | #define __CLC_BODY 5 | #include 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/shared/min.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b); 2 | 3 | #ifndef __CLC_SCALAR 4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b); 5 | #endif 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/synchronization/barrier.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL void barrier(cl_mem_fence_flags flags); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h: -------------------------------------------------------------------------------- 1 | typedef uint cl_mem_fence_flags; 2 | 3 | #define CLK_LOCAL_MEM_FENCE 1 4 | #define CLK_GLOBAL_MEM_FENCE 2 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/workitem/get_global_id.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_global_id(uint dim); 2 | 
-------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/workitem/get_global_size.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_global_size(uint dim); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/workitem/get_group_id.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_group_id(uint dim); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/workitem/get_local_id.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_local_id(uint dim); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/workitem/get_local_size.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_local_size(uint dim); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/workitem/get_num_groups.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_num_groups(uint dim); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/clc/workitem/get_work_dim.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL uint get_work_dim(); 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/math/clc_ldexp.h: -------------------------------------------------------------------------------- 1 | _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float, int); 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 
| _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int); 6 | #endif 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/math/clc_nextafter.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION __clc_nextafter 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/include/math/clc_sqrt.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Advanced Micro Devices, Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | 23 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE a); 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/async/async_work_group_copy.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | #undef __CLC_BODY 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/async/async_work_group_copy.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF event_t async_work_group_copy( 2 | local __CLC_GENTYPE *dst, 3 | const global __CLC_GENTYPE *src, 4 | size_t num_gentypes, 5 | event_t event) { 6 | 7 | return async_work_group_strided_copy(dst, src, num_gentypes, 1, event); 8 | } 9 | 10 | _CLC_OVERLOAD _CLC_DEF event_t async_work_group_copy( 11 | global __CLC_GENTYPE *dst, 12 | const local __CLC_GENTYPE *src, 13 | size_t num_gentypes, 14 | event_t event) { 15 | 16 | return async_work_group_strided_copy(dst, src, num_gentypes, 1, event); 17 | } 18 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/async/async_work_group_strided_copy.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | #undef __CLC_BODY 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/async/prefetch.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | 
#undef __CLC_BODY 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/async/prefetch.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF void prefetch(const global __CLC_GENTYPE *p, size_t num_gentypes) { } 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/async/wait_group_events.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DEF void wait_group_events(int num_events, event_t *event_list) { 4 | barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); 5 | } 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/atomic/atomic_xchg.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) { 4 | return as_float(atomic_xchg((volatile global int *)p, as_int(val))); 5 | } 6 | 7 | _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) { 8 | return as_float(atomic_xchg((volatile local int *)p, as_int(val))); 9 | } 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_add(global TYPE *p, TYPE val) { \ 5 | return atomic_add(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 
4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(global TYPE *p, TYPE cmp, TYPE val) { \ 5 | return atomic_cmpxchg(p, cmp, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(global TYPE *p) { \ 5 | return atom_sub(p, 1); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(global TYPE *p) { \ 5 | return atom_add(p, 1); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(global TYPE *p, TYPE val) { \ 5 | return atomic_sub(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(global TYPE *p, TYPE val) { \ 5 | return atomic_xchg(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- 
/libamdgcn/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_and.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_and(global TYPE *p, TYPE val) { \ 5 | return atomic_and(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_max.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_max(global TYPE *p, TYPE val) { \ 5 | return atomic_max(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_min.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_min(global TYPE *p, TYPE val) { \ 5 | return atomic_min(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_or.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_or(global TYPE *p, TYPE val) { \ 5 | return atomic_or(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_xor.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE 
atom_xor(global TYPE *p, TYPE val) { \ 5 | return atomic_xor(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_add(local TYPE *p, TYPE val) { \ 5 | return atomic_add(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(local TYPE *p, TYPE cmp, TYPE val) { \ 5 | return atomic_cmpxchg(p, cmp, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(local TYPE *p) { \ 5 | return atom_sub(p, 1); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(local TYPE *p) { \ 5 | return atom_add(p, 1); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- 
/libamdgcn/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(local TYPE *p, TYPE val) { \ 5 | return atomic_sub(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(local TYPE *p, TYPE val) { \ 5 | return atomic_xchg(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_and.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_and(local TYPE *p, TYPE val) { \ 5 | return atomic_and(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_max.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_max(local TYPE *p, TYPE val) { \ 5 | return atomic_max(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_min.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_min(local TYPE *p, 
TYPE val) { \ 5 | return atomic_min(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_or(local TYPE *p, TYPE val) { \ 5 | return atomic_or(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_xor.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(local TYPE *p, TYPE val) { \ 5 | return atomic_xor(p, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/common/mix.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/common/mix.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mix(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE a) { 2 | return mad( y - x, a, x ); 3 | } 4 | 5 | #ifndef __CLC_SCALAR 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mix(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_SCALAR_GENTYPE a) { 7 | return mix(x, y, (__CLC_GENTYPE)a); 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/common/sign.cl: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | #define SIGN(TYPE, F) \ 5 | _CLC_DEF _CLC_OVERLOAD TYPE sign(TYPE x) { \ 6 | if (isnan(x)) { \ 7 | return 0.0F; \ 8 | } \ 9 | if (x > 0.0F) { \ 10 | return 1.0F; \ 11 | } \ 12 | if (x < 0.0F) { \ 13 | return -1.0F; \ 14 | } \ 15 | return x; /* -0.0 or +0.0 */ \ 16 | } 17 | 18 | SIGN(float, f) 19 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sign, float) 20 | 21 | #ifdef cl_khr_fp64 22 | 23 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 24 | 25 | SIGN(double, ) 26 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sign, double) 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/geometric/cross.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_OVERLOAD _CLC_DEF float3 cross(float3 p0, float3 p1) { 4 | return (float3)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, 5 | p0.x*p1.y - p0.y*p1.x); 6 | } 7 | 8 | _CLC_OVERLOAD _CLC_DEF float4 cross(float4 p0, float4 p1) { 9 | return (float4)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, 10 | p0.x*p1.y - p0.y*p1.x, 0.f); 11 | } 12 | 13 | #ifdef cl_khr_fp64 14 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 15 | 16 | _CLC_OVERLOAD _CLC_DEF double3 cross(double3 p0, double3 p1) { 17 | return (double3)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, 18 | p0.x*p1.y - p0.y*p1.x); 19 | } 20 | 21 | _CLC_OVERLOAD _CLC_DEF double4 cross(double4 p0, double4 p1) { 22 | return (double4)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, 23 | p0.x*p1.y - p0.y*p1.x, 0.f); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/geometric/dot.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_OVERLOAD _CLC_DEF float dot(float p0, 
float p1) { 4 | return p0*p1; 5 | } 6 | 7 | _CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) { 8 | return p0.x*p1.x + p0.y*p1.y; 9 | } 10 | 11 | _CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) { 12 | return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z; 13 | } 14 | 15 | _CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) { 16 | return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w; 17 | } 18 | 19 | #ifdef cl_khr_fp64 20 | 21 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 22 | 23 | _CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) { 24 | return p0*p1; 25 | } 26 | 27 | _CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) { 28 | return p0.x*p1.x + p0.y*p1.y; 29 | } 30 | 31 | _CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) { 32 | return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z; 33 | } 34 | 35 | _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) { 36 | return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w; 37 | } 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/image/get_image_dim.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_OVERLOAD _CLC_DEF int2 get_image_dim (image2d_t image) { 4 | return (int2)(get_image_width(image), get_image_height(image)); 5 | } 6 | _CLC_OVERLOAD _CLC_DEF int4 get_image_dim (image3d_t image) { 7 | return (int4)(get_image_width(image), get_image_height(image), 8 | get_image_depth(image), 0); 9 | } 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/abs.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/abs.inc: -------------------------------------------------------------------------------- 1 | 
_CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs(__CLC_GENTYPE x) { 2 | return __builtin_astype((__CLC_GENTYPE)(x > (__CLC_GENTYPE)(0) ? x : -x), __CLC_U_GENTYPE); 3 | } 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/abs_diff.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/abs_diff.inc: -------------------------------------------------------------------------------- 1 | // abs_diff(x, y) = |x - y|, returned in the unsigned counterpart of the input type. 2 | // The subtraction is performed on the unsigned reinterpretation of the operands so 3 | // that it wraps modulo 2^N instead of overflowing the signed type when the operands 4 | // are far apart (e.g. x = INT_MAX, y = INT_MIN). The comparison still uses the 5 | // original (possibly signed) values to pick the larger operand. 6 | _CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y) { 7 | __CLC_U_GENTYPE ux = __builtin_astype(x, __CLC_U_GENTYPE); 8 | __CLC_U_GENTYPE uy = __builtin_astype(y, __CLC_U_GENTYPE); 9 | return x > y ? ux - uy : uy - ux; 10 | } 11 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/hadd.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/hadd.inc: -------------------------------------------------------------------------------- 1 | //hadd = (x+y)>>1 2 | //This can be simplified to x>>1 + y>>1 + (1 if both x and y have the 1s bit set) 3 | //This saves us having to do any checks for overflow in the addition sum 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y) { 5 | return (x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+(x&y&(__CLC_GENTYPE)1); 6 | } 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/mad24.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | 
-------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/mad24.inc: -------------------------------------------------------------------------------- 1 | // mad24(x, y, z): the mul24 product of x and y, plus z. 2 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z){ 3 | return mul24(x, y) + z; 4 | } 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/mul24.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/mul24.inc: -------------------------------------------------------------------------------- 1 | 2 | // We need to use shifts here in order to maintain the sign bit for signed 3 | // integers. The compiler should optimize this to (x & 0x00FFFFFF) for 4 | // unsigned integers. 
5 | #define CONVERT_TO_24BIT(x) (((x) << 8) >> 8) 6 | 7 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y){ 8 | return CONVERT_TO_24BIT(x) * CONVERT_TO_24BIT(y); 9 | } 10 | 11 | #undef CONVERT_TO_24BIT 12 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/rhadd.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/rhadd.inc: -------------------------------------------------------------------------------- 1 | //rhadd = (x+y+1)>>1 2 | //This can be simplified to x>>1 + y>>1 + (1 if either x or y have the 1s bit set) 3 | //This saves us having to do any checks for overflow in the addition sums 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y) { 5 | return (x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+((x&(__CLC_GENTYPE)1)|(y&(__CLC_GENTYPE)1)); 6 | } 7 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/integer/rotate.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/acos.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/acos.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * There are multiple formulas for calculating arccosine of x: 3 | 
* 1) acos(x) = pi/2 + i * ln(i*x + sqrt(1-x^2)) (notice the 'i'...) 4 | * 2) acos(x) = pi/2 + asin(-x) (asin isn't implemented yet) 5 | * 3) acos(x) = pi/2 - asin(x) (ditto) 6 | * 4) acos(x) = 2*atan2(sqrt(1-x), sqrt(1+x)) 7 | * 5) acos(x) = pi/2 - atan2(x, ( sqrt(1-x^2) ) ) 8 | * 9 | * Options 1-3 are not currently usable, #5 generates more concise radeonsi 10 | * bitcode and assembly than #4 (134 vs 132 instructions on radeonsi), but 11 | * precision of #4 may be better; #4 is the form implemented below. 12 | */ 13 | 14 | #if __CLC_FPSIZE == 32 15 | #define __CLC_CONST(x) x ## f 16 | #else 17 | #define __CLC_CONST(x) x 18 | #endif 19 | 20 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) { 21 | return ( 22 | (__CLC_GENTYPE) __CLC_CONST(2.0) * atan2( 23 | sqrt((__CLC_GENTYPE) __CLC_CONST(1.0) - x), 24 | sqrt((__CLC_GENTYPE) __CLC_CONST(1.0) + x) 25 | ) 26 | ); 27 | } 28 | 29 | #undef __CLC_CONST 30 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/asin.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/asin.inc: -------------------------------------------------------------------------------- 1 | 2 | #if __CLC_FPSIZE == 32 3 | #define __CLC_CONST(x) x ## f 4 | #else 5 | #define __CLC_CONST(x) x 6 | #endif 7 | 8 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE asin(__CLC_GENTYPE x) { 9 | return atan2(x, sqrt( (__CLC_GENTYPE)__CLC_CONST(1.0) -(x*x) )); 10 | } 11 | 12 | #undef __CLC_CONST 13 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/binary_impl.inc: -------------------------------------------------------------------------------- 1 | 2 | #ifndef __CLC_SCALAR 3 | 4 | 
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, __CLC_GENTYPE y) { 5 | return FUNCTION_IMPL(x, y); 6 | } 7 | 8 | #endif 9 | 10 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, float y) { 11 | __CLC_GENTYPE vec_y = (__CLC_GENTYPE) (y); 12 | return FUNCTION_IMPL(x, vec_y); 13 | } 14 | 15 | #ifdef cl_khr_fp64 16 | 17 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, double y) { 18 | __CLC_GENTYPE vec_y = (__CLC_GENTYPE) (y); 19 | return FUNCTION_IMPL(x, vec_y); 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/copysign.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | _CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | _CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double) 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/exp10.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/exp10.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE exp10(__CLC_GENTYPE val) { 2 | // exp10(x) = exp2(x * log2(10)) 3 | #if __CLC_FPSIZE == 32 4 | return exp2(val * log2(10.0f)); 5 | #elif __CLC_FPSIZE == 64 6 | return exp2(val * log2(10.0)); 7 | #else 8 | #error unknown _CLC_FPSIZE 9 | #endif 10 | } 11 | 
-------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/fdim.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "math.h" 4 | 5 | #ifdef cl_khr_fp64 6 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 7 | #endif 8 | 9 | #define __CLC_BODY 10 | #include 11 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/fmax.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../clcmacro.h" 4 | 5 | _CLC_DEFINE_BINARY_BUILTIN(float, fmax, __builtin_fmaxf, float, float); 6 | 7 | #ifdef cl_khr_fp64 8 | 9 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 10 | 11 | _CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double); 12 | 13 | #endif 14 | 15 | #define __CLC_BODY 16 | #include 17 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/fmax.inc: -------------------------------------------------------------------------------- 1 | 2 | #if !defined(__CLC_SCALAR) 3 | 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, float y) { 5 | return fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); 6 | } 7 | 8 | #ifdef cl_khr_fp64 9 | 10 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 11 | 12 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, double y) { 13 | return fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); 14 | } 15 | 16 | #endif // ifdef cl_khr_fp64 17 | 18 | #endif // !defined(__CLC_SCALAR) 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/fmin.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../clcmacro.h" 4 | 5 | _CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float); 6 | 7 | #ifdef 
cl_khr_fp64 8 | 9 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 10 | 11 | _CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double); 12 | 13 | #endif 14 | 15 | #define __CLC_BODY 16 | #include 17 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/fmin.inc: -------------------------------------------------------------------------------- 1 | 2 | #if !defined(__CLC_SCALAR) 3 | 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, float y) { 5 | return fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); 6 | } 7 | 8 | #ifdef cl_khr_fp64 9 | 10 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 11 | 12 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, double y) { 13 | return fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); 14 | } 15 | 16 | #endif // ifdef cl_khr_fp64 17 | 18 | #endif // !defined(__CLC_SCALAR) 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/fmod.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | _CLC_DEFINE_BINARY_BUILTIN(float, fmod, __builtin_fmodf, float, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | _CLC_DEFINE_BINARY_BUILTIN(double, fmod, __builtin_fmod, double, double) 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/frexp.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "math.h" 4 | 5 | #ifdef cl_khr_fp64 6 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 7 | #endif 8 | 9 | #define __CLC_BODY 10 | #include 11 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/hypot.cl: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/hypot.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hypot(__CLC_GENTYPE x, __CLC_GENTYPE y) { 2 | return sqrt(x*x + y*y); 3 | } 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/log.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | /* 5 | *log(x) = log2(x) * (1/log2(e)) 6 | */ 7 | 8 | _CLC_OVERLOAD _CLC_DEF float log(float x) 9 | { 10 | return log2(x) * (1.0f / M_LOG2E_F); 11 | } 12 | 13 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log, float); 14 | 15 | #ifdef cl_khr_fp64 16 | 17 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 18 | 19 | _CLC_OVERLOAD _CLC_DEF double log(double x) 20 | { 21 | return log2(x) * (1.0 / M_LOG2E); 22 | } 23 | 24 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log, double); 25 | 26 | #endif // cl_khr_fp64 27 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/log10.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/log10.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE log10(__CLC_GENTYPE val) { 2 | // log10(x) = log2(x) / 
log2(10) 3 | // 1 / log2(10) = 0.30102999566 = log10(2) 4 | // SP representation is 0.30103 (0x1.344136p-2) 5 | // DP representation is 0.301029995659999993762312442414(0x1.34413509E61D8p-2) 6 | #if __CLC_FPSIZE == 32 7 | return log2(val) * 0x1.344136p-2f; 8 | #elif __CLC_FPSIZE == 64 9 | return log2(val) * 0x1.34413509E61D8p-2; 10 | #else 11 | #error unknown _CLC_FPSIZE 12 | #endif 13 | } 14 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/mad.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/mad.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) { 2 | return a * b + c; 3 | } 4 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/nextafter.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __builtin_nextafterf, float, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | _CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __builtin_nextafter, double, double) 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/pown.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, pown, float, int) 5 | 6 | #ifdef cl_khr_fp64 7 | #pragma OPENCL 
EXTENSION cl_khr_fp64 : enable 8 | 9 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, pown, double, int) 10 | #endif 11 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/sincos.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/sincos.inc: -------------------------------------------------------------------------------- 1 | #define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \ 2 | _CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \ 3 | *cosval = cos(x); \ 4 | return sin(x); \ 5 | } 6 | 7 | __CLC_DECLARE_SINCOS(global, __CLC_GENTYPE) 8 | __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE) 9 | __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE) 10 | 11 | #undef __CLC_DECLARE_SINCOS 12 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/tan.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | #endif 6 | 7 | #define __CLC_BODY 8 | #include 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/math/tan.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * tan(x) = sin(x) / cos(x). 3 | * 4 | * The previous form, sin(x) / sqrt(1 - sin(x)^2), produced shorter assembly on 5 | * R600, but sqrt() only yields |cos(x)|, so the result had the wrong sign 
6 | * whenever cos(x) < 0 (e.g. tan(3*pi/4) evaluated to +1 instead of -1). 7 | */ 8 | 9 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE tan(__CLC_GENTYPE x) { 10 | return sin(x) / cos(x); 11 | } 12 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/all.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define _CLC_ALL(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) 4 | #define _CLC_ALL2(v) (_CLC_ALL((v).s0) & _CLC_ALL((v).s1)) 5 | #define _CLC_ALL3(v) (_CLC_ALL2((v)) & _CLC_ALL((v).s2)) 6 | #define _CLC_ALL4(v) (_CLC_ALL3((v)) & _CLC_ALL((v).s3)) 7 | #define _CLC_ALL8(v) (_CLC_ALL4((v)) & _CLC_ALL((v).s4) & _CLC_ALL((v).s5) \ 8 | & _CLC_ALL((v).s6) & _CLC_ALL((v).s7)) 9 | #define _CLC_ALL16(v) (_CLC_ALL8((v)) & _CLC_ALL((v).s8) & _CLC_ALL((v).s9) \ 10 | & _CLC_ALL((v).sA) & _CLC_ALL((v).sB) \ 11 | & _CLC_ALL((v).sC) & _CLC_ALL((v).sD) \ 12 | & _CLC_ALL((v).sE) & _CLC_ALL((v).sf)) 13 | 14 | 15 | #define ALL_ID(TYPE) \ 16 | _CLC_OVERLOAD _CLC_DEF int all(TYPE v) 17 | 18 | #define ALL_VECTORIZE(TYPE) \ 19 | ALL_ID(TYPE) { return _CLC_ALL(v); } \ 20 | ALL_ID(TYPE##2) { return _CLC_ALL2(v); } \ 21 | ALL_ID(TYPE##3) { return _CLC_ALL3(v); } \ 22 | ALL_ID(TYPE##4) { return _CLC_ALL4(v); } \ 23 | ALL_ID(TYPE##8) { return _CLC_ALL8(v); } \ 24 | ALL_ID(TYPE##16) { return _CLC_ALL16(v); } 25 | 26 | ALL_VECTORIZE(char) 27 | ALL_VECTORIZE(short) 28 | ALL_VECTORIZE(int) 29 | ALL_VECTORIZE(long) 30 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/any.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) 4 | #define _CLC_ANY2(v) 
(_CLC_ANY((v).s0) | _CLC_ANY((v).s1)) 5 | #define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2)) 6 | #define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3)) 7 | #define _CLC_ANY8(v) (_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) \ 8 | | _CLC_ANY((v).s6) | _CLC_ANY((v).s7)) 9 | #define _CLC_ANY16(v) (_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) \ 10 | | _CLC_ANY((v).sA) | _CLC_ANY((v).sB) \ 11 | | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) \ 12 | | _CLC_ANY((v).sE) | _CLC_ANY((v).sf)) 13 | 14 | 15 | #define ANY_ID(TYPE) \ 16 | _CLC_OVERLOAD _CLC_DEF int any(TYPE v) 17 | 18 | #define ANY_VECTORIZE(TYPE) \ 19 | ANY_ID(TYPE) { return _CLC_ANY(v); } \ 20 | ANY_ID(TYPE##2) { return _CLC_ANY2(v); } \ 21 | ANY_ID(TYPE##3) { return _CLC_ANY3(v); } \ 22 | ANY_ID(TYPE##4) { return _CLC_ANY4(v); } \ 23 | ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \ 24 | ANY_ID(TYPE##16) { return _CLC_ANY16(v); } 25 | 26 | ANY_VECTORIZE(char) 27 | ANY_VECTORIZE(short) 28 | ANY_VECTORIZE(int) 29 | ANY_VECTORIZE(long) 30 | 31 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/isequal.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define _CLC_DEFINE_ISEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ 4 | _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ 5 | return (x == y); \ 6 | } \ 7 | 8 | _CLC_DEFINE_ISEQUAL(int, isequal, float, float) 9 | _CLC_DEFINE_ISEQUAL(int2, isequal, float2, float2) 10 | _CLC_DEFINE_ISEQUAL(int3, isequal, float3, float3) 11 | _CLC_DEFINE_ISEQUAL(int4, isequal, float4, float4) 12 | _CLC_DEFINE_ISEQUAL(int8, isequal, float8, float8) 13 | _CLC_DEFINE_ISEQUAL(int16, isequal, float16, float16) 14 | 15 | #ifdef cl_khr_fp64 16 | 17 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 18 | 19 | // The scalar version of isequal(double) returns an int, but the vector versions 20 | // return long. 
21 | _CLC_DEFINE_ISEQUAL(int, isequal, double, double) 22 | _CLC_DEFINE_ISEQUAL(long2, isequal, double2, double2) 23 | _CLC_DEFINE_ISEQUAL(long3, isequal, double3, double3) 24 | _CLC_DEFINE_ISEQUAL(long4, isequal, double4, double4) 25 | _CLC_DEFINE_ISEQUAL(long8, isequal, double8, double8) 26 | _CLC_DEFINE_ISEQUAL(long16, isequal, double16, double16) 27 | 28 | #endif 29 | 30 | #undef _CLC_DEFINE_ISEQUAL -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/isfinite.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isfinite, __builtin_isfinite, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | // The scalar version of isfinite(double) returns an int, but the vector versions 11 | // return long. 12 | _CLC_DEF _CLC_OVERLOAD int isfinite(double x) { 13 | return __builtin_isfinite(x); 14 | } 15 | 16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isfinite, double) 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/isgreater.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | //Note: It would be nice to use __builtin_isgreater with vector inputs, but it seems to only take scalar values as 5 | // input, which will produce incorrect output for vector input types. 6 | 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, isgreater, __builtin_isgreater, float, float) 8 | 9 | #ifdef cl_khr_fp64 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // The scalar version of isgreater(double, double) returns an int, but the vector versions 14 | // return long. 
15 | 16 | _CLC_DEF _CLC_OVERLOAD int isgreater(double x, double y){ 17 | return __builtin_isgreater(x, y); 18 | } 19 | 20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreater, double, double) 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/isgreaterequal.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | //Note: It would be nice to use __builtin_isgreaterequal with vector inputs, but it seems to only take scalar values as 5 | // input, which will produce incorrect output for vector input types. 6 | 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, isgreaterequal, __builtin_isgreaterequal, float, float) 8 | 9 | #ifdef cl_khr_fp64 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // The scalar version of isgreaterequal(double, double) returns an int, but the vector versions 14 | // return long. 15 | 16 | _CLC_DEF _CLC_OVERLOAD int isgreaterequal(double x, double y){ 17 | return __builtin_isgreaterequal(x, y); 18 | } 19 | 20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreaterequal, double, double) 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/isinf.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isinf, __builtin_isinf, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | // The scalar version of isinf(double) returns an int, but the vector versions 11 | // return long. 
12 | _CLC_DEF _CLC_OVERLOAD int isinf(double x) { 13 | return __builtin_isinf(x); 14 | } 15 | 16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isinf, double) 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/isless.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | //Note: It would be nice to use __builtin_isless with vector inputs, but it seems to only take scalar values as 5 | // input, which will produce incorrect output for vector input types. 6 | 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, isless, __builtin_isless, float, float) 8 | 9 | #ifdef cl_khr_fp64 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // The scalar version of isless(double, double) returns an int, but the vector versions 14 | // return long. 15 | 16 | _CLC_DEF _CLC_OVERLOAD int isless(double x, double y){ 17 | return __builtin_isless(x, y); 18 | } 19 | 20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isless, double, double) 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/islessequal.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | //Note: It would be nice to use __builtin_islessequal with vector inputs, but it seems to only take scalar values as 5 | // input, which will produce incorrect output for vector input types. 6 | 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, islessequal, __builtin_islessequal, float, float) 8 | 9 | #ifdef cl_khr_fp64 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // The scalar version of islessequal(double, double) returns an int, but the vector versions 14 | // return long. 
15 | 16 | _CLC_DEF _CLC_OVERLOAD int islessequal(double x, double y){ 17 | return __builtin_islessequal(x, y); 18 | } 19 | 20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessequal, double, double) 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/islessgreater.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | //Note: It would be nice to use __builtin_islessgreater with vector inputs, but it seems to only take scalar values as 5 | // input, which will produce incorrect output for vector input types. 6 | 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, islessgreater, __builtin_islessgreater, float, float) 8 | 9 | #ifdef cl_khr_fp64 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // The scalar version of islessgreater(double, double) returns an int, but the vector versions 14 | // return long. 15 | 16 | _CLC_DEF _CLC_OVERLOAD int islessgreater(double x, double y){ 17 | return __builtin_islessgreater(x, y); 18 | } 19 | 20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessgreater, double, double) 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/isnan.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isnan, __builtin_isnan, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | // The scalar version of isnan(double) returns an int, but the vector versions 11 | // return long. 
12 | _CLC_DEF _CLC_OVERLOAD int isnan(double x) { 13 | return __builtin_isnan(x); 14 | } 15 | 16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnan, double) 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/isnormal.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isnormal, __builtin_isnormal, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | // The scalar version of isnormal(double) returns an int, but the vector versions 11 | // return long. 12 | _CLC_DEF _CLC_OVERLOAD int isnormal(double x) { 13 | return __builtin_isnormal(x); 14 | } 15 | 16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnormal, double) 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/isnotequal.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | #define _CLC_DEFINE_ISNOTEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ 5 | _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ 6 | return (x != y); \ 7 | } \ 8 | 9 | _CLC_DEFINE_ISNOTEQUAL(int, isnotequal, float, float) 10 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isnotequal, float, float) 11 | 12 | #ifdef cl_khr_fp64 13 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 14 | 15 | // The scalar version of isnotequal(double, double) returns an int, but the vector versions 16 | // return long. 
17 | 18 | _CLC_DEFINE_ISNOTEQUAL(int, isnotequal, double, double) 19 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isnotequal, double, double) 20 | 21 | #endif 22 | 23 | #undef _CLC_DEFINE_ISNOTEQUAL 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/isordered.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | #include "relational.h" 3 | // Helper macro: defines a scalar overload of FUNCTION that returns isequal(x, x) && isequal(y, y). 4 | #define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ 5 | _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ 6 | return isequal(x, x) && isequal(y, y); \ 7 | } \ 8 | 9 | _CLC_DEFINE_ISORDERED(int, isordered, float, float) 10 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isordered, float, float) 11 | 12 | #ifdef cl_khr_fp64 13 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 14 | 15 | // The scalar version of isordered(double, double) returns an int, but the vector versions 16 | // return long. 17 | 18 | _CLC_DEFINE_ISORDERED(int, isordered, double, double) 19 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isordered, double, double) 20 | 21 | #endif 22 | 23 | #undef _CLC_DEFINE_ISORDERED 24 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/isunordered.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | #include "relational.h" 3 | 4 | //Note: It would be nice to use __builtin_isunordered with vector inputs, but it seems to only take scalar values as 5 | // input, which will produce incorrect output for vector input types. 6 | // float case: the scalar builtin is handed to _CLC_DEFINE_RELATIONAL_BINARY, a macro defined outside this file. 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, isunordered, __builtin_isunordered, float, float) 8 | 9 | #ifdef cl_khr_fp64 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // The scalar version of isunordered(double, double) returns an int, but the vector versions 14 | // return long. 
15 | 16 | _CLC_DEF _CLC_OVERLOAD int isunordered(double x, double y){ 17 | return __builtin_isunordered(x, y); 18 | } 19 | 20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isunordered, double, double) 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/relational/signbit.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | #include "relational.h" 3 | // float case: __builtin_signbitf is handed to _CLC_DEFINE_RELATIONAL_UNARY, a macro defined outside this file. 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, signbit, __builtin_signbitf, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | // The scalar version of signbit(double) returns an int, but the vector versions 11 | // return long. 12 | 13 | _CLC_DEF _CLC_OVERLOAD int signbit(double x){ 14 | return __builtin_signbit(x); 15 | } 16 | 17 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, signbit, double) 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/shared/clamp.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | // clamp.inc is used as the body for the integer gentype include first, then for the math gentype include. 3 | #define __CLC_BODY <clamp.inc> 4 | #include <clc/integer/gentype.inc> 5 | 6 | #ifdef cl_khr_fp64 7 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 8 | #endif 9 | 10 | #define __CLC_BODY <clamp.inc> 11 | #include <clc/math/gentype.inc> 12 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/shared/clamp.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) { 2 | return (x > z ? z : (x < y ? y : x)); 3 | } 4 | // Second form (compiled when __CLC_SCALAR is not defined): y and z are scalars, cast to __CLC_GENTYPE before comparing. 5 | #ifndef __CLC_SCALAR 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z) { 7 | return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z : (x < (__CLC_GENTYPE)y ? 
(__CLC_GENTYPE)y : x)); 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/shared/max.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | // max.inc is used as the body for the integer gentype include first, then for the math gentype include. 3 | #define __CLC_BODY <max.inc> 4 | #include <clc/integer/gentype.inc> 5 | 6 | #ifdef cl_khr_fp64 7 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 8 | #endif 9 | 10 | #define __CLC_BODY <max.inc> 11 | #include <clc/math/gentype.inc> 12 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/shared/max.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b) { 2 | return (a > b ? a : b); 3 | } 4 | // Second form (compiled when __CLC_SCALAR is not defined): b is a scalar, cast to __CLC_GENTYPE before comparing. 5 | #ifndef __CLC_SCALAR 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) { 7 | return (a > (__CLC_GENTYPE)b ? a : (__CLC_GENTYPE)b); 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/shared/min.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | // min.inc is used as the body for the integer gentype include first, then for the math gentype include. 3 | #define __CLC_BODY <min.inc> 4 | #include <clc/integer/gentype.inc> 5 | 6 | #ifdef cl_khr_fp64 7 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 8 | #endif 9 | 10 | #define __CLC_BODY <min.inc> 11 | #include <clc/math/gentype.inc> 12 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/shared/min.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b) { 2 | return (a < b ? a : b); 3 | } 4 | // Second form (compiled when __CLC_SCALAR is not defined): b is a scalar, cast to __CLC_GENTYPE before comparing. 5 | #ifndef __CLC_SCALAR 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) { 7 | return (a < (__CLC_GENTYPE)b ? 
a : (__CLC_GENTYPE)b); 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/subnormal_disable.ll: -------------------------------------------------------------------------------- 1 | @__CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 true 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/subnormal_helper_func.ll: -------------------------------------------------------------------------------- 1 | @__CLC_SUBNORMAL_DISABLE = external global i1 2 | ; __clc_subnormals_disabled loads and returns the external i1 flag __CLC_SUBNORMAL_DISABLE. 3 | define i1 @__clc_subnormals_disabled() #0 { 4 | %disable = load i1, i1* @__CLC_SUBNORMAL_DISABLE 5 | ret i1 %disable 6 | } 7 | 8 | attributes #0 = { alwaysinline } 9 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/subnormal_use_default.ll: -------------------------------------------------------------------------------- 1 | @__CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 false 2 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/workitem/get_global_id.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | // Global id = group id * local size + local id; no global offset term is added here. 3 | _CLC_DEF size_t get_global_id(uint dim) { 4 | return get_group_id(dim)*get_local_size(dim) + get_local_id(dim); 5 | } 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc/generic/lib/workitem/get_global_size.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | // Global size = number of groups * local size. 3 | _CLC_DEF size_t get_global_size(uint dim) { 4 | return get_num_groups(dim)*get_local_size(dim); 5 | } 6 | -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/README: -------------------------------------------------------------------------------- 1 | 2 | This directory includes changes 
needed on top of the libclc for amdgcn builtins. 3 | 4 | The only changes are some include files and the workitem functions. 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/include/clc/workitem/get_global_id.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_global_id_ll(uint dim); 2 | _CLC_OVERLOAD _CLC_DECL size_t get_global_id(uint dim); 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/include/clc/workitem/get_group_id.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_group_id_ll(uint dim); 2 | _CLC_OVERLOAD _CLC_DECL size_t get_group_id(uint dim); 3 | /* get_group_id forwards to get_group_id_ll (see get_group_id.cl). NOTE(review): get_group_id_ll.ll defines get_group_id_ll with an i32 result while it is declared size_t here - confirm the two agree on the target. */ -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/include/clc/workitem/get_local_id.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_local_id_ll(uint dim); 2 | _CLC_OVERLOAD _CLC_DECL size_t get_local_id(uint dim); 3 | /* get_local_id forwards to get_local_id_ll (see get_local_id.cl). NOTE(review): get_local_id_ll.ll defines get_local_id_ll with an i32 result while it is declared size_t here - confirm the two agree on the target. */ -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/include/clc/workitem/get_local_size.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_local_size_ll(uint dim); 2 | _CLC_OVERLOAD _CLC_DECL size_t get_local_size(uint dim); 3 | /* get_local_size forwards to get_local_size_ll (see get_local_size.cl). */ -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/include/clc/workitem/get_num_groups.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_num_groups_ll(uint dim); 2 | _CLC_OVERLOAD _CLC_DECL size_t get_num_groups(uint dim); 3 | /* get_num_groups forwards to get_num_groups_ll (see get_num_groups.cl). */ -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/include/clc/workitem/get_reserved_device_mem.h: 
-------------------------------------------------------------------------------- 1 | /* Fixed-width integer typedefs; skipped when _OMPTARGET_AMDGCN_OPTION_H_ is already defined. */ 2 | #ifndef _OMPTARGET_AMDGCN_OPTION_H_ 3 | typedef char int8_t; 4 | typedef unsigned char uint8_t; 5 | typedef short int16_t; 6 | typedef unsigned short uint16_t; 7 | typedef int int32_t; 8 | typedef unsigned int uint32_t; 9 | typedef long int64_t; 10 | typedef unsigned long uint64_t; 11 | #endif 12 | /* NOTE(review): the prototypes below use _CLC_DEF whereas the sibling workitem headers use _CLC_DECL - confirm that is intended. */ 13 | /* get_reserved_device_mem_ptr returns a pointer to the 128-byte reserved memory area */ 14 | _CLC_DEF __global void* get_reserved_device_mem_ptr(); 15 | /* the 128-byte reserved memory area holds 16 64-bit global pointers */ 16 | /* get_reserved_device_mem gets the particular 64-bit pointer selected by the index argument */ 17 | _CLC_DEF __global void* get_reserved_device_mem(uint index); 18 | _CLC_DEF uint64_t get_reserved_device_mem_64(uint index); 19 | _CLC_DEF uint32_t get_reserved_device_mem_32(uint index); 20 | -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/include/clc/workitem/get_work_dim.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_work_dim_ll(void); 2 | _CLC_OVERLOAD _CLC_DECL size_t get_work_dim(void); 3 | /* get_work_dim forwards to get_work_dim_ll (see get_work_dim.cl). NOTE(review): get_work_dim_ll.ll defines get_work_dim_ll with an i32 result while it is declared size_t here - confirm the two agree on the target. */ -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/lib/workitem/get_dispatch_ptr.ll: -------------------------------------------------------------------------------- 1 | declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 2 | ; get_dispatch_ptr returns the pointer produced by the llvm.amdgcn.dispatch.ptr intrinsic. 3 | ; Function Attrs: alwaysinline nounwind readnone 4 | define i8 addrspace(2)* @get_dispatch_ptr() #1 { 5 | %dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() 6 | ret i8 addrspace(2)* %dispatch_ptr 7 | } 8 | 9 | attributes #0 = { nounwind readnone } 10 | attributes #1 = { alwaysinline nounwind readnone } 11 | --------------------------------------------------------------------------------
/libamdgcn/libclc_overrides/lib/workitem/get_global_id.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | _CLC_OVERLOAD _CLC_DEF size_t get_global_id(uint dim) { 3 | return get_group_id(dim)*get_local_size(dim) + get_local_id(dim); 4 | } /* global id = group id * local size + local id; no global offset term is added */ 5 | -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/lib/workitem/get_group_id.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | _CLC_OVERLOAD _CLC_DEF size_t get_group_id(uint dim) { return (get_group_id_ll(dim)); } /* forwards to the IR routine in get_group_id_ll.ll */ 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/lib/workitem/get_group_id_ll.ll: -------------------------------------------------------------------------------- 1 | declare i32 @llvm.amdgcn.workgroup.id.x() #0 2 | declare i32 @llvm.amdgcn.workgroup.id.y() #0 3 | declare i32 @llvm.amdgcn.workgroup.id.z() #0 4 | ; get_group_id_ll returns the workgroup id intrinsic for dim 0, 1 or 2; any other dim yields 0. 5 | define i32 @get_group_id_ll(i32 %dim) #1 { 6 | switch i32 %dim, label %default [ 7 | i32 0, label %x_dim 8 | i32 1, label %y_dim 9 | i32 2, label %z_dim 10 | ] 11 | 12 | x_dim: 13 | %x = tail call i32 @llvm.amdgcn.workgroup.id.x() 14 | ret i32 %x 15 | 16 | y_dim: 17 | %y = tail call i32 @llvm.amdgcn.workgroup.id.y() 18 | ret i32 %y 19 | 20 | z_dim: 21 | %z = tail call i32 @llvm.amdgcn.workgroup.id.z() 22 | ret i32 %z 23 | 24 | default: 25 | ret i32 0 26 | } 27 | 28 | attributes #0 = { nounwind readnone } 29 | attributes #1 = { alwaysinline norecurse nounwind } 30 | -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/lib/workitem/get_local_id.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | _CLC_OVERLOAD _CLC_DEF size_t get_local_id(uint dim) { return (get_local_id_ll(dim)); } /* forwards to the IR routine in get_local_id_ll.ll */ 3 | --------------------------------------------------------------------------------
/libamdgcn/libclc_overrides/lib/workitem/get_local_id_ll.ll: -------------------------------------------------------------------------------- 1 | declare i32 @llvm.amdgcn.workitem.id.x() #0 2 | declare i32 @llvm.amdgcn.workitem.id.y() #0 3 | declare i32 @llvm.amdgcn.workitem.id.z() #0 4 | ; get_local_id_ll returns the workitem id intrinsic for dim 0, 1 or 2 (each call tagged with !range !0); any other dim yields 0. 5 | define i32 @get_local_id_ll(i32 %dim) #1 { 6 | switch i32 %dim, label %default [ 7 | i32 0, label %x_dim 8 | i32 1, label %y_dim 9 | i32 2, label %z_dim 10 | ] 11 | 12 | x_dim: 13 | %x = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 14 | ret i32 %x 15 | 16 | y_dim: 17 | %y = tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0 18 | ret i32 %y 19 | 20 | z_dim: 21 | %z = tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0 22 | ret i32 %z 23 | 24 | default: 25 | ret i32 0 26 | } 27 | 28 | attributes #0 = { nounwind readnone } 29 | attributes #1 = { alwaysinline norecurse nounwind readnone } 30 | 31 | !0 = !{ i32 0, i32 2048 } 32 | -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/lib/workitem/get_local_size.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | _CLC_OVERLOAD _CLC_DEF size_t get_local_size(uint dim) { return (get_local_size_ll(dim)); } /* forwards to get_local_size_ll, which is defined outside this file */ 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/lib/workitem/get_num_groups.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | _CLC_OVERLOAD _CLC_DEF size_t get_num_groups(uint dim) { return (get_num_groups_ll(dim)); } /* forwards to get_num_groups_ll, which is defined outside this file */ 3 | -------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/lib/workitem/get_work_dim.cl: -------------------------------------------------------------------------------- 1 | #include <clc/clc.h> 2 | _CLC_OVERLOAD _CLC_DEF size_t get_work_dim() { return (get_work_dim_ll()); } /* forwards to the IR routine in get_work_dim_ll.ll */ 3 | 
-------------------------------------------------------------------------------- /libamdgcn/libclc_overrides/lib/workitem/get_work_dim_ll.ll: -------------------------------------------------------------------------------- 1 | declare i32 @llvm.AMDGPU.read.workdim() nounwind readnone 2 | ; get_work_dim_ll returns the result of llvm.AMDGPU.read.workdim; the call carries !range !0, i.e. { 1, 4 }. 3 | define i32 @get_work_dim_ll() nounwind readnone alwaysinline { 4 | %x = call i32 @llvm.AMDGPU.read.workdim() nounwind readnone , !range !0 5 | ret i32 %x 6 | } 7 | 8 | !0 = !{ i32 1, i32 4 } 9 | -------------------------------------------------------------------------------- /packages/fedora/libatmi-runtime-0.3-3.x86_64.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HSAFoundation/CLOC/fe5372f02adb9e92812f89026f6a9ac4f462b055/packages/fedora/libatmi-runtime-0.3-3.x86_64.rpm -------------------------------------------------------------------------------- /packages/fedora/mymcpu-0.1-4.noarch.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HSAFoundation/CLOC/fe5372f02adb9e92812f89026f6a9ac4f462b055/packages/fedora/mymcpu-0.1-4.noarch.rpm -------------------------------------------------------------------------------- /packages/ubuntu/hlc_3.2-4_amd64.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HSAFoundation/CLOC/fe5372f02adb9e92812f89026f6a9ac4f462b055/packages/ubuntu/hlc_3.2-4_amd64.deb -------------------------------------------------------------------------------- /packages/ubuntu/libatmi-runtime_0.3-4_amd64.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HSAFoundation/CLOC/fe5372f02adb9e92812f89026f6a9ac4f462b055/packages/ubuntu/libatmi-runtime_0.3-4_amd64.deb -------------------------------------------------------------------------------- /packages/ubuntu/mymcpu_0.1-4_all.deb: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/HSAFoundation/CLOC/fe5372f02adb9e92812f89026f6a9ac4f462b055/packages/ubuntu/mymcpu_0.1-4_all.deb --------------------------------------------------------------------------------