├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── CREDITS.TXT ├── LICENSE.TXT ├── README.TXT ├── amdgcn-amdhsa └── lib │ ├── SOURCES │ └── workitem │ ├── get_global_size.cl │ ├── get_local_size.cl │ └── get_num_groups.cl ├── amdgcn-mesa3d ├── amdgcn └── lib │ ├── SOURCES │ ├── cl_khr_int64_extended_atomics │ └── minmax_helpers.ll │ ├── integer │ ├── popcount.cl │ └── popcount.inc │ ├── math │ ├── fmax.cl │ ├── fmin.cl │ └── ldexp.cl │ ├── mem_fence │ └── fence.cl │ ├── synchronization │ └── barrier.cl │ └── workitem │ ├── get_global_offset.cl │ ├── get_global_size.cl │ ├── get_group_id.cl │ ├── get_local_id.cl │ ├── get_local_size.cl │ ├── get_num_groups.cl │ └── get_work_dim.cl ├── amdgpu └── lib │ ├── SOURCES │ ├── SOURCES_3.9 │ ├── SOURCES_4.0 │ ├── SOURCES_5.0 │ ├── math │ ├── half_exp.cl │ ├── half_exp10.cl │ ├── half_exp2.cl │ ├── half_log.cl │ ├── half_log10.cl │ ├── half_log2.cl │ ├── half_native_unary.inc │ ├── half_recip.cl │ ├── half_rsqrt.cl │ ├── half_sqrt.cl │ ├── native_exp.cl │ ├── native_exp.inc │ ├── native_log.cl │ ├── native_log.inc │ ├── native_log10.cl │ ├── native_log10.inc │ ├── nextafter.cl │ └── sqrt.cl │ └── shared │ ├── vload_half_helpers.ll │ └── vstore_half_helpers.ll ├── build ├── metabuild.py └── ninja_syntax.py ├── check_external_calls.sh ├── cmake ├── CMakeCLCCompiler.cmake.in ├── CMakeCLCInformation.cmake ├── CMakeDetermineCLCCompiler.cmake ├── CMakeDetermineLLAsmCompiler.cmake ├── CMakeLLAsmCompiler.cmake.in ├── CMakeLLAsmInformation.cmake ├── CMakeTestCLCCompiler.cmake └── CMakeTestLLAsmCompiler.cmake ├── compile-test.sh ├── configure.py ├── generic ├── include │ ├── clc │ │ ├── as_type.h │ │ ├── async │ │ │ ├── async_work_group_copy.h │ │ │ ├── async_work_group_copy.inc │ │ │ ├── async_work_group_strided_copy.h │ │ │ ├── async_work_group_strided_copy.inc │ │ │ ├── gentype.inc │ │ │ ├── prefetch.h │ │ │ ├── prefetch.inc │ │ │ └── wait_group_events.h │ │ ├── atom_decl_int32.inc │ │ ├── atom_decl_int64.inc │ │ ├── atomic │ │ │ 
├── atomic_add.h │ │ │ ├── atomic_and.h │ │ │ ├── atomic_cmpxchg.h │ │ │ ├── atomic_dec.h │ │ │ ├── atomic_decl.inc │ │ │ ├── atomic_inc.h │ │ │ ├── atomic_max.h │ │ │ ├── atomic_min.h │ │ │ ├── atomic_or.h │ │ │ ├── atomic_sub.h │ │ │ ├── atomic_xchg.h │ │ │ └── atomic_xor.h │ │ ├── cl_khr_global_int32_base_atomics │ │ │ ├── atom_add.h │ │ │ ├── atom_cmpxchg.h │ │ │ ├── atom_dec.h │ │ │ ├── atom_inc.h │ │ │ ├── atom_sub.h │ │ │ └── atom_xchg.h │ │ ├── cl_khr_global_int32_extended_atomics │ │ │ ├── atom_and.h │ │ │ ├── atom_max.h │ │ │ ├── atom_min.h │ │ │ ├── atom_or.h │ │ │ └── atom_xor.h │ │ ├── cl_khr_int64_base_atomics │ │ │ ├── atom_add.h │ │ │ ├── atom_cmpxchg.h │ │ │ ├── atom_dec.h │ │ │ ├── atom_inc.h │ │ │ ├── atom_sub.h │ │ │ └── atom_xchg.h │ │ ├── cl_khr_int64_extended_atomics │ │ │ ├── atom_and.h │ │ │ ├── atom_max.h │ │ │ ├── atom_min.h │ │ │ ├── atom_or.h │ │ │ └── atom_xor.h │ │ ├── cl_khr_local_int32_base_atomics │ │ │ ├── atom_add.h │ │ │ ├── atom_cmpxchg.h │ │ │ ├── atom_dec.h │ │ │ ├── atom_inc.h │ │ │ ├── atom_sub.h │ │ │ └── atom_xchg.h │ │ ├── cl_khr_local_int32_extended_atomics │ │ │ ├── atom_and.h │ │ │ ├── atom_max.h │ │ │ ├── atom_min.h │ │ │ ├── atom_or.h │ │ │ └── atom_xor.h │ │ ├── clc.h │ │ ├── clcfunc.h │ │ ├── clcmacros.h │ │ ├── clctypes.h │ │ ├── common │ │ │ ├── degrees.h │ │ │ ├── degrees.inc │ │ │ ├── mix.h │ │ │ ├── mix.inc │ │ │ ├── radians.h │ │ │ ├── radians.inc │ │ │ ├── sign.h │ │ │ ├── smoothstep.h │ │ │ ├── smoothstep.inc │ │ │ ├── step.h │ │ │ └── step.inc │ │ ├── convert.h │ │ ├── explicit_fence │ │ │ └── explicit_memory_fence.h │ │ ├── float │ │ │ └── definitions.h │ │ ├── geometric │ │ │ ├── cross.h │ │ │ ├── distance.h │ │ │ ├── distance.inc │ │ │ ├── dot.h │ │ │ ├── dot.inc │ │ │ ├── fast_distance.h │ │ │ ├── fast_distance.inc │ │ │ ├── fast_length.h │ │ │ ├── fast_length.inc │ │ │ ├── fast_normalize.h │ │ │ ├── fast_normalize.inc │ │ │ ├── floatn.inc │ │ │ ├── length.h │ │ │ ├── length.inc │ │ │ ├── normalize.h 
│ │ │ └── normalize.inc │ │ ├── image │ │ │ ├── image.h │ │ │ └── image_defines.h │ │ ├── integer │ │ │ ├── abs.h │ │ │ ├── abs.inc │ │ │ ├── abs_diff.h │ │ │ ├── abs_diff.inc │ │ │ ├── add_sat.h │ │ │ ├── add_sat.inc │ │ │ ├── clz.h │ │ │ ├── clz.inc │ │ │ ├── definitions.h │ │ │ ├── gentype.inc │ │ │ ├── hadd.h │ │ │ ├── hadd.inc │ │ │ ├── integer-gentype.inc │ │ │ ├── mad24.h │ │ │ ├── mad24.inc │ │ │ ├── mad_hi.h │ │ │ ├── mad_sat.h │ │ │ ├── mad_sat.inc │ │ │ ├── mul24.h │ │ │ ├── mul24.inc │ │ │ ├── mul_hi.h │ │ │ ├── mul_hi.inc │ │ │ ├── popcount.h │ │ │ ├── rhadd.h │ │ │ ├── rhadd.inc │ │ │ ├── rotate.h │ │ │ ├── rotate.inc │ │ │ ├── sub_sat.h │ │ │ ├── sub_sat.inc │ │ │ ├── unary.inc │ │ │ └── upsample.h │ │ ├── math │ │ │ ├── acos.h │ │ │ ├── acosh.h │ │ │ ├── acospi.h │ │ │ ├── asin.h │ │ │ ├── asinh.h │ │ │ ├── asinpi.h │ │ │ ├── atan.h │ │ │ ├── atan2.h │ │ │ ├── atan2pi.h │ │ │ ├── atanh.h │ │ │ ├── atanpi.h │ │ │ ├── binary_decl.inc │ │ │ ├── binary_decl_tt.inc │ │ │ ├── cbrt.h │ │ │ ├── ceil.h │ │ │ ├── copysign.h │ │ │ ├── cos.h │ │ │ ├── cosh.h │ │ │ ├── cospi.h │ │ │ ├── erf.h │ │ │ ├── erfc.h │ │ │ ├── exp.h │ │ │ ├── exp10.h │ │ │ ├── exp2.h │ │ │ ├── expm1.h │ │ │ ├── fabs.h │ │ │ ├── fdim.h │ │ │ ├── floor.h │ │ │ ├── fma.h │ │ │ ├── fmax.h │ │ │ ├── fmin.h │ │ │ ├── fmod.h │ │ │ ├── fract.h │ │ │ ├── fract.inc │ │ │ ├── frexp.h │ │ │ ├── frexp.inc │ │ │ ├── gentype.inc │ │ │ ├── half_cos.h │ │ │ ├── half_divide.h │ │ │ ├── half_exp.h │ │ │ ├── half_exp10.h │ │ │ ├── half_exp2.h │ │ │ ├── half_log.h │ │ │ ├── half_log10.h │ │ │ ├── half_log2.h │ │ │ ├── half_powr.h │ │ │ ├── half_recip.h │ │ │ ├── half_rsqrt.h │ │ │ ├── half_sin.h │ │ │ ├── half_sqrt.h │ │ │ ├── half_tan.h │ │ │ ├── hypot.h │ │ │ ├── ilogb.h │ │ │ ├── ilogb.inc │ │ │ ├── ldexp.h │ │ │ ├── ldexp.inc │ │ │ ├── lgamma.h │ │ │ ├── lgamma_r.h │ │ │ ├── lgamma_r.inc │ │ │ ├── log.h │ │ │ ├── log10.h │ │ │ ├── log1p.h │ │ │ ├── log2.h │ │ │ ├── logb.h │ │ │ ├── mad.h │ │ │ ├── 
maxmag.h │ │ │ ├── minmag.h │ │ │ ├── modf.h │ │ │ ├── modf.inc │ │ │ ├── nan.h │ │ │ ├── nan.inc │ │ │ ├── native_cos.h │ │ │ ├── native_divide.h │ │ │ ├── native_exp.h │ │ │ ├── native_exp10.h │ │ │ ├── native_exp2.h │ │ │ ├── native_log.h │ │ │ ├── native_log10.h │ │ │ ├── native_log2.h │ │ │ ├── native_powr.h │ │ │ ├── native_recip.h │ │ │ ├── native_rsqrt.h │ │ │ ├── native_sin.h │ │ │ ├── native_sqrt.h │ │ │ ├── native_tan.h │ │ │ ├── nextafter.h │ │ │ ├── pow.h │ │ │ ├── pown.h │ │ │ ├── pown.inc │ │ │ ├── powr.h │ │ │ ├── remainder.h │ │ │ ├── remquo.h │ │ │ ├── remquo.inc │ │ │ ├── rint.h │ │ │ ├── rootn.h │ │ │ ├── rootn.inc │ │ │ ├── round.h │ │ │ ├── rsqrt.h │ │ │ ├── sin.h │ │ │ ├── sincos.h │ │ │ ├── sincos.inc │ │ │ ├── sinh.h │ │ │ ├── sinpi.h │ │ │ ├── sqrt.h │ │ │ ├── tan.h │ │ │ ├── tanh.h │ │ │ ├── tanpi.h │ │ │ ├── ternary_decl.inc │ │ │ ├── tgamma.h │ │ │ ├── trunc.h │ │ │ └── unary_decl.inc │ │ ├── misc │ │ │ ├── shuffle.h │ │ │ └── shuffle2.h │ │ ├── relational │ │ │ ├── all.h │ │ │ ├── any.h │ │ │ ├── binary_decl.inc │ │ │ ├── bitselect.h │ │ │ ├── bitselect.inc │ │ │ ├── floatn.inc │ │ │ ├── isequal.h │ │ │ ├── isfinite.h │ │ │ ├── isgreater.h │ │ │ ├── isgreaterequal.h │ │ │ ├── isinf.h │ │ │ ├── isless.h │ │ │ ├── islessequal.h │ │ │ ├── islessgreater.h │ │ │ ├── isnan.h │ │ │ ├── isnormal.h │ │ │ ├── isnotequal.h │ │ │ ├── isordered.h │ │ │ ├── isunordered.h │ │ │ ├── select.h │ │ │ ├── select.inc │ │ │ ├── signbit.h │ │ │ └── unary_decl.inc │ │ ├── shared │ │ │ ├── clamp.h │ │ │ ├── clamp.inc │ │ │ ├── max.h │ │ │ ├── max.inc │ │ │ ├── min.h │ │ │ ├── min.inc │ │ │ ├── vload.h │ │ │ └── vstore.h │ │ ├── synchronization │ │ │ ├── barrier.h │ │ │ └── cl_mem_fence_flags.h │ │ └── workitem │ │ │ ├── get_global_id.h │ │ │ ├── get_global_offset.h │ │ │ ├── get_global_size.h │ │ │ ├── get_group_id.h │ │ │ ├── get_local_id.h │ │ │ ├── get_local_size.h │ │ │ ├── get_num_groups.h │ │ │ └── get_work_dim.h │ ├── config.h │ ├── integer │ │ ├── 
popcount.h │ │ └── unary_intrin.inc │ ├── math │ │ ├── binary_intrin.inc │ │ ├── clc_exp10.h │ │ ├── clc_fma.h │ │ ├── clc_fmod.h │ │ ├── clc_hypot.h │ │ ├── clc_ldexp.h │ │ ├── clc_nextafter.h │ │ ├── clc_pow.h │ │ ├── clc_pown.h │ │ ├── clc_pown.inc │ │ ├── clc_powr.h │ │ ├── clc_remainder.h │ │ ├── clc_remquo.h │ │ ├── clc_rootn.h │ │ ├── clc_rootn.inc │ │ ├── clc_sqrt.h │ │ ├── clc_tan.h │ │ ├── clc_tanpi.h │ │ ├── ternary_intrin.inc │ │ └── unary_intrin.inc │ └── utils.h └── lib │ ├── SOURCES │ ├── async │ ├── async_work_group_copy.cl │ ├── async_work_group_copy.inc │ ├── async_work_group_strided_copy.cl │ ├── async_work_group_strided_copy.inc │ ├── prefetch.cl │ ├── prefetch.inc │ └── wait_group_events.cl │ ├── atom_int32_binary.inc │ ├── atomic │ ├── atomic_add.cl │ ├── atomic_and.cl │ ├── atomic_cmpxchg.cl │ ├── atomic_dec.cl │ ├── atomic_inc.cl │ ├── atomic_max.cl │ ├── atomic_min.cl │ ├── atomic_or.cl │ ├── atomic_sub.cl │ ├── atomic_xchg.cl │ └── atomic_xor.cl │ ├── cl_khr_global_int32_base_atomics │ ├── atom_add.cl │ ├── atom_cmpxchg.cl │ ├── atom_dec.cl │ ├── atom_inc.cl │ ├── atom_sub.cl │ └── atom_xchg.cl │ ├── cl_khr_global_int32_extended_atomics │ ├── atom_and.cl │ ├── atom_max.cl │ ├── atom_min.cl │ ├── atom_or.cl │ └── atom_xor.cl │ ├── cl_khr_int64_base_atomics │ ├── atom_add.cl │ ├── atom_cmpxchg.cl │ ├── atom_dec.cl │ ├── atom_inc.cl │ ├── atom_sub.cl │ └── atom_xchg.cl │ ├── cl_khr_int64_extended_atomics │ ├── atom_and.cl │ ├── atom_max.cl │ ├── atom_min.cl │ ├── atom_or.cl │ └── atom_xor.cl │ ├── cl_khr_local_int32_base_atomics │ ├── atom_add.cl │ ├── atom_cmpxchg.cl │ ├── atom_dec.cl │ ├── atom_inc.cl │ ├── atom_sub.cl │ └── atom_xchg.cl │ ├── cl_khr_local_int32_extended_atomics │ ├── atom_and.cl │ ├── atom_max.cl │ ├── atom_min.cl │ ├── atom_or.cl │ └── atom_xor.cl │ ├── clc_unary.inc │ ├── clcmacro.h │ ├── common │ ├── degrees.cl │ ├── mix.cl │ ├── mix.inc │ ├── radians.cl │ ├── sign.cl │ ├── smoothstep.cl │ └── step.cl │ ├── 
gen_convert.py │ ├── geometric │ ├── cross.cl │ ├── distance.cl │ ├── distance.inc │ ├── dot.cl │ ├── fast_distance.cl │ ├── fast_distance.inc │ ├── fast_length.cl │ ├── fast_normalize.cl │ ├── fast_normalize.inc │ ├── length.cl │ └── normalize.cl │ ├── integer │ ├── abs.cl │ ├── abs.inc │ ├── abs_diff.cl │ ├── abs_diff.inc │ ├── add_sat.cl │ ├── clz.cl │ ├── hadd.cl │ ├── hadd.inc │ ├── mad24.cl │ ├── mad24.inc │ ├── mad_sat.cl │ ├── mul24.cl │ ├── mul24.inc │ ├── mul_hi.cl │ ├── popcount.cl │ ├── rhadd.cl │ ├── rhadd.inc │ ├── rotate.cl │ ├── rotate.inc │ ├── sub_sat.cl │ └── upsample.cl │ ├── math │ ├── acos.cl │ ├── acos.inc │ ├── acosh.cl │ ├── acospi.cl │ ├── asin.cl │ ├── asin.inc │ ├── asinh.cl │ ├── asinpi.cl │ ├── atan.cl │ ├── atan2.cl │ ├── atan2pi.cl │ ├── atanh.cl │ ├── atanpi.cl │ ├── binary_impl.inc │ ├── cbrt.cl │ ├── ceil.cl │ ├── clc_exp10.cl │ ├── clc_fma.cl │ ├── clc_fmod.cl │ ├── clc_hypot.cl │ ├── clc_ldexp.cl │ ├── clc_nextafter.cl │ ├── clc_pow.cl │ ├── clc_pown.cl │ ├── clc_powr.cl │ ├── clc_remainder.cl │ ├── clc_remquo.cl │ ├── clc_rootn.cl │ ├── clc_sqrt.cl │ ├── clc_sqrt_impl.inc │ ├── clc_sw_binary.inc │ ├── clc_sw_unary.inc │ ├── clc_tan.cl │ ├── clc_tanpi.cl │ ├── copysign.cl │ ├── cos.cl │ ├── cosh.cl │ ├── cospi.cl │ ├── ep_log.cl │ ├── ep_log.h │ ├── erf.cl │ ├── erfc.cl │ ├── exp.cl │ ├── exp10.cl │ ├── exp2.cl │ ├── exp_helper.cl │ ├── exp_helper.h │ ├── expm1.cl │ ├── fabs.cl │ ├── fdim.cl │ ├── fdim.inc │ ├── floor.cl │ ├── fma.cl │ ├── fma.inc │ ├── fmax.cl │ ├── fmax.inc │ ├── fmin.cl │ ├── fmin.inc │ ├── fmod.cl │ ├── fract.cl │ ├── fract.inc │ ├── frexp.cl │ ├── frexp.inc │ ├── half_binary.inc │ ├── half_cos.cl │ ├── half_divide.cl │ ├── half_exp.cl │ ├── half_exp10.cl │ ├── half_exp2.cl │ ├── half_log.cl │ ├── half_log10.cl │ ├── half_log2.cl │ ├── half_powr.cl │ ├── half_recip.cl │ ├── half_rsqrt.cl │ ├── half_sin.cl │ ├── half_sqrt.cl │ ├── half_tan.cl │ ├── half_unary.inc │ ├── hypot.cl │ ├── ilogb.cl │ ├── ldexp.cl │ 
├── ldexp.inc │ ├── lgamma.cl │ ├── lgamma_r.cl │ ├── lgamma_r.inc │ ├── log.cl │ ├── log10.cl │ ├── log1p.cl │ ├── log2.cl │ ├── log_base.h │ ├── logb.cl │ ├── mad.cl │ ├── mad.inc │ ├── math.h │ ├── maxmag.cl │ ├── maxmag.inc │ ├── minmag.cl │ ├── minmag.inc │ ├── modf.cl │ ├── modf.inc │ ├── nan.cl │ ├── nan.inc │ ├── native_cos.cl │ ├── native_divide.cl │ ├── native_divide.inc │ ├── native_exp.cl │ ├── native_exp10.cl │ ├── native_exp10.inc │ ├── native_exp2.cl │ ├── native_log.cl │ ├── native_log10.cl │ ├── native_log2.cl │ ├── native_powr.cl │ ├── native_powr.inc │ ├── native_recip.cl │ ├── native_recip.inc │ ├── native_rsqrt.cl │ ├── native_rsqrt.inc │ ├── native_sin.cl │ ├── native_sqrt.cl │ ├── native_tan.cl │ ├── native_tan.inc │ ├── native_unary_intrinsic.inc │ ├── nextafter.cl │ ├── pow.cl │ ├── pown.cl │ ├── pown.inc │ ├── powr.cl │ ├── remainder.cl │ ├── remquo.cl │ ├── remquo.inc │ ├── rint.cl │ ├── rootn.cl │ ├── rootn.inc │ ├── round.cl │ ├── sin.cl │ ├── sincos.cl │ ├── sincos.inc │ ├── sincosD_piby4.h │ ├── sincos_helpers.cl │ ├── sincos_helpers.h │ ├── sincospiF_piby4.h │ ├── sinh.cl │ ├── sinpi.cl │ ├── sqrt.cl │ ├── tables.cl │ ├── tables.h │ ├── tan.cl │ ├── tanh.cl │ ├── tanpi.cl │ ├── tgamma.cl │ ├── trunc.cl │ └── unary_builtin.inc │ ├── misc │ ├── shuffle.cl │ └── shuffle2.cl │ ├── relational │ ├── all.cl │ ├── any.cl │ ├── bitselect.cl │ ├── bitselect.inc │ ├── isequal.cl │ ├── isfinite.cl │ ├── isgreater.cl │ ├── isgreaterequal.cl │ ├── isinf.cl │ ├── isless.cl │ ├── islessequal.cl │ ├── islessgreater.cl │ ├── isnan.cl │ ├── isnormal.cl │ ├── isnotequal.cl │ ├── isordered.cl │ ├── isunordered.cl │ ├── relational.h │ ├── select.cl │ ├── select.inc │ └── signbit.cl │ ├── shared │ ├── clamp.cl │ ├── clamp.inc │ ├── max.cl │ ├── max.inc │ ├── min.cl │ ├── min.inc │ ├── vload.cl │ ├── vload_half.inc │ ├── vstore.cl │ └── vstore_half.inc │ ├── subnormal_config.cl │ ├── subnormal_disable.ll │ ├── subnormal_helper_func.ll │ ├── 
subnormal_use_default.ll │ └── workitem │ ├── get_global_id.cl │ └── get_global_size.cl ├── libclc.pc.in ├── ptx-nvidiacl └── lib │ ├── SOURCES │ ├── mem_fence │ └── fence.cl │ ├── synchronization │ └── barrier.cl │ └── workitem │ ├── get_global_id.cl │ ├── get_group_id.cl │ ├── get_local_id.cl │ ├── get_local_size.cl │ └── get_num_groups.cl ├── ptx └── lib │ ├── SOURCES │ ├── SOURCES_3.9 │ ├── SOURCES_4.0 │ ├── SOURCES_5.0 │ ├── math │ └── nextafter.cl │ └── shared │ ├── vload_half_helpers.ll │ └── vstore_half_helpers.ll ├── r600 └── lib │ ├── SOURCES │ ├── SOURCES_3.9 │ ├── image │ ├── get_image_attributes_impl.ll │ ├── get_image_channel_data_type.cl │ ├── get_image_channel_order.cl │ ├── get_image_depth.cl │ ├── get_image_dim.cl │ ├── get_image_height.cl │ ├── get_image_width.cl │ ├── read_image_impl.ll │ ├── read_imagef.cl │ ├── read_imagei.cl │ ├── read_imageui.cl │ ├── write_image_impl.ll │ ├── write_imagef.cl │ ├── write_imagei.cl │ └── write_imageui.cl │ ├── math │ ├── fmax.cl │ └── fmin.cl │ ├── synchronization │ └── barrier.cl │ └── workitem │ ├── get_global_offset.cl │ ├── get_global_size.cl │ ├── get_group_id.cl │ ├── get_local_id.cl │ ├── get_local_size.cl │ ├── get_num_groups.cl │ └── get_work_dim.cl ├── test ├── add_sat.cl ├── as_type.cl ├── convert.cl ├── cos.cl ├── cross.cl ├── fabs.cl ├── get_group_id.cl ├── rsqrt.cl └── subsat.cl ├── utils └── prepare-builtins.cpp └── www └── index.html /.gitignore: -------------------------------------------------------------------------------- 1 | Makefile 2 | amdgcn-- 3 | amdgcn--amdhsa 4 | amdgcn-mesa-mesa3d 5 | build/*.pyc 6 | built_libs/ 7 | generic-- 8 | generic/lib/convert.cl 9 | libclc.pc 10 | nvptx--nvidiacl 11 | nvptx64--nvidiacl 12 | r600-- 13 | utils/prepare-builtins 14 | utils/prepare-builtins.o 15 | utils/prepare-builtins.o.d 16 | -------------------------------------------------------------------------------- /CREDITS.TXT: 
--------------------------------------------------------------------------------
1 | N: Peter Collingbourne
2 | E: peter@pcc.me.uk
3 |
--------------------------------------------------------------------------------
/amdgcn-amdhsa/lib/SOURCES:
--------------------------------------------------------------------------------
1 | workitem/get_global_size.cl
2 | workitem/get_local_size.cl
3 | workitem/get_num_groups.cl
4 |
--------------------------------------------------------------------------------
/amdgcn-amdhsa/lib/workitem/get_global_size.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #if __clang_major__ >= 8
4 | #define CONST_AS __constant
5 | #elif __clang_major__ >= 7
6 | #define CONST_AS __attribute__((address_space(4)))
7 | #else
8 | #define CONST_AS __attribute__((address_space(2)))
9 | #endif
10 |
11 | #if __clang_major__ >= 6
12 | #define __dispatch_ptr __builtin_amdgcn_dispatch_ptr
13 | #else
14 | #define __dispatch_ptr __clc_amdgcn_dispatch_ptr
15 | CONST_AS uchar * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
16 | #endif
17 |
18 | /* Global work size for `dim`: 32-bit word 3 + dim behind the dispatch
19 |  * pointer. Any dim of 3 or more yields 1. */
20 | _CLC_DEF size_t get_global_size(uint dim)
21 | {
22 |   if (dim >= 3)
23 |     return 1;
24 |   CONST_AS uint * words = (CONST_AS uint *) __dispatch_ptr();
25 |   return words[3 + dim];
26 | }
27 |
--------------------------------------------------------------------------------
/amdgcn-amdhsa/lib/workitem/get_local_size.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #if __clang_major__ >= 8
4 | #define CONST_AS __constant
5 | #elif __clang_major__ >= 7
6 | #define CONST_AS __attribute__((address_space(4)))
7 | #else
8 | #define CONST_AS __attribute__((address_space(2)))
9 | #endif
10 |
11 | #if __clang_major__ >= 6
12 | #define __dispatch_ptr __builtin_amdgcn_dispatch_ptr
13 | #else
14 | #define __dispatch_ptr __clc_amdgcn_dispatch_ptr
15 | CONST_AS char * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
16 | #endif
17 |
18 | /* Work-group size for `dim`, unpacked from 16-bit fields behind the
19 |  * dispatch pointer: x = low half of word 1, y = high half of word 1,
20 |  * z = low half of word 2. Any other dim yields 1. */
21 | _CLC_DEF size_t get_local_size(uint dim)
22 | {
23 |   CONST_AS uint * words = (CONST_AS uint *) __dispatch_ptr();
24 |   if (dim == 0)
25 |     return words[1] & 0xffffu;
26 |   if (dim == 1)
27 |     return words[1] >> 16;
28 |   if (dim == 2)
29 |     return words[2] & 0xffffu;
30 |   return 1;
31 | }
32 |
--------------------------------------------------------------------------------
/amdgcn-amdhsa/lib/workitem/get_num_groups.cl:
--------------------------------------------------------------------------------
1 |
2 | #include
3 |
4 | /* Number of work-groups along `dim`: global size divided by local size,
5 |  * rounded up so a partial trailing group is counted. */
6 | _CLC_DEF size_t get_num_groups(uint dim) {
7 |   const size_t total = get_global_size(dim);
8 |   const size_t per_group = get_local_size(dim);
9 |   return total / per_group + (total % per_group != 0 ? 1 : 0);
10 | }
11 |
--------------------------------------------------------------------------------
/amdgcn-mesa3d:
--------------------------------------------------------------------------------
1 | amdgcn-amdhsa
--------------------------------------------------------------------------------
/amdgcn/lib/SOURCES:
--------------------------------------------------------------------------------
1 | cl_khr_int64_extended_atomics/minmax_helpers.ll
2 | integer/popcount.cl
3 | math/fmax.cl
4 | math/fmin.cl
5 | math/ldexp.cl
6 | mem_fence/fence.cl
7 | synchronization/barrier.cl
8 | workitem/get_global_offset.cl
9 | workitem/get_group_id.cl
10 | workitem/get_global_size.cl
11 | workitem/get_local_id.cl
12 | workitem/get_local_size.cl
13 | workitem/get_num_groups.cl
14 | workitem/get_work_dim.cl
15 |
--------------------------------------------------------------------------------
/amdgcn/lib/integer/popcount.cl:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #define __CLC_BODY "popcount.inc"
6 | #include
7 |
--------------------------------------------------------------------------------
/amdgcn/lib/integer/popcount.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE popcount(__CLC_GENTYPE x) {
2 | /* LLVM-4+ implements i16 ops for VI+ ASICs. However, ctpop implementation
3 |  * is missing until r326535. Therefore we have to convert sub i32 types to uint
4 |  * as a workaround. */
5 | #if __clang_major__ < 7 && __clang_major__ > 3 && __CLC_GENSIZE < 32
6 |   /* Prevent sign extension on uint conversion */
7 |   const __CLC_U_GENTYPE y = __CLC_XCONCAT(as_, __CLC_U_GENTYPE)(x);
8 |   /* Convert to uintX */
9 |   const __CLC_XCONCAT(uint, __CLC_VECSIZE) z = __CLC_XCONCAT(convert_uint, __CLC_VECSIZE)(y);
10 |   /* Call popcount on uintX type */
11 |   const __CLC_XCONCAT(uint, __CLC_VECSIZE) res = __clc_native_popcount(z);
12 |   /* Convert the result back to gentype. */
13 |   return __CLC_XCONCAT(convert_, __CLC_GENTYPE)(res);
14 | #else
15 |   return __clc_native_popcount(x);
16 | #endif
17 | }
18 |
--------------------------------------------------------------------------------
/amdgcn/lib/synchronization/barrier.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | /* Issues mem_fence(flags) and then the s_barrier builtin. */
4 | _CLC_DEF void barrier(cl_mem_fence_flags flags)
5 | {
6 |   mem_fence(flags);
7 |   __builtin_amdgcn_s_barrier();
8 | }
9 |
--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_global_offset.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #if __clang_major__ >= 8
4 | #define CONST_AS __constant
5 | #elif __clang_major__ >= 7
6 | #define CONST_AS __attribute__((address_space(4)))
7 | #else
8 | #define CONST_AS __attribute__((address_space(2)))
9 | #endif
10 |
11 | /* Returns 32-bit word dim + 1 behind the implicit-argument pointer for
12 |  * dim < 3, and 0 for any other dim. */
13 | _CLC_DEF size_t get_global_offset(uint dim)
14 | {
15 |   CONST_AS uint * ptr =
16 |     (CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
17 |   if (dim < 3)
18 |     return ptr[dim + 1];
19 |   return 0;
20 | }
21 |
--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_global_size.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | uint __clc_amdgcn_get_global_size_x(void) __asm("llvm.r600.read.global.size.x");
4 | uint __clc_amdgcn_get_global_size_y(void) __asm("llvm.r600.read.global.size.y");
5 | uint __clc_amdgcn_get_global_size_z(void) __asm("llvm.r600.read.global.size.z");
6 |
7 | /* Global work size for dim 0..2 via the asm-bound readers above;
8 |  * 1 for any other dim. */
9 | _CLC_DEF size_t get_global_size(uint dim)
10 | {
11 |   switch (dim) {
12 |   case 0: return __clc_amdgcn_get_global_size_x();
13 |   case 1: return __clc_amdgcn_get_global_size_y();
14 |   case 2: return __clc_amdgcn_get_global_size_z();
15 |   default: return 1;
16 |   }
17 | }
18 |
--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_group_id.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | /* Work-group ID for dim 0..2 via the workgroup_id builtins.
4 |  * An out-of-range dim yields 0, as OpenCL specifies for get_group_id
5 |  * (an ID is an index, so the neutral value is 0, not 1). */
6 | _CLC_DEF size_t get_group_id(uint dim)
7 | {
8 |   switch(dim) {
9 |   case 0: return __builtin_amdgcn_workgroup_id_x();
10 |   case 1: return __builtin_amdgcn_workgroup_id_y();
11 |   case 2: return __builtin_amdgcn_workgroup_id_z();
12 |   default: return 0;
13 |   }
14 | }
15 |
--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_local_id.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | /* Local work-item ID for dim 0..2 via the workitem_id builtins.
4 |  * An out-of-range dim yields 0, as OpenCL specifies for get_local_id
5 |  * (an ID is an index, so the neutral value is 0, not 1). */
6 | _CLC_DEF size_t get_local_id(uint dim)
7 | {
8 |   switch(dim) {
9 |   case 0: return __builtin_amdgcn_workitem_id_x();
10 |   case 1: return __builtin_amdgcn_workitem_id_y();
11 |   case 2: return __builtin_amdgcn_workitem_id_z();
12 |   default: return 0;
13 |   }
14 | }
15 |
--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_local_size.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | uint __clc_amdgcn_get_local_size_x(void) __asm("llvm.r600.read.local.size.x");
4 | uint __clc_amdgcn_get_local_size_y(void) __asm("llvm.r600.read.local.size.y");
5 | uint __clc_amdgcn_get_local_size_z(void) __asm("llvm.r600.read.local.size.z");
6 |
7 | /* Work-group size along `dim`, taken from the matching asm-bound
8 |  * reader; every dim other than 0, 1 or 2 reports 1. */
9 | _CLC_DEF size_t get_local_size(uint dim)
10 | {
11 |   if (dim == 0)
12 |     return __clc_amdgcn_get_local_size_x();
13 |   if (dim == 1)
14 |     return __clc_amdgcn_get_local_size_y();
15 |   if (dim == 2)
16 |     return __clc_amdgcn_get_local_size_z();
17 |   return 1;
18 | }
19 |
--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_num_groups.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | uint __clc_amdgcn_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x");
4 | uint __clc_amdgcn_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y");
5 | uint __clc_amdgcn_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z");
6 |
7 | /* Work-group count along `dim`, taken from the matching asm-bound
8 |  * reader; every dim other than 0, 1 or 2 reports 1. */
9 | _CLC_DEF size_t get_num_groups(uint dim)
10 | {
11 |   if (dim == 0)
12 |     return __clc_amdgcn_get_num_groups_x();
13 |   if (dim == 1)
14 |     return __clc_amdgcn_get_num_groups_y();
15 |   if (dim == 2)
16 |     return __clc_amdgcn_get_num_groups_z();
17 |   return 1;
18 | }
19 |
--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_work_dim.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #if __clang_major__ >= 8
4 | #define CONST_AS __constant
5 | #elif __clang_major__ >= 7
6 | #define CONST_AS __attribute__((address_space(4)))
7 | #else
8 | #define CONST_AS __attribute__((address_space(2)))
9 | #endif
10 |
11 | /* Returns the first 32-bit word behind the implicit-argument pointer. */
12 | _CLC_DEF uint get_work_dim(void)
13 | {
14 |   CONST_AS uint * implicit_args =
15 |     (CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
16 |   return *implicit_args;
17 | }
18 |
--------------------------------------------------------------------------------
/amdgpu/lib/SOURCES:
--------------------------------------------------------------------------------
1 | math/native_exp.cl
2 | math/native_log.cl
3 | math/native_log10.cl
4 | math/half_exp.cl
5 | math/half_exp10.cl
6 | math/half_exp2.cl
7 | math/half_log.cl
8 | math/half_log10.cl
9 | math/half_log2.cl
10 | math/half_recip.cl
11 | math/half_rsqrt.cl
12 | math/half_sqrt.cl
13 | math/nextafter.cl
14 | math/sqrt.cl
15 |
--------------------------------------------------------------------------------
/amdgpu/lib/SOURCES_3.9:
--------------------------------------------------------------------------------
1 | shared/vload_half_helpers.ll
2 | shared/vstore_half_helpers.ll
3 |
--------------------------------------------------------------------------------
/amdgpu/lib/SOURCES_4.0:
--------------------------------------------------------------------------------
1 | shared/vload_half_helpers.ll
2 | shared/vstore_half_helpers.ll
3 |
--------------------------------------------------------------------------------
/amdgpu/lib/SOURCES_5.0:
--------------------------------------------------------------------------------
1 | shared/vload_half_helpers.ll
2 | shared/vstore_half_helpers.ll
3 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/half_exp.cl:
--------------------------------------------------------------------------------
1 | #include
2 | /* Sets configuration macros, then includes a body. The #include targets and the __CLC_BODY value are not visible in this listing. NOTE(review): presumably instantiates half_native_unary.inc with __CLC_FUNC = exp - confirm. The other half_*.cl files repeat this pattern with a different __CLC_FUNC. */
3 | #define __CLC_FUNC exp
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY
6 | #include
7 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/half_exp10.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define __CLC_FUNC exp10
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY
6 | #include
7 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/half_exp2.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define __CLC_FUNC exp2
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY
6 | #include
7 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/half_log.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define __CLC_FUNC log
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY
6 | #include
7 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/half_log10.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define __CLC_FUNC log10
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY
6 | #include
7 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/half_log2.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define __CLC_FUNC log2
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY
6 | #include
7 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/half_native_unary.inc:
--------------------------------------------------------------------------------
1 | #include
2 | /* Body template: defines the half_ variant of __CLC_FUNC as a direct forwarder to the native_ variant. Assumes __CLC_CONCAT pastes its two arguments into one identifier - TODO confirm. */
3 | #define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x)
4 | #define __CLC_NATIVE_FUNC(x) __CLC_CONCAT(native_, x)
5 | /* __CLC_FUNC and __CLC_GENTYPE are not defined in this file; they must already be defined when it is included. */
6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_HALF_FUNC(__CLC_FUNC)(__CLC_GENTYPE val) {
7 |   return __CLC_NATIVE_FUNC(__CLC_FUNC)(val);
8 | }
9 | /* The two helper macros are local to this template and are removed again below. */
10 | #undef __CLC_NATIVE_FUNC
11 | #undef __CLC_HALF_FUNC
12 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/half_recip.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define __CLC_FUNC recip
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY
6 | #include
7 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/half_rsqrt.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define __CLC_FUNC rsqrt
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY
6 | #include
7 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/half_sqrt.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define __CLC_FUNC sqrt
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY
6 | #include
7 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/native_exp.cl:
--------------------------------------------------------------------------------
1 | #include
2 | /* NOTE(review): presumably instantiates native_exp.inc; the __CLC_BODY value and include targets are not visible in this listing - confirm. */
3 | #define __CLC_BODY
4 | #define __FLOAT_ONLY
5 | #include
6 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/native_exp.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_exp(__CLC_GENTYPE val) {
2 |   return native_exp2(val * M_LOG2E_F); /* scale the argument by M_LOG2E_F, then take the base-2 exponential */
3 | }
4 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/native_log.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define __CLC_BODY
4 | #define __FLOAT_ONLY
5 | #include
6 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/native_log.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log(__CLC_GENTYPE val) {
2 |   return native_log2(val) * (1.0f / M_LOG2E_F); /* base-2 logarithm rescaled by the reciprocal of M_LOG2E_F */
3 | }
4 |
--------------------------------------------------------------------------------
/amdgpu/lib/math/native_log10.cl:
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #define __FLOAT_ONLY 5 | #include 6 | -------------------------------------------------------------------------------- /amdgpu/lib/math/native_log10.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log10(__CLC_GENTYPE val) { 2 | return native_log2(val) * (M_LN2_F / M_LN10_F); 3 | } 4 | -------------------------------------------------------------------------------- /amdgpu/lib/math/nextafter.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../lib/clcmacro.h" 3 | #include 4 | 5 | _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float) 6 | 7 | #ifdef cl_khr_fp64 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | _CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __clc_nextafter, double, double) 10 | #endif 11 | 12 | #ifdef cl_khr_fp16 13 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 14 | _CLC_DEFINE_BINARY_BUILTIN(half, nextafter, __clc_nextafter, half, half) 15 | #endif 16 | -------------------------------------------------------------------------------- /amdgpu/lib/shared/vload_half_helpers.ll: -------------------------------------------------------------------------------- 1 | define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline { 2 | %data = load half, half addrspace(0)* %ptr 3 | %res = fpext half %data to float 4 | ret float %res 5 | } 6 | 7 | define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline { 8 | %data = load half, half addrspace(1)* %ptr 9 | %res = fpext half %data to float 10 | ret float %res 11 | } 12 | 13 | define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline { 14 | %data = load half, half 
addrspace(3)* %ptr 15 | %res = fpext half %data to float 16 | ret float %res 17 | } 18 | 19 | define float @__clc_vload_half_float_helper__constant(half addrspace(2)* nocapture %ptr) nounwind alwaysinline { 20 | %data = load half, half addrspace(2)* %ptr 21 | %res = fpext half %data to float 22 | ret float %res 23 | } 24 | -------------------------------------------------------------------------------- /check_external_calls.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | FILE=$1 4 | if [ ! -f $FILE ]; then 5 | echo "ERROR: Not a file: $FILE" 6 | exit 3 7 | fi 8 | ret=0 9 | if [ "x$LLVM_CONFIG" = "x" ]; then 10 | LLVM_CONFIG=llvm-config 11 | echo 'WARNING: $LLVM_CONFIG not set, falling back to $PATH llvm-config' 12 | ret=2 13 | fi 14 | 15 | 16 | BIN_DIR=$($LLVM_CONFIG --bindir) 17 | DIS="$BIN_DIR/llvm-dis" 18 | if [ ! -x $DIS ]; then 19 | echo "ERROR: Disassembler '$DIS' is not executable" 20 | exit 3 21 | fi 22 | 23 | TMP_FILE=$(mktemp) 24 | 25 | # Check for calls. 
Calls to llvm intrinsics are OK 26 | $DIS < $FILE | grep ' call ' | grep -v '@llvm' > "$TMP_FILE" 27 | COUNT=$(wc -l < "$TMP_FILE") 28 | 29 | if [ "$COUNT" -ne "0" ]; then 30 | echo "ERROR: $COUNT unresolved calls detected in $FILE" 31 | cat $TMP_FILE 32 | ret=1 33 | else 34 | echo "File $FILE is OK" 35 | fi 36 | exit $ret 37 | -------------------------------------------------------------------------------- /cmake/CMakeCLCCompiler.cmake.in: -------------------------------------------------------------------------------- 1 | set(CMAKE_CLC_COMPILER "@CMAKE_CLC_COMPILER@") 2 | set(CMAKE_CLC_COMPILER_LOADED 1) 3 | 4 | set(CMAKE_CLC_SOURCE_FILE_EXTENSIONS cl) 5 | set(CMAKE_CLC_OUTPUT_EXTENSION .bc) 6 | set(CMAKE_CLC_OUTPUT_EXTENSION_REPLACE 1) 7 | set(CMAKE_STATIC_LIBRARY_PREFIX_CLC "") 8 | set(CMAKE_STATIC_LIBRARY_SUFFIX_CLC ".bc") 9 | set(CMAKE_CLC_COMPILER_ENV_VAR "CLC_COMPILER") 10 | -------------------------------------------------------------------------------- /cmake/CMakeCLCInformation.cmake: -------------------------------------------------------------------------------- 1 | if(NOT CMAKE_CLC_COMPILE_OBJECT) 2 | set(CMAKE_CLC_COMPILE_OBJECT 3 | " -o -c -emit-llvm") 4 | endif() 5 | 6 | if(NOT CMAKE_CLC_CREATE_STATIC_LIBRARY) 7 | set(CMAKE_CLC_CREATE_STATIC_LIBRARY 8 | " -o ") 9 | endif() 10 | 11 | set(CMAKE_INCLUDE_FLAG_CLC "-I") 12 | -------------------------------------------------------------------------------- /cmake/CMakeDetermineCLCCompiler.cmake: -------------------------------------------------------------------------------- 1 | include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake) 2 | 3 | if(NOT CMAKE_CLC_COMPILER) 4 | find_program(CMAKE_CLC_COMPILER NAMES clang) 5 | endif() 6 | mark_as_advanced(CMAKE_CLC_COMPILER) 7 | 8 | if(NOT CMAKE_CLC_ARCHIVE) 9 | find_program(CMAKE_CLC_ARCHIVE NAMES llvm-link) 10 | endif() 11 | mark_as_advanced(CMAKE_CLC_ARCHIVE) 12 | 13 | set(CMAKE_CLC_COMPILER_ENV_VAR "CLC_COMPILER") 14 | set(CMAKE_CLC_ARCHIVE_ENV_VAR 
"CLC_LINKER") 15 | find_file(clc_comp_in CMakeCLCCompiler.cmake.in PATHS ${CMAKE_ROOT}/Modules ${CMAKE_MODULE_PATH}) 16 | # configure all variables set in this file 17 | configure_file(${clc_comp_in} ${CMAKE_PLATFORM_INFO_DIR}/CMakeCLCCompiler.cmake @ONLY) 18 | mark_as_advanced(clc_comp_in) 19 | -------------------------------------------------------------------------------- /cmake/CMakeDetermineLLAsmCompiler.cmake: -------------------------------------------------------------------------------- 1 | include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake) 2 | 3 | if(NOT CMAKE_LLAsm_PREPROCESSOR) 4 | find_program(CMAKE_LLAsm_PREPROCESSOR NAMES clang) 5 | endif() 6 | mark_as_advanced(CMAKE_LLAsm_PREPROCESSOR) 7 | 8 | if(NOT CMAKE_LLAsm_COMPILER) 9 | find_program(CMAKE_LLAsm_COMPILER NAMES llvm-as) 10 | endif() 11 | mark_as_advanced(CMAKE_LLAsm_ASSEMBLER) 12 | 13 | if(NOT CMAKE_LLAsm_ARCHIVE) 14 | find_program(CMAKE_LLAsm_ARCHIVE NAMES llvm-link) 15 | endif() 16 | mark_as_advanced(CMAKE_LLAsm_ARCHIVE) 17 | 18 | set(CMAKE_LLAsm_PREPROCESSOR_ENV_VAR "LL_PREPROCESSOR") 19 | set(CMAKE_LLAsm_COMPILER_ENV_VAR "LL_ASSEMBLER") 20 | set(CMAKE_LLAsm_ARCHIVE_ENV_VAR "LL_LINKER") 21 | find_file(ll_comp_in CMakeLLAsmCompiler.cmake.in PATHS ${CMAKE_ROOT}/Modules ${CMAKE_MODULE_PATH}) 22 | # configure all variables set in this file 23 | configure_file(${ll_comp_in} ${CMAKE_PLATFORM_INFO_DIR}/CMakeLLAsmCompiler.cmake @ONLY) 24 | mark_as_advanced(ll_comp_in) 25 | -------------------------------------------------------------------------------- /cmake/CMakeLLAsmCompiler.cmake.in: -------------------------------------------------------------------------------- 1 | set(CMAKE_LLAsm_PREPROCESSOR "@CMAKE_LLAsm_PREPROCESSOR@") 2 | set(CMAKE_LLAsm_COMPILER "@CMAKE_LLAsm_COMPILER@") 3 | set(CMAKE_LLAsm_ARCHIVE "@CMAKE_LLAsm_ARCHIVE@") 4 | set(CMAKE_LLAsm_COMPILER_LOADED 1) 5 | 6 | set(CMAKE_LLAsm_SOURCE_FILE_EXTENSIONS ll) 7 | set(CMAKE_LLAsm_OUTPUT_EXTENSION .bc) 8 | 
set(CMAKE_LLAsm_OUTPUT_EXTENSION_REPLACE 1) 9 | set(CMAKE_STATIC_LIBRARY_PREFIX_LLAsm "") 10 | set(CMAKE_STATIC_LIBRARY_SUFFIX_LLAsm ".bc") 11 | -------------------------------------------------------------------------------- /cmake/CMakeLLAsmInformation.cmake: -------------------------------------------------------------------------------- 1 | if(NOT CMAKE_LLAsm_COMPILE_OBJECT) 2 | set(CMAKE_LLAsm_COMPILE_OBJECT 3 | "${CMAKE_LLAsm_PREPROCESSOR} -E -P -x cl -o - | -o ") 4 | endif() 5 | 6 | if(NOT CMAKE_LLAsm_CREATE_STATIC_LIBRARY) 7 | set(CMAKE_LLAsm_CREATE_STATIC_LIBRARY 8 | " -o ") 9 | endif() 10 | 11 | set(CMAKE_INCLUDE_FLAG_LLAsm "-I") 12 | -------------------------------------------------------------------------------- /compile-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | clang -target nvptx--nvidiacl -Iptx-nvidiacl/include -Igeneric/include -Xclang -mlink-bitcode-file -Xclang nvptx--nvidiacl/lib/builtins.bc -include clc/clc.h -Dcl_clang_storage_class_specifiers -Dcl_khr_fp64 "$@" 4 | -------------------------------------------------------------------------------- /generic/include/clc/async/async_work_group_copy.h: -------------------------------------------------------------------------------- 1 | #define __CLC_DST_ADDR_SPACE local 2 | #define __CLC_SRC_ADDR_SPACE global 3 | #define __CLC_BODY 4 | #include 5 | #undef __CLC_DST_ADDR_SPACE 6 | #undef __CLC_SRC_ADDR_SPACE 7 | #undef __CLC_BODY 8 | 9 | #define __CLC_DST_ADDR_SPACE global 10 | #define __CLC_SRC_ADDR_SPACE local 11 | #define __CLC_BODY 12 | #include 13 | #undef __CLC_DST_ADDR_SPACE 14 | #undef __CLC_SRC_ADDR_SPACE 15 | #undef __CLC_BODY 16 | -------------------------------------------------------------------------------- /generic/include/clc/async/async_work_group_copy.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL event_t async_work_group_copy( 2 | 
__CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst, 3 | const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src, 4 | size_t num_gentypes, 5 | event_t event); 6 | -------------------------------------------------------------------------------- /generic/include/clc/async/async_work_group_strided_copy.h: -------------------------------------------------------------------------------- 1 | #define __CLC_DST_ADDR_SPACE local 2 | #define __CLC_SRC_ADDR_SPACE global 3 | #define __CLC_BODY 4 | #include 5 | #undef __CLC_DST_ADDR_SPACE 6 | #undef __CLC_SRC_ADDR_SPACE 7 | #undef __CLC_BODY 8 | 9 | #define __CLC_DST_ADDR_SPACE global 10 | #define __CLC_SRC_ADDR_SPACE local 11 | #define __CLC_BODY 12 | #include 13 | #undef __CLC_DST_ADDR_SPACE 14 | #undef __CLC_SRC_ADDR_SPACE 15 | #undef __CLC_BODY 16 | -------------------------------------------------------------------------------- /generic/include/clc/async/async_work_group_strided_copy.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL event_t async_work_group_strided_copy( 2 | __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst, 3 | const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src, 4 | size_t num_gentypes, 5 | size_t stride, 6 | event_t event); 7 | -------------------------------------------------------------------------------- /generic/include/clc/async/prefetch.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /generic/include/clc/async/prefetch.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL void prefetch(const global __CLC_GENTYPE *p, size_t num_gentypes); 2 | -------------------------------------------------------------------------------- /generic/include/clc/async/wait_group_events.h: 
-------------------------------------------------------------------------------- 1 | void wait_group_events(int num_events, event_t *event_list); 2 | -------------------------------------------------------------------------------- /generic/include/clc/atom_decl_int32.inc: -------------------------------------------------------------------------------- 1 | #define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \ 2 | _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE); 3 | 4 | __CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, int) 5 | __CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, uint) 6 | 7 | #undef __CLC_DECLARE_ATOM 8 | 9 | #undef __CLC_FUNCTION 10 | #undef __CLC_ADDRESS_SPACE 11 | -------------------------------------------------------------------------------- /generic/include/clc/atom_decl_int64.inc: -------------------------------------------------------------------------------- 1 | #define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \ 2 | _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE); 3 | 4 | __CLC_DECLARE_ATOM(local, long) 5 | __CLC_DECLARE_ATOM(local, ulong) 6 | __CLC_DECLARE_ATOM(global, long) 7 | __CLC_DECLARE_ATOM(global, ulong) 8 | 9 | #undef __CLC_DECLARE_ATOM 10 | 11 | #undef __CLC_FUNCTION 12 | -------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_add.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_add 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_and.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_and 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_cmpxchg.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD 
_CLC_DECL int atomic_cmpxchg (volatile local int *, int, int); 2 | _CLC_OVERLOAD _CLC_DECL int atomic_cmpxchg (volatile global int *, int, int); 3 | _CLC_OVERLOAD _CLC_DECL uint atomic_cmpxchg (volatile local uint *, uint, uint); 4 | _CLC_OVERLOAD _CLC_DECL uint atomic_cmpxchg (volatile global uint *, uint, uint); 5 | -------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_dec.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL int atomic_dec (volatile local int *); 2 | _CLC_OVERLOAD _CLC_DECL int atomic_dec (volatile global int *); 3 | _CLC_OVERLOAD _CLC_DECL uint atomic_dec (volatile local uint *); 4 | _CLC_OVERLOAD _CLC_DECL uint atomic_dec (volatile global uint *); 5 | -------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_decl.inc: -------------------------------------------------------------------------------- 1 | #define __CLC_DECLARE_ATOMIC(ADDRSPACE, TYPE) \ 2 | _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE); 3 | 4 | #define __CLC_DECLARE_ATOMIC_ADDRSPACE(TYPE) \ 5 | __CLC_DECLARE_ATOMIC(global, TYPE) \ 6 | __CLC_DECLARE_ATOMIC(local, TYPE) 7 | 8 | __CLC_DECLARE_ATOMIC_ADDRSPACE(int) 9 | __CLC_DECLARE_ATOMIC_ADDRSPACE(uint) 10 | 11 | #undef __CLC_DECLARE_ATOMIC_ADDRSPACE 12 | #undef __CLC_DECLARE_ATOMIC 13 | 14 | #undef __CLC_FUNCTION 15 | -------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_inc.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL int atomic_inc (volatile local int *); 2 | _CLC_OVERLOAD _CLC_DECL int atomic_inc (volatile global int *); 3 | _CLC_OVERLOAD _CLC_DECL uint atomic_inc (volatile local uint *); 4 | _CLC_OVERLOAD _CLC_DECL uint atomic_inc (volatile global uint *); 5 | 
-------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_max.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_max 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_min.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_min 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_or.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_or 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_sub.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_sub 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_xchg.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_xchg 2 | 3 | _CLC_OVERLOAD _CLC_DECL float __CLC_FUNCTION (volatile local float *, float); 4 | _CLC_OVERLOAD _CLC_DECL float __CLC_FUNCTION (volatile global float *, float); 5 | #include 6 | -------------------------------------------------------------------------------- /generic/include/clc/atomic/atomic_xor.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atomic_xor 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_add 2 | #define 
__CLC_ADDRESS_SPACE global 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(volatile global int *p, int cmp, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(volatile global unsigned int *p, unsigned int cmp, unsigned int val); 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL int atom_dec(volatile global int *p); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(volatile global unsigned int *p); 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL int atom_inc(volatile global int *p); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(volatile global unsigned int *p); 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_sub 2 | #define __CLC_ADDRESS_SPACE global 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_xchg 2 | #define __CLC_ADDRESS_SPACE global 3 | #include 4 | -------------------------------------------------------------------------------- 
/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_and 2 | #define __CLC_ADDRESS_SPACE global 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_max 2 | #define __CLC_ADDRESS_SPACE global 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_min 2 | #define __CLC_ADDRESS_SPACE global 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_or 2 | #define __CLC_ADDRESS_SPACE global 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_xor 2 | #define __CLC_ADDRESS_SPACE global 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_int64_base_atomics/atom_add.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_add 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_int64_base_atomics/atom_cmpxchg.h: 
-------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL long atom_cmpxchg(volatile global long *p, long cmp, long val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_cmpxchg(volatile global unsigned long *p, unsigned long cmp, unsigned long val); 3 | _CLC_OVERLOAD _CLC_DECL long atom_cmpxchg(volatile local long *p, long cmp, long val); 4 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_cmpxchg(volatile local unsigned long *p, unsigned long cmp, unsigned long val); 5 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_int64_base_atomics/atom_dec.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL long atom_dec(volatile global long *p); 2 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_dec(volatile global unsigned long *p); 3 | _CLC_OVERLOAD _CLC_DECL long atom_dec(volatile local long *p); 4 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_dec(volatile local unsigned long *p); 5 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_int64_base_atomics/atom_inc.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL long atom_inc(volatile global long *p); 2 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_inc(volatile global unsigned long *p); 3 | _CLC_OVERLOAD _CLC_DECL long atom_inc(volatile local long *p); 4 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_inc(volatile local unsigned long *p); 5 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_sub 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h: 
-------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_xchg 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_and 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_max 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_min 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_or 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_xor 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_add 2 | #define __CLC_ADDRESS_SPACE local 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h: 
-------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(volatile local int *p, int cmp, int val); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(volatile local unsigned int *p, unsigned int cmp, unsigned int val); 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL int atom_dec(volatile local int *p); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(volatile local unsigned int *p); 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL int atom_inc(volatile local int *p); 2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(volatile local unsigned int *p); 3 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_sub 2 | #define __CLC_ADDRESS_SPACE local 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_xchg 2 | #define __CLC_ADDRESS_SPACE local 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_and 2 | #define __CLC_ADDRESS_SPACE local 3 | 
#include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_max 2 | #define __CLC_ADDRESS_SPACE local 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_min 2 | #define __CLC_ADDRESS_SPACE local 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_or 2 | #define __CLC_ADDRESS_SPACE local 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION atom_xor 2 | #define __CLC_ADDRESS_SPACE local 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/clc/clcfunc.h: -------------------------------------------------------------------------------- 1 | #define _CLC_OVERLOAD __attribute__((overloadable)) 2 | #define _CLC_DECL 3 | #define _CLC_DEF __attribute__((always_inline)) 4 | #define _CLC_INLINE __attribute__((always_inline)) inline 5 | -------------------------------------------------------------------------------- /generic/include/clc/clcmacros.h: -------------------------------------------------------------------------------- 1 | /* 6.9 Preprocessor Directives and Macros 2 | * Some of these are handled by clang or passed by clover */ 3 | 
#if __OPENCL_VERSION__ >= 110 4 | #define CLC_VERSION_1_0 100 5 | #define CLC_VERSION_1_1 110 6 | #endif 7 | 8 | #if __OPENCL_VERSION__ >= 120 9 | #define CLC_VERSION_1_2 120 10 | #endif 11 | 12 | #define NULL ((void*)0) 13 | 14 | #define __kernel_exec(X, typen) __kernel \ 15 | __attribute__((work_group_size_hint(X, 1, 1))) \ 16 | __attribute__((vec_type_hint(typen))) 17 | 18 | #define kernel_exec(X, typen) __kernel_exec(X, typen) 19 | -------------------------------------------------------------------------------- /generic/include/clc/common/mix.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/common/mix.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mix(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c); 2 | 3 | #ifndef __CLC_SCALAR 4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mix(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_SCALAR_GENTYPE c); 5 | #endif 6 | -------------------------------------------------------------------------------- /generic/include/clc/common/sign.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION sign 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_FUNCTION 5 | #undef __CLC_BODY 6 | -------------------------------------------------------------------------------- /generic/include/clc/explicit_fence/explicit_memory_fence.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL void mem_fence(cl_mem_fence_flags flags); 2 | _CLC_DECL void read_mem_fence(cl_mem_fence_flags flags); 3 | _CLC_DECL void write_mem_fence(cl_mem_fence_flags flags); 4 | -------------------------------------------------------------------------------- /generic/include/clc/geometric/cross.h: 
-------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL float3 cross(float3 p0, float3 p1); 2 | _CLC_OVERLOAD _CLC_DECL float4 cross(float4 p0, float4 p1); 3 | 4 | #ifdef cl_khr_fp64 5 | _CLC_OVERLOAD _CLC_DECL double3 cross(double3 p0, double3 p1); 6 | _CLC_OVERLOAD _CLC_DECL double4 cross(double4 p0, double4 p1); 7 | #endif 8 | -------------------------------------------------------------------------------- /generic/include/clc/geometric/distance.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/geometric/dot.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/geometric/dot.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_FLOAT dot(__CLC_FLOATN p0, __CLC_FLOATN p1); 2 | -------------------------------------------------------------------------------- /generic/include/clc/geometric/length.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/geometric/length.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_FLOAT length(__CLC_FLOATN p0); 2 | -------------------------------------------------------------------------------- /generic/include/clc/geometric/normalize.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- 
/generic/include/clc/geometric/normalize.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_FLOATN normalize(__CLC_FLOATN p); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/abs.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/integer/abs.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE abs(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/abs_diff.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/integer/abs_diff.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/add_sat.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/integer/add_sat.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE add_sat(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/clz.h: -------------------------------------------------------------------------------- 1 | #define 
__CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/integer/clz.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clz(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/definitions.h: -------------------------------------------------------------------------------- 1 | #define CHAR_BIT 8 2 | #define INT_MAX 2147483647 3 | #define INT_MIN (-2147483647 - 1) 4 | #define LONG_MAX 0x7fffffffffffffffL 5 | #define LONG_MIN (-0x7fffffffffffffffL - 1) 6 | #define CHAR_MAX SCHAR_MAX 7 | #define CHAR_MIN SCHAR_MIN 8 | #define SCHAR_MAX 127 9 | #define SCHAR_MIN (-127 - 1) 10 | #define SHRT_MAX 32767 11 | #define SHRT_MIN (-32767 - 1) 12 | #define UCHAR_MAX 255 13 | #define USHRT_MAX 65535 14 | #define UINT_MAX 0xffffffff 15 | #define ULONG_MAX 0xffffffffffffffffUL 16 | -------------------------------------------------------------------------------- /generic/include/clc/integer/hadd.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/integer/hadd.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/integer-gentype.inc: -------------------------------------------------------------------------------- 1 | #define __CLC_GENTYPE int 2 | #include __CLC_BODY 3 | #undef __CLC_GENTYPE 4 | 5 | #define __CLC_GENTYPE int2 6 | #include __CLC_BODY 7 | #undef __CLC_GENTYPE 8 | 9 | #define __CLC_GENTYPE int3 10 | #include __CLC_BODY 11 | #undef __CLC_GENTYPE 12 | 13 
| #define __CLC_GENTYPE int4 14 | #include __CLC_BODY 15 | #undef __CLC_GENTYPE 16 | 17 | #define __CLC_GENTYPE int8 18 | #include __CLC_BODY 19 | #undef __CLC_GENTYPE 20 | 21 | #define __CLC_GENTYPE int16 22 | #include __CLC_BODY 23 | #undef __CLC_GENTYPE 24 | 25 | #define __CLC_GENTYPE uint 26 | #include __CLC_BODY 27 | #undef __CLC_GENTYPE 28 | 29 | #define __CLC_GENTYPE uint2 30 | #include __CLC_BODY 31 | #undef __CLC_GENTYPE 32 | 33 | #define __CLC_GENTYPE uint3 34 | #include __CLC_BODY 35 | #undef __CLC_GENTYPE 36 | 37 | #define __CLC_GENTYPE uint4 38 | #include __CLC_BODY 39 | #undef __CLC_GENTYPE 40 | 41 | #define __CLC_GENTYPE uint8 42 | #include __CLC_BODY 43 | #undef __CLC_GENTYPE 44 | 45 | #define __CLC_GENTYPE uint16 46 | #include __CLC_BODY 47 | #undef __CLC_GENTYPE 48 | -------------------------------------------------------------------------------- /generic/include/clc/integer/mad24.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /generic/include/clc/integer/mad24.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/mad_hi.h: -------------------------------------------------------------------------------- 1 | #define mad_hi(a, b, c) (mul_hi((a),(b))+(c)) 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/mad_sat.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- 
/generic/include/clc/integer/mad_sat.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad_sat(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/mul24.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /generic/include/clc/integer/mul24.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/mul_hi.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/integer/mul_hi.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul_hi(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/popcount.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION popcount 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_FUNCTION 5 | #undef __CLC_BODY 6 | -------------------------------------------------------------------------------- /generic/include/clc/integer/rhadd.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/integer/rhadd.inc: 
-------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/rotate.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/integer/rotate.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rotate(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/sub_sat.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/integer/sub_sat.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sub_sat(__CLC_GENTYPE x, __CLC_GENTYPE y); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/unary.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- /generic/include/clc/integer/upsample.h: -------------------------------------------------------------------------------- 1 | #define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ 2 | _CLC_OVERLOAD _CLC_DECL BGENTYPE upsample(GENTYPE hi, UGENTYPE lo); 3 | 4 | #define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) \ 5 | __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ 6 | 
__CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \ 7 | __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \ 8 | __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \ 9 | __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) \ 10 | __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16) \ 11 | 12 | #define __CLC_UPSAMPLE_TYPES() \ 13 | __CLC_UPSAMPLE_VEC(short, char, uchar) \ 14 | __CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \ 15 | __CLC_UPSAMPLE_VEC(int, short, ushort) \ 16 | __CLC_UPSAMPLE_VEC(uint, ushort, ushort) \ 17 | __CLC_UPSAMPLE_VEC(long, int, uint) \ 18 | __CLC_UPSAMPLE_VEC(ulong, uint, uint) \ 19 | 20 | __CLC_UPSAMPLE_TYPES() 21 | 22 | #undef __CLC_UPSAMPLE_TYPES 23 | #undef __CLC_UPSAMPLE_DECL 24 | #undef __CLC_UPSAMPLE_VEC 25 | 26 | -------------------------------------------------------------------------------- /generic/include/clc/math/acos.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION acos 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/asin.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION asin 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/binary_decl.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b); 2 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b); 3 | -------------------------------------------------------------------------------- /generic/include/clc/math/binary_decl_tt.inc: 
-------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b); 2 | -------------------------------------------------------------------------------- /generic/include/clc/math/ceil.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION ceil 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/copysign.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION copysign 2 | #define __CLC_BODY 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/cos.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION cos 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/cospi.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION cospi 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/erf.h: -------------------------------------------------------------------------------- 1 | #undef erf /* undefine the name this header declares (was erfc), so __CLC_FUNCTION below is not re-expanded by a stale macro */ 2 | 3 | #define __CLC_BODY 4 | #define __CLC_FUNCTION erf 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/erfc.h: 
-------------------------------------------------------------------------------- 1 | #undef erfc 2 | 3 | #define __CLC_BODY 4 | #define __CLC_FUNCTION erfc 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/exp.h: -------------------------------------------------------------------------------- 1 | #undef exp 2 | 3 | #define __CLC_BODY 4 | #define __CLC_FUNCTION exp 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/exp10.h: -------------------------------------------------------------------------------- 1 | #undef exp10 2 | 3 | #define __CLC_BODY 4 | #define __CLC_FUNCTION exp10 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/expm1.h: -------------------------------------------------------------------------------- 1 | #undef expm1 /* undefine the name this header declares (was exp), so __CLC_FUNCTION below is not re-expanded by a stale macro */ 2 | 3 | #define __CLC_BODY 4 | #define __CLC_FUNCTION expm1 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/fabs.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION fabs 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/fdim.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION fdim 2 | #define __CLC_BODY 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- 
/generic/include/clc/math/floor.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION floor 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/fma.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION fma 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/fmax.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION fmax 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | 9 | -------------------------------------------------------------------------------- /generic/include/clc/math/fmin.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION fmin 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | 9 | -------------------------------------------------------------------------------- /generic/include/clc/math/fmod.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION fmod 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_FUNCTION 5 | -------------------------------------------------------------------------------- /generic/include/clc/math/frexp.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/math/frexp.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE 
frexp(__CLC_GENTYPE x, global __CLC_INTN *iptr); 2 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, local __CLC_INTN *iptr); 3 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, private __CLC_INTN *iptr); 4 | -------------------------------------------------------------------------------- /generic/include/clc/math/half_cos.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_cos 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/half_divide.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_divide 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/half_exp.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_exp 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/half_exp10.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_exp10 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/half_exp2.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_exp2 3 | #define 
__FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/half_log.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_log 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/half_log10.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_log10 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/half_log2.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_log2 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/half_powr.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_powr 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/half_recip.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_recip 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | 
-------------------------------------------------------------------------------- /generic/include/clc/math/half_sin.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_sin 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/half_tan.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION half_tan 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/hypot.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION hypot 2 | #define __CLC_BODY 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/ilogb.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | 3 | #include 4 | 5 | #undef __CLC_BODY 6 | -------------------------------------------------------------------------------- /generic/include/clc/math/ilogb.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_INTN ilogb(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- /generic/include/clc/math/lgamma.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION lgamma 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | 
-------------------------------------------------------------------------------- /generic/include/clc/math/lgamma_r.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/math/lgamma_r.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, global __CLC_INTN *iptr); 2 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, local __CLC_INTN *iptr); 3 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, private __CLC_INTN *iptr); 4 | -------------------------------------------------------------------------------- /generic/include/clc/math/log10.h: -------------------------------------------------------------------------------- 1 | #undef log10 2 | 3 | #define __CLC_BODY 4 | #define __CLC_FUNCTION log10 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/logb.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION logb 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/mad.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION mad 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/maxmag.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION maxmag 3 | 4 | #include 5 | 6 
| #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/minmag.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION minmag 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/nan.h: -------------------------------------------------------------------------------- 1 | #define __CLC_CONCAT(x, y) x ## y 2 | #define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y) 3 | 4 | #define __CLC_BODY 5 | #include 6 | 7 | #undef __CLC_XCONCAT 8 | #undef __CLC_CONCAT 9 | -------------------------------------------------------------------------------- /generic/include/clc/math/nan.inc: -------------------------------------------------------------------------------- 1 | #ifdef __CLC_SCALAR 2 | #define __CLC_VECSIZE 3 | #endif 4 | 5 | #if __CLC_FPSIZE == 64 6 | #define __CLC_NATN __CLC_XCONCAT(ulong, __CLC_VECSIZE) 7 | #elif __CLC_FPSIZE == 32 8 | #define __CLC_NATN __CLC_XCONCAT(uint, __CLC_VECSIZE) 9 | #elif __CLC_FPSIZE == 16 10 | #define __CLC_NATN __CLC_XCONCAT(ushort, __CLC_VECSIZE) 11 | #endif 12 | 13 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE nan(__CLC_NATN code); 14 | 15 | #undef __CLC_NATN 16 | #ifdef __CLC_SCALAR 17 | #undef __CLC_VECSIZE 18 | #endif 19 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_cos.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION native_cos 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_divide.h: 
-------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION native_divide 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_exp.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION native_exp 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_exp10.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION native_exp10 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_exp2.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION native_exp2 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_log10.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION native_log10 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_powr.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 
| #define __CLC_FUNCTION native_powr 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_recip.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION native_recip 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_rsqrt.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION native_rsqrt 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_sin.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION native_sin 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_sqrt.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION native_sqrt 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/native_tan.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION native_tan 3 | #define __FLOAT_ONLY 4 | 5 | #include 6 | 7 | #undef __FLOAT_ONLY 8 
| #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/math/nextafter.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION nextafter 2 | #define __CLC_BODY 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/pow.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION pow 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_BODY 5 | #undef __CLC_FUNCTION 6 | -------------------------------------------------------------------------------- /generic/include/clc/math/pown.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /generic/include/clc/math/pown.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE pown(__CLC_GENTYPE a, __CLC_INTN b); 2 | -------------------------------------------------------------------------------- /generic/include/clc/math/powr.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION powr 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_BODY 5 | #undef __CLC_FUNCTION 6 | -------------------------------------------------------------------------------- /generic/include/clc/math/remainder.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION remainder 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_FUNCTION 5 | -------------------------------------------------------------------------------- /generic/include/clc/math/remquo.h: 
-------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION remquo 2 | 3 | #define __CLC_BODY 4 | #define __CLC_ADDRESS_SPACE global 5 | #include 6 | #undef __CLC_ADDRESS_SPACE 7 | 8 | #define __CLC_BODY 9 | #define __CLC_ADDRESS_SPACE local 10 | #include 11 | #undef __CLC_ADDRESS_SPACE 12 | 13 | #define __CLC_BODY 14 | #define __CLC_ADDRESS_SPACE private 15 | #include 16 | #undef __CLC_ADDRESS_SPACE 17 | 18 | #undef __CLC_FUNCTION 19 | -------------------------------------------------------------------------------- /generic/include/clc/math/remquo.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q); 2 | -------------------------------------------------------------------------------- /generic/include/clc/math/rint.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION rint 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/rootn.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /generic/include/clc/math/rootn.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rootn(__CLC_GENTYPE a, __CLC_INTN b); 2 | -------------------------------------------------------------------------------- /generic/include/clc/math/round.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION round 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef 
__CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/rsqrt.h: -------------------------------------------------------------------------------- 1 | #define rsqrt(x) (1.f/sqrt(x)) 2 | -------------------------------------------------------------------------------- /generic/include/clc/math/sin.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION sin 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/sincos.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | -------------------------------------------------------------------------------- /generic/include/clc/math/sincos.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, global __CLC_GENTYPE * cosval); 2 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, local __CLC_GENTYPE * cosval); 3 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, private __CLC_GENTYPE * cosval); 4 | -------------------------------------------------------------------------------- /generic/include/clc/math/sinpi.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION sinpi 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/sqrt.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION sqrt 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | 
-------------------------------------------------------------------------------- /generic/include/clc/math/tan.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION tan 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/tanpi.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION tanpi 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/ternary_decl.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c); 2 | -------------------------------------------------------------------------------- /generic/include/clc/math/tgamma.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION tgamma 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/trunc.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION trunc 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/math/unary_decl.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x); 2 | -------------------------------------------------------------------------------- 
/generic/include/clc/relational/all.h: -------------------------------------------------------------------------------- 1 | #define _CLC_ALL_DECL(TYPE) \ 2 | _CLC_OVERLOAD _CLC_DECL int all(TYPE v); 3 | 4 | #define _CLC_VECTOR_ALL_DECL(TYPE) \ 5 | _CLC_ALL_DECL(TYPE) \ 6 | _CLC_ALL_DECL(TYPE##2) \ 7 | _CLC_ALL_DECL(TYPE##3) \ 8 | _CLC_ALL_DECL(TYPE##4) \ 9 | _CLC_ALL_DECL(TYPE##8) \ 10 | _CLC_ALL_DECL(TYPE##16) 11 | 12 | _CLC_VECTOR_ALL_DECL(char) 13 | _CLC_VECTOR_ALL_DECL(short) 14 | _CLC_VECTOR_ALL_DECL(int) 15 | _CLC_VECTOR_ALL_DECL(long) 16 | 17 | #undef _CLC_ALL_DECL 18 | #undef _CLC_VECTOR_ALL_DECL 19 | -------------------------------------------------------------------------------- /generic/include/clc/relational/any.h: -------------------------------------------------------------------------------- 1 | 2 | #define _CLC_ANY_DECL(TYPE) \ 3 | _CLC_OVERLOAD _CLC_DECL int any(TYPE v); 4 | 5 | #define _CLC_VECTOR_ANY_DECL(TYPE) \ 6 | _CLC_ANY_DECL(TYPE) \ 7 | _CLC_ANY_DECL(TYPE##2) \ 8 | _CLC_ANY_DECL(TYPE##3) \ 9 | _CLC_ANY_DECL(TYPE##4) \ 10 | _CLC_ANY_DECL(TYPE##8) \ 11 | _CLC_ANY_DECL(TYPE##16) 12 | 13 | _CLC_VECTOR_ANY_DECL(char) 14 | _CLC_VECTOR_ANY_DECL(short) 15 | _CLC_VECTOR_ANY_DECL(int) 16 | _CLC_VECTOR_ANY_DECL(long) 17 | 18 | #undef _CLC_ANY_DECL 19 | #undef _CLC_VECTOR_ANY_DECL 20 | -------------------------------------------------------------------------------- /generic/include/clc/relational/binary_decl.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN a, __CLC_FLOATN b); 2 | -------------------------------------------------------------------------------- /generic/include/clc/relational/isequal.h: -------------------------------------------------------------------------------- 1 | #define _CLC_ISEQUAL_DECL(TYPE, RETTYPE) \ 2 | _CLC_OVERLOAD _CLC_DECL RETTYPE isequal(TYPE x, TYPE y); 3 | 4 | #define _CLC_VECTOR_ISEQUAL_DECL(TYPE, RETTYPE) \ 5 | _CLC_ISEQUAL_DECL(TYPE##2, RETTYPE##2) \ 6 | 
_CLC_ISEQUAL_DECL(TYPE##3, RETTYPE##3) \ 7 | _CLC_ISEQUAL_DECL(TYPE##4, RETTYPE##4) \ 8 | _CLC_ISEQUAL_DECL(TYPE##8, RETTYPE##8) \ 9 | _CLC_ISEQUAL_DECL(TYPE##16, RETTYPE##16) 10 | 11 | _CLC_ISEQUAL_DECL(float, int) 12 | _CLC_VECTOR_ISEQUAL_DECL(float, int) 13 | 14 | #ifdef cl_khr_fp64 15 | _CLC_ISEQUAL_DECL(double, int) 16 | _CLC_VECTOR_ISEQUAL_DECL(double, long) 17 | #endif 18 | #ifdef cl_khr_fp16 19 | _CLC_ISEQUAL_DECL(half, int) 20 | _CLC_VECTOR_ISEQUAL_DECL(half, short) 21 | #endif 22 | 23 | #undef _CLC_ISEQUAL_DECL 24 | #undef _CLC_VECTOR_ISEQUAL_DECL 25 | -------------------------------------------------------------------------------- /generic/include/clc/relational/isfinite.h: -------------------------------------------------------------------------------- 1 | #undef isfinite 2 | 3 | #define __CLC_FUNCTION isfinite 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/relational/isgreater.h: -------------------------------------------------------------------------------- 1 | #undef isgreater 2 | 3 | #define __CLC_FUNCTION isgreater 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/relational/isgreaterequal.h: -------------------------------------------------------------------------------- 1 | #undef isgreaterequal 2 | 3 | #define __CLC_FUNCTION isgreaterequal 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/relational/isinf.h: -------------------------------------------------------------------------------- 1 | 2 | #define _CLC_ISINF_DECL(RET_TYPE, ARG_TYPE) \ 3 | _CLC_OVERLOAD _CLC_DECL RET_TYPE isinf(ARG_TYPE); 4 | 5 | 
#define _CLC_VECTOR_ISINF_DECL(RET_TYPE, ARG_TYPE) \ 6 | _CLC_ISINF_DECL(RET_TYPE##2, ARG_TYPE##2) \ 7 | _CLC_ISINF_DECL(RET_TYPE##3, ARG_TYPE##3) \ 8 | _CLC_ISINF_DECL(RET_TYPE##4, ARG_TYPE##4) \ 9 | _CLC_ISINF_DECL(RET_TYPE##8, ARG_TYPE##8) \ 10 | _CLC_ISINF_DECL(RET_TYPE##16, ARG_TYPE##16) 11 | 12 | _CLC_ISINF_DECL(int, float) 13 | _CLC_VECTOR_ISINF_DECL(int, float) 14 | 15 | #ifdef cl_khr_fp64 16 | _CLC_ISINF_DECL(int, double) 17 | _CLC_VECTOR_ISINF_DECL(long, double) 18 | #endif 19 | 20 | #ifdef cl_khr_fp16 21 | _CLC_ISINF_DECL(int, half) 22 | _CLC_VECTOR_ISINF_DECL(short, half) 23 | #endif 24 | 25 | #undef _CLC_ISINF_DECL 26 | #undef _CLC_VECTOR_ISINF_DECL 27 | -------------------------------------------------------------------------------- /generic/include/clc/relational/isless.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION isless 2 | #define __CLC_BODY 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/relational/islessequal.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION islessequal 2 | #define __CLC_BODY 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/relational/islessgreater.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION islessgreater 2 | #define __CLC_BODY 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/clc/relational/isnan.h: -------------------------------------------------------------------------------- 1 | 2 | #define _CLC_ISNAN_DECL(RET_TYPE, ARG_TYPE) \ 3 | _CLC_OVERLOAD _CLC_DECL RET_TYPE 
isnan(ARG_TYPE); 4 | 5 | #define _CLC_VECTOR_ISNAN_DECL(RET_TYPE, ARG_TYPE) \ 6 | _CLC_ISNAN_DECL(RET_TYPE##2, ARG_TYPE##2) \ 7 | _CLC_ISNAN_DECL(RET_TYPE##3, ARG_TYPE##3) \ 8 | _CLC_ISNAN_DECL(RET_TYPE##4, ARG_TYPE##4) \ 9 | _CLC_ISNAN_DECL(RET_TYPE##8, ARG_TYPE##8) \ 10 | _CLC_ISNAN_DECL(RET_TYPE##16, ARG_TYPE##16) 11 | 12 | _CLC_ISNAN_DECL(int, float) 13 | _CLC_VECTOR_ISNAN_DECL(int, float) 14 | 15 | #ifdef cl_khr_fp64 16 | _CLC_ISNAN_DECL(int, double) 17 | _CLC_VECTOR_ISNAN_DECL(long, double) 18 | #endif 19 | 20 | #ifdef cl_khr_fp16 21 | _CLC_ISNAN_DECL(int, half) 22 | _CLC_VECTOR_ISNAN_DECL(short, half) 23 | #endif 24 | 25 | #undef _CLC_ISNAN_DECL 26 | #undef _CLC_VECTOR_ISNAN_DECL 27 | -------------------------------------------------------------------------------- /generic/include/clc/relational/isnormal.h: -------------------------------------------------------------------------------- 1 | #undef isnormal 2 | 3 | #define __CLC_FUNCTION isnormal 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/relational/isnotequal.h: -------------------------------------------------------------------------------- 1 | #undef isnotequal 2 | 3 | #define __CLC_FUNCTION isnotequal 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/relational/isordered.h: -------------------------------------------------------------------------------- 1 | #undef isordered 2 | 3 | #define __CLC_FUNCTION isordered 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/relational/isunordered.h: 
-------------------------------------------------------------------------------- 1 | #undef isunordered 2 | 3 | #define __CLC_FUNCTION isunordered 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/relational/select.h: -------------------------------------------------------------------------------- 1 | /* Duplicate these so we don't have to distribute utils.h */ 2 | #define __CLC_CONCAT(x, y) x ## y 3 | #define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y) 4 | 5 | #define __CLC_BODY 6 | #include 7 | #define __CLC_BODY 8 | #include 9 | 10 | #undef __CLC_CONCAT 11 | #undef __CLC_XCONCAT 12 | -------------------------------------------------------------------------------- /generic/include/clc/relational/select.inc: -------------------------------------------------------------------------------- 1 | #ifdef __CLC_SCALAR 2 | #define __CLC_VECSIZE 3 | #endif 4 | 5 | #if __CLC_FPSIZE == 64 6 | #define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE) 7 | #define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE) 8 | #elif __CLC_FPSIZE == 32 9 | #define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE) 10 | #define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE) 11 | #elif __CLC_FPSIZE == 16 12 | #define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE) 13 | #define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE) 14 | #endif 15 | 16 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_S_GENTYPE z); 17 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_U_GENTYPE z); 18 | 19 | #ifdef __CLC_FPSIZE 20 | #undef __CLC_S_GENTYPE 21 | #undef __CLC_U_GENTYPE 22 | #endif 23 | #ifdef __CLC_SCALAR 24 | #undef __CLC_VECSIZE 25 | #endif 26 | -------------------------------------------------------------------------------- /generic/include/clc/relational/signbit.h: 
-------------------------------------------------------------------------------- 1 | #undef signbit 2 | 3 | #define __CLC_FUNCTION signbit 4 | #define __CLC_BODY 5 | 6 | #include 7 | 8 | #undef __CLC_BODY 9 | #undef __CLC_FUNCTION 10 | -------------------------------------------------------------------------------- /generic/include/clc/relational/unary_decl.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN x); 2 | -------------------------------------------------------------------------------- /generic/include/clc/shared/clamp.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | 4 | #define __CLC_BODY 5 | #include 6 | -------------------------------------------------------------------------------- /generic/include/clc/shared/clamp.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z); 2 | 3 | #ifndef __CLC_SCALAR 4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z); 5 | #endif 6 | -------------------------------------------------------------------------------- /generic/include/clc/shared/max.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | 4 | #define __CLC_BODY 5 | #include 6 | -------------------------------------------------------------------------------- /generic/include/clc/shared/max.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b); 2 | 3 | #ifndef __CLC_SCALAR 4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b); 5 | #endif 6 | 
-------------------------------------------------------------------------------- /generic/include/clc/shared/min.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | 4 | #define __CLC_BODY 5 | #include 6 | -------------------------------------------------------------------------------- /generic/include/clc/shared/min.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b); 2 | 3 | #ifndef __CLC_SCALAR 4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b); 5 | #endif 6 | -------------------------------------------------------------------------------- /generic/include/clc/synchronization/barrier.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL void barrier(cl_mem_fence_flags flags); 2 | -------------------------------------------------------------------------------- /generic/include/clc/synchronization/cl_mem_fence_flags.h: -------------------------------------------------------------------------------- 1 | typedef uint cl_mem_fence_flags; 2 | 3 | #define CLK_LOCAL_MEM_FENCE 1 4 | #define CLK_GLOBAL_MEM_FENCE 2 5 | -------------------------------------------------------------------------------- /generic/include/clc/workitem/get_global_id.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_global_id(uint dim); 2 | -------------------------------------------------------------------------------- /generic/include/clc/workitem/get_global_offset.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_global_offset(uint dim); 2 | -------------------------------------------------------------------------------- /generic/include/clc/workitem/get_global_size.h: 
-------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_global_size(uint dim); 2 | -------------------------------------------------------------------------------- /generic/include/clc/workitem/get_group_id.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_group_id(uint dim); 2 | -------------------------------------------------------------------------------- /generic/include/clc/workitem/get_local_id.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_local_id(uint dim); 2 | -------------------------------------------------------------------------------- /generic/include/clc/workitem/get_local_size.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_local_size(uint dim); 2 | -------------------------------------------------------------------------------- /generic/include/clc/workitem/get_num_groups.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL size_t get_num_groups(uint dim); 2 | -------------------------------------------------------------------------------- /generic/include/clc/workitem/get_work_dim.h: -------------------------------------------------------------------------------- 1 | _CLC_DECL uint get_work_dim(void); 2 | -------------------------------------------------------------------------------- /generic/include/integer/popcount.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_native_popcount 2 | #define __CLC_INTRINSIC "llvm.ctpop" 3 | #include 4 | -------------------------------------------------------------------------------- /generic/include/integer/unary_intrin.inc: -------------------------------------------------------------------------------- 1 | #define __CLC_INTRINSIC_DEF(SCALAR_TYPE, BIT_SIZE) \ 2 | 
_CLC_OVERLOAD SCALAR_TYPE __CLC_FUNCTION(SCALAR_TYPE x) __asm(__CLC_INTRINSIC ".i" BIT_SIZE); \ 3 | _CLC_OVERLOAD SCALAR_TYPE##2 __CLC_FUNCTION(SCALAR_TYPE##2 x) __asm(__CLC_INTRINSIC ".v2i" BIT_SIZE); \ 4 | _CLC_OVERLOAD SCALAR_TYPE##3 __CLC_FUNCTION(SCALAR_TYPE##3 x) __asm(__CLC_INTRINSIC ".v3i" BIT_SIZE); \ 5 | _CLC_OVERLOAD SCALAR_TYPE##4 __CLC_FUNCTION(SCALAR_TYPE##4 x) __asm(__CLC_INTRINSIC ".v4i" BIT_SIZE); \ 6 | _CLC_OVERLOAD SCALAR_TYPE##8 __CLC_FUNCTION(SCALAR_TYPE##8 x) __asm(__CLC_INTRINSIC ".v8i" BIT_SIZE); \ 7 | _CLC_OVERLOAD SCALAR_TYPE##16 __CLC_FUNCTION(SCALAR_TYPE##16 x) __asm(__CLC_INTRINSIC ".v16i" BIT_SIZE); 8 | 9 | __CLC_INTRINSIC_DEF(char, "8") 10 | __CLC_INTRINSIC_DEF(uchar, "8") 11 | __CLC_INTRINSIC_DEF(short, "16") 12 | __CLC_INTRINSIC_DEF(ushort, "16") 13 | __CLC_INTRINSIC_DEF(int, "32") 14 | __CLC_INTRINSIC_DEF(uint, "32") 15 | __CLC_INTRINSIC_DEF(long, "64") 16 | __CLC_INTRINSIC_DEF(ulong, "64") 17 | 18 | #undef __CLC_FUNCTION 19 | #undef __CLC_INTRINSIC 20 | #undef __CLC_INTRINSIC_DEF 21 | -------------------------------------------------------------------------------- /generic/include/math/clc_exp10.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_exp10 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_FUNCTION 5 | -------------------------------------------------------------------------------- /generic/include/math/clc_fma.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_fma 2 | #define __CLC_INTRINSIC "llvm.fma" 3 | #include "math/ternary_intrin.inc" 4 | 5 | #define __FLOAT_ONLY 6 | #define __CLC_FUNCTION __clc_sw_fma 7 | #define __CLC_BODY 8 | #include 9 | #undef __CLC_BODY 10 | #undef __CLC_FUNCTION 11 | #undef __FLOAT_ONLY 12 | -------------------------------------------------------------------------------- /generic/include/math/clc_fmod.h: 
-------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_fmod 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_FUNCTION 5 | -------------------------------------------------------------------------------- /generic/include/math/clc_hypot.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_hypot 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_BODY 5 | #undef __CLC_FUNCTION 6 | -------------------------------------------------------------------------------- /generic/include/math/clc_ldexp.h: -------------------------------------------------------------------------------- 1 | _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float, int); 2 | 3 | #ifdef cl_khr_fp64 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 5 | _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int); 6 | #endif 7 | 8 | #ifdef cl_khr_fp16 9 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 10 | _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(half, int); 11 | #endif 12 | -------------------------------------------------------------------------------- /generic/include/math/clc_nextafter.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #define __CLC_FUNCTION __clc_nextafter 3 | 4 | #include 5 | 6 | #undef __CLC_BODY 7 | #undef __CLC_FUNCTION 8 | -------------------------------------------------------------------------------- /generic/include/math/clc_pow.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_pow 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_BODY 5 | #undef __CLC_FUNCTION 6 | -------------------------------------------------------------------------------- /generic/include/math/clc_pown.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | 
-------------------------------------------------------------------------------- /generic/include/math/clc_pown.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_pown(__CLC_GENTYPE a, __CLC_INTN b); 2 | -------------------------------------------------------------------------------- /generic/include/math/clc_powr.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_powr 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_BODY 5 | #undef __CLC_FUNCTION 6 | -------------------------------------------------------------------------------- /generic/include/math/clc_remainder.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_remainder 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_FUNCTION 5 | -------------------------------------------------------------------------------- /generic/include/math/clc_remquo.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_remquo 2 | 3 | #define __CLC_BODY 4 | #define __CLC_ADDRESS_SPACE private 5 | #include 6 | #undef __CLC_ADDRESS_SPACE 7 | 8 | #undef __CLC_FUNCTION 9 | -------------------------------------------------------------------------------- /generic/include/math/clc_rootn.h: -------------------------------------------------------------------------------- 1 | #define __CLC_BODY 2 | #include 3 | #undef __CLC_BODY 4 | -------------------------------------------------------------------------------- /generic/include/math/clc_rootn.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_rootn(__CLC_GENTYPE a, __CLC_INTN b); 2 | -------------------------------------------------------------------------------- /generic/include/math/clc_sqrt.h: 
-------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_sqrt 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_BODY 5 | #undef __CLC_FUNCTION 6 | -------------------------------------------------------------------------------- /generic/include/math/clc_tan.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_tan 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_BODY 5 | #undef __CLC_FUNCTION 6 | -------------------------------------------------------------------------------- /generic/include/math/clc_tanpi.h: -------------------------------------------------------------------------------- 1 | #define __CLC_FUNCTION __clc_tanpi 2 | #define __CLC_BODY 3 | #include 4 | #undef __CLC_BODY 5 | #undef __CLC_FUNCTION 6 | -------------------------------------------------------------------------------- /generic/include/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef __CLC_UTILS_H_ 2 | #define __CLC_UTILS_H_ 3 | 4 | #define __CLC_CONCAT(x, y) x ## y 5 | #define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y) 6 | 7 | #define __CLC_STR(x) #x 8 | #define __CLC_XSTR(x) __CLC_STR(x) 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /generic/lib/async/async_work_group_copy.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/async/async_work_group_copy.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF event_t async_work_group_copy( 2 | local __CLC_GENTYPE *dst, 3 | const global __CLC_GENTYPE *src, 4 | size_t num_gentypes, 5 | event_t event) { 6 | 7 | return async_work_group_strided_copy(dst, src, num_gentypes, 1, 
event); 8 | } 9 | 10 | _CLC_OVERLOAD _CLC_DEF event_t async_work_group_copy( 11 | global __CLC_GENTYPE *dst, 12 | const local __CLC_GENTYPE *src, 13 | size_t num_gentypes, 14 | event_t event) { 15 | 16 | return async_work_group_strided_copy(dst, src, num_gentypes, 1, event); 17 | } 18 | -------------------------------------------------------------------------------- /generic/lib/async/async_work_group_strided_copy.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/async/prefetch.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/async/prefetch.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF void prefetch(const global __CLC_GENTYPE *p, size_t num_gentypes) { } 2 | -------------------------------------------------------------------------------- /generic/lib/async/wait_group_events.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DEF void wait_group_events(int num_events, event_t *event_list) { 4 | barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); 5 | } 6 | -------------------------------------------------------------------------------- /generic/lib/atom_int32_binary.inc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils.h" 3 | 4 | #define __CLC_ATOM_IMPL(AS, TYPE) \ 5 | _CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP) (volatile AS TYPE *p, TYPE val) { \ 6 | return __CLC_XCONCAT(atomic_, __CLC_ATOMIC_OP) (p, val); \ 7 | } 8 | 9 | __CLC_ATOM_IMPL(__CLC_ATOMIC_ADDRESS_SPACE, int) 10 | 
__CLC_ATOM_IMPL(__CLC_ATOMIC_ADDRESS_SPACE, uint) 11 | 12 | #undef __CLC_ATOM_IMPL 13 | #undef __CLC_ATOMIC_OP 14 | #undef __CLC_ATOMIC_ADDRESS_SPACE 15 | -------------------------------------------------------------------------------- /generic/lib/atomic/atomic_add.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE, AS) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \ 5 | return __sync_fetch_and_add(p, val); \ 6 | } 7 | 8 | IMPL(int, global) 9 | IMPL(unsigned int, global) 10 | IMPL(int, local) 11 | IMPL(unsigned int, local) 12 | #undef IMPL 13 | -------------------------------------------------------------------------------- /generic/lib/atomic/atomic_and.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE, AS) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \ 5 | return __sync_fetch_and_and(p, val); \ 6 | } 7 | 8 | IMPL(int, global) 9 | IMPL(unsigned int, global) 10 | IMPL(int, local) 11 | IMPL(unsigned int, local) 12 | #undef IMPL 13 | -------------------------------------------------------------------------------- /generic/lib/atomic/atomic_cmpxchg.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE, AS) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, TYPE val) { \ 5 | return __sync_val_compare_and_swap(p, cmp, val); \ 6 | } 7 | 8 | IMPL(int, global) 9 | IMPL(unsigned int, global) 10 | IMPL(int, local) 11 | IMPL(unsigned int, local) 12 | #undef IMPL 13 | -------------------------------------------------------------------------------- /generic/lib/atomic/atomic_dec.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE, AS) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atomic_dec(volatile AS TYPE *p) { 
\ 5 | return __sync_fetch_and_sub(p, (TYPE)1); \ 6 | } 7 | 8 | IMPL(int, global) 9 | IMPL(unsigned int, global) 10 | IMPL(int, local) 11 | IMPL(unsigned int, local) 12 | #undef IMPL 13 | -------------------------------------------------------------------------------- /generic/lib/atomic/atomic_inc.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE, AS) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atomic_inc(volatile AS TYPE *p) { \ 5 | return __sync_fetch_and_add(p, (TYPE)1); \ 6 | } 7 | 8 | IMPL(int, global) 9 | IMPL(unsigned int, global) 10 | IMPL(int, local) 11 | IMPL(unsigned int, local) 12 | #undef IMPL 13 | -------------------------------------------------------------------------------- /generic/lib/atomic/atomic_max.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE, AS, OP) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \ 5 | return __sync_fetch_and_##OP(p, val); \ 6 | } 7 | 8 | IMPL(int, global, max) 9 | IMPL(unsigned int, global, umax) 10 | IMPL(int, local, max) 11 | IMPL(unsigned int, local, umax) 12 | #undef IMPL 13 | -------------------------------------------------------------------------------- /generic/lib/atomic/atomic_min.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE, AS, OP) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \ 5 | return __sync_fetch_and_##OP(p, val); \ 6 | } 7 | 8 | IMPL(int, global, min) 9 | IMPL(unsigned int, global, umin) 10 | IMPL(int, local, min) 11 | IMPL(unsigned int, local, umin) 12 | #undef IMPL 13 | -------------------------------------------------------------------------------- /generic/lib/atomic/atomic_or.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE, AS) \ 4 | 
_CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \ 5 | return __sync_fetch_and_or(p, val); \ 6 | } 7 | 8 | IMPL(int, global) 9 | IMPL(unsigned int, global) 10 | IMPL(int, local) 11 | IMPL(unsigned int, local) 12 | #undef IMPL 13 | -------------------------------------------------------------------------------- /generic/lib/atomic/atomic_sub.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE, AS) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { \ 5 | return __sync_fetch_and_sub(p, val); \ 6 | } 7 | 8 | IMPL(int, global) 9 | IMPL(unsigned int, global) 10 | IMPL(int, local) 11 | IMPL(unsigned int, local) 12 | #undef IMPL 13 | -------------------------------------------------------------------------------- /generic/lib/atomic/atomic_xchg.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) { 4 | return as_float(atomic_xchg((volatile global uint *)p, as_uint(val))); 5 | } 6 | 7 | _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) { 8 | return as_float(atomic_xchg((volatile local uint *)p, as_uint(val))); 9 | } 10 | 11 | #define IMPL(TYPE, AS) \ 12 | _CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \ 13 | return __sync_swap_4(p, val); \ 14 | } 15 | 16 | IMPL(int, global) 17 | IMPL(unsigned int, global) 18 | IMPL(int, local) 19 | IMPL(unsigned int, local) 20 | #undef IMPL 21 | -------------------------------------------------------------------------------- /generic/lib/atomic/atomic_xor.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE, AS) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \ 5 | return __sync_fetch_and_xor(p, val); \ 6 | } 7 | 8 | IMPL(int, global) 9 | 
IMPL(unsigned int, global) 10 | IMPL(int, local) 11 | IMPL(unsigned int, local) 12 | #undef IMPL 13 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP add 2 | #define __CLC_ATOMIC_ADDRESS_SPACE global 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile global TYPE *p, TYPE cmp, TYPE val) { \ 5 | return atomic_cmpxchg(p, cmp, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile global TYPE *p) { \ 5 | return atomic_dec(p); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile global TYPE *p) { \ 5 | return atomic_inc(p); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP sub 2 | #define 
__CLC_ATOMIC_ADDRESS_SPACE global 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP xchg 2 | #define __CLC_ATOMIC_ADDRESS_SPACE global 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_global_int32_extended_atomics/atom_and.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP and 2 | #define __CLC_ATOMIC_ADDRESS_SPACE global 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_global_int32_extended_atomics/atom_max.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP max 2 | #define __CLC_ATOMIC_ADDRESS_SPACE global 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_global_int32_extended_atomics/atom_min.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP min 2 | #define __CLC_ATOMIC_ADDRESS_SPACE global 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_global_int32_extended_atomics/atom_or.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP or 2 | #define __CLC_ATOMIC_ADDRESS_SPACE global 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_global_int32_extended_atomics/atom_xor.cl: 
-------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP xor 2 | #define __CLC_ATOMIC_ADDRESS_SPACE global 3 | #include "../atom_int32_binary.inc" /* expands atom_xor for global int and uint, forwarding to atomic_xor */ 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_int64_base_atomics/atom_add.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_int64_base_atomics 4 | /* atom_add for long/unsigned long in global and local memory; each overload calls __sync_fetch_and_add_8. Only compiled when cl_khr_int64_base_atomics is defined. */ 5 | #define IMPL(AS, TYPE) \ 6 | _CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) { \ 7 | return __sync_fetch_and_add_8(p, val); \ 8 | } 9 | 10 | IMPL(global, long) 11 | IMPL(global, unsigned long) 12 | IMPL(local, long) 13 | IMPL(local, unsigned long) 14 | #undef IMPL 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_int64_base_atomics 4 | /* atom_cmpxchg for long/unsigned long in global and local memory; each overload calls __sync_val_compare_and_swap_8. Only compiled when cl_khr_int64_base_atomics is defined. */ 5 | #define IMPL(AS, TYPE) \ 6 | _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, TYPE val) { \ 7 | return __sync_val_compare_and_swap_8(p, cmp, val); \ 8 | } 9 | 10 | IMPL(global, long) 11 | IMPL(global, unsigned long) 12 | IMPL(local, long) 13 | IMPL(local, unsigned long) 14 | #undef IMPL 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_int64_base_atomics/atom_dec.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_int64_base_atomics 4 | /* atom_dec for long/unsigned long in global and local memory, written as atom_sub(p, 1). Only compiled when cl_khr_int64_base_atomics is defined. */ 5 | #define IMPL(AS, TYPE) \ 6 | _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile AS TYPE *p) { \ 7 | return atom_sub(p, (TYPE)1); \ 8 | } 9 | 10 | IMPL(global, long) 11 | IMPL(global, unsigned long) 12 | IMPL(local, long) 13 | IMPL(local, unsigned long) 14 | #undef IMPL 15 | 16 | #endif 17 | 
-------------------------------------------------------------------------------- /generic/lib/cl_khr_int64_base_atomics/atom_inc.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_int64_base_atomics 4 | 5 | #define IMPL(AS, TYPE) \ 6 | _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile AS TYPE *p) { \ 7 | return atom_add(p, (TYPE)1); \ 8 | } 9 | 10 | IMPL(global, long) 11 | IMPL(global, unsigned long) 12 | IMPL(local, long) 13 | IMPL(local, unsigned long) 14 | #undef IMPL 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_int64_base_atomics/atom_sub.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_int64_base_atomics 4 | 5 | #define IMPL(AS, TYPE) \ 6 | _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) { \ 7 | return __sync_fetch_and_sub_8(p, val); \ 8 | } 9 | 10 | IMPL(global, long) 11 | IMPL(global, unsigned long) 12 | IMPL(local, long) 13 | IMPL(local, unsigned long) 14 | #undef IMPL 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_int64_base_atomics 4 | 5 | #define IMPL(AS, TYPE) \ 6 | _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) { \ 7 | return __sync_swap_8(p, val); \ 8 | } 9 | 10 | IMPL(global, long) 11 | IMPL(global, unsigned long) 12 | IMPL(local, long) 13 | IMPL(local, unsigned long) 14 | #undef IMPL 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_int64_extended_atomics/atom_and.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_int64_extended_atomics 4 
| 5 | #define IMPL(AS, TYPE) \ 6 | _CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) { \ 7 | return __sync_fetch_and_and_8(p, val); \ 8 | } 9 | 10 | IMPL(global, long) 11 | IMPL(global, unsigned long) 12 | IMPL(local, long) 13 | IMPL(local, unsigned long) 14 | #undef IMPL 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_int64_extended_atomics/atom_max.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_int64_extended_atomics 4 | 5 | unsigned long __clc__sync_fetch_and_max_local_8(volatile local long *, long); 6 | unsigned long __clc__sync_fetch_and_max_global_8(volatile global long *, long); 7 | unsigned long __clc__sync_fetch_and_umax_local_8(volatile local unsigned long *, unsigned long); 8 | unsigned long __clc__sync_fetch_and_umax_global_8(volatile global unsigned long *, unsigned long); 9 | 10 | #define IMPL(AS, TYPE, OP) \ 11 | _CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \ 12 | return __clc__sync_fetch_and_##OP##_##AS##_8(p, val); \ 13 | } 14 | 15 | IMPL(global, long, max) 16 | IMPL(global, unsigned long, umax) 17 | IMPL(local, long, max) 18 | IMPL(local, unsigned long, umax) 19 | #undef IMPL 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_int64_extended_atomics/atom_min.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_int64_extended_atomics 4 | 5 | unsigned long __clc__sync_fetch_and_min_local_8(volatile local long *, long); 6 | unsigned long __clc__sync_fetch_and_min_global_8(volatile global long *, long); 7 | unsigned long __clc__sync_fetch_and_umin_local_8(volatile local unsigned long *, unsigned long); 8 | unsigned long __clc__sync_fetch_and_umin_global_8(volatile global unsigned long *, unsigned long); 9 | 10 | #define IMPL(AS, 
TYPE, OP) \ 11 | _CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) { \ 12 | return __clc__sync_fetch_and_##OP##_##AS##_8(p, val); \ 13 | } 14 | 15 | IMPL(global, long, min) 16 | IMPL(global, unsigned long, umin) 17 | IMPL(local, long, min) 18 | IMPL(local, unsigned long, umin) 19 | #undef IMPL 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_int64_extended_atomics/atom_or.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_int64_extended_atomics 4 | 5 | #define IMPL(AS, TYPE) \ 6 | _CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) { \ 7 | return __sync_fetch_and_or_8(p, val); \ 8 | } 9 | 10 | IMPL(global, long) 11 | IMPL(global, unsigned long) 12 | IMPL(local, long) 13 | IMPL(local, unsigned long) 14 | #undef IMPL 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef cl_khr_int64_extended_atomics 4 | 5 | #define IMPL(AS, TYPE) \ 6 | _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) { \ 7 | return __sync_fetch_and_xor_8(p, val); \ 8 | } 9 | 10 | IMPL(global, long) 11 | IMPL(global, unsigned long) 12 | IMPL(local, long) 13 | IMPL(local, unsigned long) 14 | #undef IMPL 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP add 2 | #define __CLC_ATOMIC_ADDRESS_SPACE local 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl: 
-------------------------------------------------------------------------------- 1 | #include 2 | /* atom_cmpxchg for local int and unsigned int; each overload forwards its arguments to atomic_cmpxchg. */ 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile local TYPE *p, TYPE cmp, TYPE val) { \ 5 | return atomic_cmpxchg(p, cmp, val); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl: -------------------------------------------------------------------------------- 1 | #include 2 | /* atom_dec for local int and unsigned int; each overload forwards to atomic_dec. */ 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile local TYPE *p) { \ 5 | return atomic_dec(p); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl: -------------------------------------------------------------------------------- 1 | #include 2 | /* atom_inc for local int and unsigned int; each overload forwards to atomic_inc. */ 3 | #define IMPL(TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile local TYPE *p) { \ 5 | return atomic_inc(p); \ 6 | } 7 | 8 | IMPL(int) 9 | IMPL(unsigned int) 10 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP sub 2 | #define __CLC_ATOMIC_ADDRESS_SPACE local 3 | #include "../atom_int32_binary.inc" /* expands atom_sub for local int and uint, forwarding to atomic_sub */ 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP xchg 2 | #define __CLC_ATOMIC_ADDRESS_SPACE local 3 | #include "../atom_int32_binary.inc" /* expands atom_xchg for local int and uint, forwarding to atomic_xchg */ 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_local_int32_extended_atomics/atom_and.cl: 
-------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP and 2 | #define __CLC_ATOMIC_ADDRESS_SPACE local 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_local_int32_extended_atomics/atom_max.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP max 2 | #define __CLC_ATOMIC_ADDRESS_SPACE local 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_local_int32_extended_atomics/atom_min.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP min 2 | #define __CLC_ATOMIC_ADDRESS_SPACE local 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP or 2 | #define __CLC_ATOMIC_ADDRESS_SPACE local 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/cl_khr_local_int32_extended_atomics/atom_xor.cl: -------------------------------------------------------------------------------- 1 | #define __CLC_ATOMIC_OP xor 2 | #define __CLC_ATOMIC_ADDRESS_SPACE local 3 | #include "../atom_int32_binary.inc" 4 | -------------------------------------------------------------------------------- /generic/lib/clc_unary.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { 2 | return __CLC_IMPL_FUNC(x); 3 | } 4 | -------------------------------------------------------------------------------- /generic/lib/common/mix.cl: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/common/mix.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mix(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE a) { 2 | return mad( y - x, a, x ); 3 | } 4 | 5 | #ifndef __CLC_SCALAR 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mix(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_SCALAR_GENTYPE a) { 7 | return mix(x, y, (__CLC_GENTYPE)a); 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /generic/lib/common/sign.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | #define SIGN(TYPE, F) \ 5 | _CLC_DEF _CLC_OVERLOAD TYPE sign(TYPE x) { \ 6 | if (isnan(x)) { \ 7 | return 0.0F; \ 8 | } \ 9 | if (x > 0.0F) { \ 10 | return 1.0F; \ 11 | } \ 12 | if (x < 0.0F) { \ 13 | return -1.0F; \ 14 | } \ 15 | return x; /* -0.0 or +0.0 */ \ 16 | } 17 | 18 | SIGN(float, f) 19 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sign, float) 20 | 21 | #ifdef cl_khr_fp64 22 | 23 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 24 | 25 | SIGN(double, ) 26 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sign, double) 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /generic/lib/geometric/cross.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_OVERLOAD _CLC_DEF float3 cross(float3 p0, float3 p1) { 4 | return (float3)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, 5 | p0.x*p1.y - p0.y*p1.x); 6 | } 7 | 8 | _CLC_OVERLOAD _CLC_DEF float4 cross(float4 p0, float4 p1) { 9 | return (float4)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, 10 | p0.x*p1.y - 
p0.y*p1.x, 0.f); 11 | } 12 | 13 | #ifdef cl_khr_fp64 14 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 15 | 16 | _CLC_OVERLOAD _CLC_DEF double3 cross(double3 p0, double3 p1) { 17 | return (double3)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, 18 | p0.x*p1.y - p0.y*p1.x); 19 | } 20 | 21 | _CLC_OVERLOAD _CLC_DEF double4 cross(double4 p0, double4 p1) { 22 | return (double4)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, 23 | p0.x*p1.y - p0.y*p1.x, 0.f); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /generic/lib/integer/abs.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/integer/abs.inc: -------------------------------------------------------------------------------- 1 | // abs(x): magnitude of x, returned in the unsigned counterpart of x's type. 2 | // The negation is done on the unsigned bit pattern so that the most negative 3 | // signed value is handled without signed overflow. 4 | _CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs(__CLC_GENTYPE x) { 5 | __CLC_U_GENTYPE ux = __builtin_astype(x, __CLC_U_GENTYPE); 6 | return (__CLC_U_GENTYPE)(x > (__CLC_GENTYPE)(0) ? ux : -ux); 7 | } 8 | -------------------------------------------------------------------------------- /generic/lib/integer/abs_diff.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | 
-------------------------------------------------------------------------------- /generic/lib/integer/abs_diff.inc: -------------------------------------------------------------------------------- 1 | // abs_diff(x, y): |x - y| in the unsigned counterpart of the operand type. 2 | // The subtraction is done on the unsigned bit patterns, where wraparound is 3 | // well defined, so operands of opposite sign cannot overflow a signed type. 4 | _CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y) { 5 | __CLC_U_GENTYPE ux = __builtin_astype(x, __CLC_U_GENTYPE); 6 | __CLC_U_GENTYPE uy = __builtin_astype(y, __CLC_U_GENTYPE); 7 | return (__CLC_U_GENTYPE)(x > y ? ux - uy : uy - ux); 8 | } 9 | -------------------------------------------------------------------------------- /generic/lib/integer/hadd.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/integer/hadd.inc: -------------------------------------------------------------------------------- 1 | //hadd = (x+y)>>1 2 | //This can be simplified to (x>>1) + (y>>1) + (1 if both x and y have the 1s bit set) 3 | //This saves us having to do any checks for overflow in the addition sum 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y) { 5 | return (x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+(x&y&(__CLC_GENTYPE)1); 6 | } 7 | -------------------------------------------------------------------------------- /generic/lib/integer/mad24.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/integer/mad24.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z){ 2 | return mul24(x, y) + z; 3 | } 4 | -------------------------------------------------------------------------------- /generic/lib/integer/mul24.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/integer/mul24.inc: -------------------------------------------------------------------------------- 1 | 2 | // We need to use shifts here in order to maintain the sign bit for signed 3 | // integers. 
The compiler should optimize this to (x & 0x00FFFFFF) for 4 | // unsigned integers. 5 | #define CONVERT_TO_24BIT(x) (((x) << 8) >> 8) 6 | 7 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y){ 8 | return CONVERT_TO_24BIT(x) * CONVERT_TO_24BIT(y); 9 | } 10 | 11 | #undef CONVERT_TO_24BIT 12 | -------------------------------------------------------------------------------- /generic/lib/integer/popcount.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define __CLC_FUNC popcount 5 | #define __CLC_IMPL_FUNC __clc_native_popcount 6 | 7 | #define __CLC_BODY "../clc_unary.inc" 8 | #include 9 | -------------------------------------------------------------------------------- /generic/lib/integer/rhadd.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/integer/rhadd.inc: -------------------------------------------------------------------------------- 1 | //rhadd = (x+y+1)>>1 2 | //This can be simplified to x>>1 + y>>1 + (1 if either x or y have the 1s bit set) 3 | //This saves us having to do any checks for overflow in the addition sums 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y) { 5 | return (x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+((x&(__CLC_GENTYPE)1)|(y&(__CLC_GENTYPE)1)); 6 | } 7 | -------------------------------------------------------------------------------- /generic/lib/integer/rotate.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/math/acos.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 
| #include 5 | -------------------------------------------------------------------------------- /generic/lib/math/acos.inc: -------------------------------------------------------------------------------- 1 | /* 2 | * There are multiple formulas for calculating arccosine of x: 3 | * 1) acos(x) = pi/2 + i * ln(i*x + sqrt(1-x^2)) (notice the 'i'...) 4 | * 2) acos(x) = pi/2 + asin(-x) (asin isn't implemented yet) 5 | * 3) acos(x) = pi/2 - asin(x) (ditto) 6 | * 4) acos(x) = 2*atan2(sqrt(1-x), sqrt(1+x)) 7 | * 5) acos(x) = pi/2 - atan2(x, ( sqrt(1-x^2) ) ) 8 | * The implementation below uses option 4. 9 | * Options 1-3 are not currently usable, #5 generates more concise radeonsi 10 | * bitcode and assembly than #4 (134 vs 132 instructions on radeonsi), but 11 | * precision of #4 may be better. 12 | */ 13 | 14 | // TODO: Enable half precision when atan2 is implemented 15 | #if __CLC_FPSIZE > 16 16 | 17 | #if __CLC_FPSIZE == 64 18 | #define __CLC_CONST(x) x 19 | #elif __CLC_FPSIZE == 32 20 | #define __CLC_CONST(x) x ## f 21 | #elif __CLC_FPSIZE == 16 22 | #define __CLC_CONST(x) x ## h 23 | #endif 24 | 25 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) { 26 | return ( 27 | (__CLC_GENTYPE) __CLC_CONST(2.0) * atan2( 28 | sqrt((__CLC_GENTYPE) __CLC_CONST(1.0) - x), 29 | sqrt((__CLC_GENTYPE) __CLC_CONST(1.0) + x) 30 | ) 31 | ); 32 | } 33 | 34 | #undef __CLC_CONST 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /generic/lib/math/asin.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/math/asin.inc: -------------------------------------------------------------------------------- 1 | // TODO: Enable half precision when atan2 is implemented 2 | #if __CLC_FPSIZE > 16 3 | 4 | #if __CLC_FPSIZE == 64 5 | #define __CLC_CONST(x) x 6 | #elif __CLC_FPSIZE == 32 7 | #define 
__CLC_CONST(x) x ## f 8 | #elif __CLC_FPSIZE == 16 9 | #define __CLC_CONST(x) x ## h 10 | #endif 11 | 12 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE asin(__CLC_GENTYPE x) { 13 | return atan2(x, sqrt( (__CLC_GENTYPE)__CLC_CONST(1.0) - (x*x) )); 14 | } 15 | 16 | #undef __CLC_CONST 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /generic/lib/math/binary_impl.inc: -------------------------------------------------------------------------------- 1 | 2 | #ifndef __CLC_SCALAR 3 | 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, __CLC_GENTYPE y) { 5 | return FUNCTION_IMPL(x, y); 6 | } 7 | 8 | #endif 9 | 10 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, float y) { 11 | __CLC_GENTYPE vec_y = (__CLC_GENTYPE) (y); 12 | return FUNCTION_IMPL(x, vec_y); 13 | } 14 | 15 | #ifdef cl_khr_fp64 16 | 17 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, double y) { 18 | __CLC_GENTYPE vec_y = (__CLC_GENTYPE) (y); 19 | return FUNCTION_IMPL(x, vec_y); 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /generic/lib/math/ceil.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | // Map the llvm intrinsic to an OpenCL function. 
5 | #define __CLC_FUNCTION __clc_ceil 6 | #define __CLC_INTRINSIC "llvm.ceil" 7 | #include "math/unary_intrin.inc" 8 | 9 | #undef __CLC_FUNCTION 10 | #define __CLC_FUNCTION ceil 11 | #include "unary_builtin.inc" 12 | -------------------------------------------------------------------------------- /generic/lib/math/clc_sw_binary.inc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x) 4 | 5 | // TODO: Enable half precision when the sw routine is implemented 6 | #if __CLC_FPSIZE > 16 7 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE y) { 8 | return __CLC_SW_FUNC(__CLC_FUNC)(x, y); 9 | } 10 | #endif 11 | 12 | #undef __CLC_SW_FUNC 13 | -------------------------------------------------------------------------------- /generic/lib/math/clc_sw_unary.inc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x) 4 | 5 | // TODO: Enable half precision when the sw routine is implemented 6 | #if __CLC_FPSIZE > 16 7 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { 8 | return __CLC_SW_FUNC(__CLC_FUNC)(x); 9 | } 10 | #endif 11 | 12 | #undef __CLC_SW_FUNC 13 | -------------------------------------------------------------------------------- /generic/lib/math/copysign.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | _CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | _CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double) 11 | 12 | #endif 13 | 14 | #ifdef cl_khr_fp16 15 | 16 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 17 | /* copysign(x, y) for half: the magnitude of x combined with the sign of y, assembled from the raw 16-bit patterns. */ 18 | _CLC_DEF _CLC_OVERLOAD half copysign(half x, half y) 19 | { 20 | ushort magnitude_x = 
as_ushort(x) & 0x7fffu; /* every bit of x except the sign bit */ 21 | ushort sign_y = as_ushort(y) & 0x8000u; /* sign bit of y only */ 22 | 23 | return as_half((ushort)(magnitude_x | sign_y)); 24 | } 25 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half) 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /generic/lib/math/exp10.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define __CLC_FUNC exp10 5 | #define __CLC_BODY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/fabs.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | // Map the llvm intrinsic to an OpenCL function. 5 | #define __CLC_FUNCTION __clc_fabs 6 | #define __CLC_INTRINSIC "llvm.fabs" 7 | #include "math/unary_intrin.inc" 8 | 9 | #undef __CLC_FUNCTION 10 | #define __CLC_FUNCTION fabs 11 | #include "unary_builtin.inc" 12 | -------------------------------------------------------------------------------- /generic/lib/math/fdim.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "math.h" 4 | 5 | #define __CLC_BODY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/floor.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | // Map the llvm intrinsic to an OpenCL function. 
5 | #define __CLC_FUNCTION __clc_floor 6 | #define __CLC_INTRINSIC "llvm.floor" 7 | #include "math/unary_intrin.inc" 8 | 9 | #undef __CLC_FUNCTION 10 | #define __CLC_FUNCTION floor 11 | #include "unary_builtin.inc" 12 | -------------------------------------------------------------------------------- /generic/lib/math/fma.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "math.h" 4 | #include "math/clc_fma.h" 5 | 6 | #define __CLC_BODY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/fma.inc: -------------------------------------------------------------------------------- 1 | _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE fma(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) { 2 | #if __CLC_FPSIZE == 32 && HAVE_HW_FMA32() == 0 3 | return __clc_sw_fma(a, b, c); 4 | #else 5 | return __clc_fma(a, b, c); 6 | #endif 7 | } 8 | -------------------------------------------------------------------------------- /generic/lib/math/fmax.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../clcmacro.h" 4 | 5 | _CLC_DEFINE_BINARY_BUILTIN(float, fmax, __builtin_fmaxf, float, float); 6 | 7 | #ifdef cl_khr_fp64 8 | 9 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 10 | 11 | _CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double); 12 | 13 | #endif 14 | 15 | #ifdef cl_khr_fp16 16 | 17 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 18 | 19 | _CLC_DEF _CLC_OVERLOAD half fmax(half x, half y) 20 | { 21 | if (isnan(x)) 22 | return y; 23 | if (isnan(y)) 24 | return x; 25 | return (x < y) ? 
y : x; 26 | } 27 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half) 28 | 29 | #endif 30 | 31 | #define __CLC_BODY 32 | #include 33 | -------------------------------------------------------------------------------- /generic/lib/math/fmax.inc: -------------------------------------------------------------------------------- 1 | 2 | #if !defined(__CLC_SCALAR) 3 | 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, float y) { 5 | return fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); 6 | } 7 | 8 | #ifdef cl_khr_fp64 9 | 10 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 11 | 12 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, double y) { 13 | return fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); 14 | } 15 | 16 | #endif // ifdef cl_khr_fp64 17 | 18 | #ifdef cl_khr_fp16 19 | 20 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 21 | 22 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, half y) { 23 | return fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); 24 | } 25 | 26 | #endif // ifdef cl_khr_fp16 27 | 28 | #endif // !defined(__CLC_SCALAR) 29 | -------------------------------------------------------------------------------- /generic/lib/math/fmin.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../clcmacro.h" 4 | 5 | _CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float); 6 | 7 | #ifdef cl_khr_fp64 8 | 9 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 10 | 11 | _CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double); 12 | 13 | #endif 14 | #ifdef cl_khr_fp16 15 | 16 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 17 | 18 | _CLC_DEF _CLC_OVERLOAD half fmin(half x, half y) 19 | { 20 | if (isnan(x)) 21 | return y; 22 | if (isnan(y)) 23 | return x; 24 | return (y < x) ? 
y : x; 25 | } 26 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half) 27 | 28 | #endif 29 | 30 | #define __CLC_BODY 31 | #include 32 | -------------------------------------------------------------------------------- /generic/lib/math/fmin.inc: -------------------------------------------------------------------------------- 1 | 2 | #if !defined(__CLC_SCALAR) 3 | 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, float y) { 5 | return fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); 6 | } 7 | 8 | #ifdef cl_khr_fp64 9 | 10 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 11 | 12 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, double y) { 13 | return fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); 14 | } 15 | 16 | #endif // ifdef cl_khr_fp64 17 | 18 | #ifdef cl_khr_fp16 19 | 20 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 21 | 22 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, half y) { 23 | return fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); 24 | } 25 | 26 | #endif // ifdef cl_khr_fp16 27 | 28 | #endif // !defined(__CLC_SCALAR) 29 | -------------------------------------------------------------------------------- /generic/lib/math/fmod.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define __CLC_FUNC fmod 5 | #define __CLC_BODY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/frexp.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define __CLC_BODY 5 | #define __CLC_ADDRESS_SPACE private 6 | #include 7 | #undef __CLC_ADDRESS_SPACE 8 | 9 | #define __CLC_BODY 10 | #define __CLC_ADDRESS_SPACE global 11 | #include 12 | #undef __CLC_ADDRESS_SPACE 13 | 14 | #define __CLC_BODY 15 | #define __CLC_ADDRESS_SPACE local 16 | #include 17 | #undef __CLC_ADDRESS_SPACE 18 | 
-------------------------------------------------------------------------------- /generic/lib/math/half_binary.inc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x) 4 | 5 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_HALF_FUNC(__CLC_FUNC)(__CLC_GENTYPE x, __CLC_GENTYPE y) { 6 | return __CLC_FUNC(x, y); 7 | } 8 | 9 | #undef __CLC_HALF_FUNC 10 | -------------------------------------------------------------------------------- /generic/lib/math/half_cos.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC cos 4 | #define __CLC_BODY 5 | #define __FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/half_divide.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define divide(x,y) (x/y) 4 | 5 | #define __CLC_FUNC divide 6 | #define __CLC_BODY 7 | #define __FLOAT_ONLY 8 | #include 9 | #undef divide 10 | -------------------------------------------------------------------------------- /generic/lib/math/half_exp.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC exp 4 | #define __CLC_BODY 5 | #define __FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/half_exp10.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC exp10 4 | #define __CLC_BODY 5 | #define __FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/half_exp2.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC exp2 4 | #define __CLC_BODY 5 | #define 
__FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/half_log.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC log 4 | #define __CLC_BODY 5 | #define __FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/half_log10.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC log10 4 | #define __CLC_BODY 5 | #define __FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/half_log2.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC log2 4 | #define __CLC_BODY 5 | #define __FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/half_powr.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC powr 4 | #define __CLC_BODY 5 | #define __FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/half_recip.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define recip(x) (1.0f/x) 4 | 5 | #define __CLC_FUNC recip 6 | #define __CLC_BODY 7 | #define __FLOAT_ONLY 8 | #include 9 | 10 | #undef recip 11 | -------------------------------------------------------------------------------- /generic/lib/math/half_rsqrt.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC rsqrt 4 | #define __CLC_BODY 5 | #define __FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- 
/generic/lib/math/half_sin.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC sin 4 | #define __CLC_BODY 5 | #define __FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/half_sqrt.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC sqrt 4 | #define __CLC_BODY 5 | #define __FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/half_tan.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_FUNC tan 4 | #define __CLC_BODY 5 | #define __FLOAT_ONLY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/half_unary.inc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x) 4 | 5 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_HALF_FUNC(__CLC_FUNC)(__CLC_GENTYPE val) { 6 | return __CLC_FUNC(val); 7 | } 8 | 9 | #undef __CLC_HALF_FUNC 10 | -------------------------------------------------------------------------------- /generic/lib/math/hypot.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #define __CLC_FUNC hypot 6 | #define __CLC_BODY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/log.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | /* 5 | *log(x) = log2(x) * (1/log2(e)) 6 | */ 7 | 8 | _CLC_OVERLOAD _CLC_DEF float log(float x) 9 | { 10 | return log2(x) * (1.0f / M_LOG2E_F); 11 | } 12 | 13 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, 
float, log, float); 14 | 15 | #ifdef cl_khr_fp64 16 | 17 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 18 | 19 | _CLC_OVERLOAD _CLC_DEF double log(double x) 20 | { 21 | return log2(x) * (1.0 / M_LOG2E); 22 | } 23 | 24 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log, double); 25 | 26 | #endif // cl_khr_fp64 27 | -------------------------------------------------------------------------------- /generic/lib/math/logb.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "math.h" 3 | #include "../clcmacro.h" 4 | /* logb(float): yields the unbiased binary exponent of x as a float, computed from the raw bit pattern. The *_SP32 constants come from math.h, which is not shown here; judging by their names they are the sign-clearing mask, the exponent shift and bias, and the +inf / -inf bit patterns -- confirm there. */ 5 | _CLC_OVERLOAD _CLC_DEF float logb(float x) { 6 | int ax = as_int(x) & EXSIGNBIT_SP32; 7 | float s = -118 - clz(ax); /* result for patterns below 0x00800000: the top set bit sits at position 31 - clz(ax), giving (31 - clz(ax)) - 149 */ 8 | float r = (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; /* shifted exponent field minus the bias */ 9 | r = ax >= PINFBITPATT_SP32 ? as_float(ax) : r; /* patterns at or above PINFBITPATT_SP32 are passed through (presumably inf and NaN) */ 10 | r = ax < 0x00800000 ? s : r; /* below the smallest normal bit pattern: use the leading-zero based value */ 11 | r = ax == 0 ? as_float(NINFBITPATT_SP32) : r; /* zero input; presumably -infinity -- confirm in math.h */ 12 | return r; 13 | } 14 | 15 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, logb, float); 16 | 17 | #ifdef cl_khr_fp64 18 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 19 | /* logb(double): same sequence of selects as the float overload, using the 64-bit pattern and the *_DP64 constants from math.h (not shown here). */ 20 | _CLC_OVERLOAD _CLC_DEF double logb(double x) { 21 | long ax = as_long(x) & EXSIGNBIT_DP64; 22 | double s = -1011L - clz(ax); /* result for patterns below 0x0010000000000000: (63 - clz(ax)) - 1074 */ 23 | double r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; 24 | r = ax >= PINFBITPATT_DP64 ? as_double(ax) : r; 25 | r = ax < 0x0010000000000000L ? s : r; 26 | r = ax == 0L ? 
as_double(NINFBITPATT_DP64) : r; 27 | return r; 28 | } 29 | 30 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double) 31 | #endif 32 | -------------------------------------------------------------------------------- /generic/lib/math/mad.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/math/mad.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) { 2 | return a * b + c; 3 | } 4 | -------------------------------------------------------------------------------- /generic/lib/math/maxmag.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define __CLC_BODY 5 | #include 6 | -------------------------------------------------------------------------------- /generic/lib/math/maxmag.inc: -------------------------------------------------------------------------------- 1 | #ifdef __CLC_SCALAR 2 | #define __CLC_VECSIZE 3 | #endif 4 | 5 | #if __CLC_FPSIZE == 64 6 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_long, __CLC_VECSIZE) 7 | #elif __CLC_FPSIZE == 32 8 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_int, __CLC_VECSIZE) 9 | #elif __CLC_FPSIZE == 16 10 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_short, __CLC_VECSIZE) 11 | #endif 12 | /* maxmag(x, y): picks x when fabs(x) > fabs(y), otherwise y; when either operand is NaN or the magnitudes are equal the result is fmax(x, y) instead. __CLC_CONVERT_NATN converts each comparison result to the integer type passed to select(). */ 13 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE maxmag(__CLC_GENTYPE x, __CLC_GENTYPE y) { 14 | const __CLC_GENTYPE res = select(y, x, __CLC_CONVERT_NATN(isgreater(fabs(x), fabs(y)))); 15 | return select(res, fmax(x, y), __CLC_CONVERT_NATN(isnan(x) | isnan(y) | isequal(fabs(x), fabs(y)))); 16 | } 17 | 18 | #undef __CLC_CONVERT_NATN 19 | 20 | #ifdef __CLC_SCALAR 21 | #undef __CLC_VECSIZE 22 | #endif 23 | 
-------------------------------------------------------------------------------- /generic/lib/math/minmag.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define __CLC_BODY 5 | #include 6 | -------------------------------------------------------------------------------- /generic/lib/math/minmag.inc: -------------------------------------------------------------------------------- 1 | #ifdef __CLC_SCALAR 2 | #define __CLC_VECSIZE 3 | #endif 4 | 5 | #if __CLC_FPSIZE == 64 6 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_long, __CLC_VECSIZE) 7 | #elif __CLC_FPSIZE == 32 8 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_int, __CLC_VECSIZE) 9 | #elif __CLC_FPSIZE == 16 10 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_short, __CLC_VECSIZE) 11 | #endif 12 | /* minmag(x, y): picks x when fabs(x) < fabs(y), otherwise y; when either operand is NaN or the magnitudes are equal the result is fmin(x, y) instead. */ 13 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE minmag(__CLC_GENTYPE x, __CLC_GENTYPE y) { 14 | const __CLC_GENTYPE res = select(y, x, __CLC_CONVERT_NATN(isless(fabs(x), fabs(y)))); 15 | return select(res, fmin(x, y), __CLC_CONVERT_NATN(isnan(x) | isnan(y) | isequal(fabs(x), fabs(y)))); 16 | } 17 | 18 | #undef __CLC_CONVERT_NATN 19 | 20 | #ifdef __CLC_SCALAR 21 | #undef __CLC_VECSIZE 22 | #endif 23 | -------------------------------------------------------------------------------- /generic/lib/math/nan.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils.h" 3 | 4 | #define __CLC_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE) 5 | #define __CLC_BODY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/nan.inc: -------------------------------------------------------------------------------- 1 | #ifdef __CLC_SCALAR 2 | #define __CLC_VECSIZE 3 | #endif 4 | /* nan(code): returns a quiet NaN whose remaining bits are taken from `code`. Every width ORs in the all-ones exponent plus the top mantissa (quiet) bit, so the result is a NaN even for code == 0. */ 5 | #if __CLC_FPSIZE == 64 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ulong, __CLC_VECSIZE) code) 7 | { 8 | return __CLC_AS_GENTYPE(code | 
0x7ff8000000000000ul); /* exponent 0x7ff plus quiet bit; the exponent alone (0x7ff0...) would make nan(0) equal +infinity */ 9 | } 10 | #elif __CLC_FPSIZE == 32 11 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(uint, __CLC_VECSIZE) code) 12 | { 13 | return __CLC_AS_GENTYPE(code | 0x7fc00000); 14 | } 15 | #elif __CLC_FPSIZE == 16 16 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ushort, __CLC_VECSIZE) code) 17 | { 18 | const ushort mask = 0x7e00; 19 | const __CLC_XCONCAT(ushort, __CLC_VECSIZE) res = code | mask; 20 | return __CLC_AS_GENTYPE(res); 21 | } 22 | #endif 23 | 24 | 25 | #ifdef __CLC_SCALAR 26 | #undef __CLC_VECSIZE 27 | #endif 28 | -------------------------------------------------------------------------------- /generic/lib/math/native_cos.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_NATIVE_INTRINSIC cos 4 | 5 | #define __CLC_BODY 6 | #define __FLOAT_ONLY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/native_divide.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #define __FLOAT_ONLY 5 | #include 6 | -------------------------------------------------------------------------------- /generic/lib/math/native_divide.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_divide(__CLC_GENTYPE x, __CLC_GENTYPE y) { 2 | return x / y; 3 | } 4 | -------------------------------------------------------------------------------- /generic/lib/math/native_exp.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_NATIVE_INTRINSIC exp 4 | 5 | #define __CLC_BODY 6 | #define __FLOAT_ONLY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/native_exp10.cl: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #define __FLOAT_ONLY 5 | #include 6 | -------------------------------------------------------------------------------- /generic/lib/math/native_exp10.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_exp10(__CLC_GENTYPE val) { 2 | return native_exp2(val * M_LOG210_F); 3 | } 4 | -------------------------------------------------------------------------------- /generic/lib/math/native_exp2.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_NATIVE_INTRINSIC exp2 4 | 5 | #define __CLC_BODY 6 | #define __FLOAT_ONLY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/native_log10.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_NATIVE_INTRINSIC log10 4 | 5 | #define __CLC_BODY 6 | #define __FLOAT_ONLY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/native_powr.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #define __FLOAT_ONLY 5 | #include 6 | -------------------------------------------------------------------------------- /generic/lib/math/native_powr.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_powr(__CLC_GENTYPE x, __CLC_GENTYPE y) { 2 | // x^y == 2^{log2 x^y} == 2^{y * log2 x} 3 | // for x < 0 propagate nan created by log2 4 | return native_exp2(y * native_log2(x)); 5 | } 6 | -------------------------------------------------------------------------------- /generic/lib/math/native_recip.cl: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #define __FLOAT_ONLY 5 | #include 6 | -------------------------------------------------------------------------------- /generic/lib/math/native_recip.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_recip(__CLC_GENTYPE val) { 2 | return 1.0f / val; 3 | } 4 | -------------------------------------------------------------------------------- /generic/lib/math/native_rsqrt.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #define __FLOAT_ONLY 5 | #include 6 | -------------------------------------------------------------------------------- /generic/lib/math/native_rsqrt.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_rsqrt(__CLC_GENTYPE val) { 2 | return 1.0f / native_sqrt(val); 3 | } 4 | -------------------------------------------------------------------------------- /generic/lib/math/native_sin.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_NATIVE_INTRINSIC sin 4 | 5 | #define __CLC_BODY 6 | #define __FLOAT_ONLY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/native_sqrt.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_NATIVE_INTRINSIC sqrt 4 | 5 | #define __CLC_BODY 6 | #define __FLOAT_ONLY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/native_tan.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #define __FLOAT_ONLY 5 | #include 6 | 
-------------------------------------------------------------------------------- /generic/lib/math/native_tan.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_tan(__CLC_GENTYPE val) { 2 | return native_sin(val) / native_cos(val); 3 | } 4 | -------------------------------------------------------------------------------- /generic/lib/math/nextafter.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../clcmacro.h" 3 | 4 | _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __builtin_nextafterf, float, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | _CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __builtin_nextafter, double, double) 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /generic/lib/math/pow.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #define __CLC_FUNC pow 6 | #define __CLC_BODY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/pown.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #define __CLC_BODY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/pown.inc: -------------------------------------------------------------------------------- 1 | // TODO: Enable half precision when the sw routine is implemented 2 | #if __CLC_FPSIZE > 16 3 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pown(__CLC_GENTYPE x, __CLC_INTN y) { 4 | return __clc_pown(x, y); 5 | } 6 | #endif 7 | -------------------------------------------------------------------------------- /generic/lib/math/powr.cl: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #define __CLC_FUNC powr 6 | #define __CLC_BODY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/remainder.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define __CLC_FUNC remainder 5 | #define __CLC_BODY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/remquo.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define __CLC_BODY 5 | #define __CLC_ADDRESS_SPACE global 6 | #include 7 | #undef __CLC_ADDRESS_SPACE 8 | 9 | #define __CLC_BODY 10 | #define __CLC_ADDRESS_SPACE local 11 | #include 12 | #undef __CLC_ADDRESS_SPACE 13 | 14 | #define __CLC_BODY 15 | #define __CLC_ADDRESS_SPACE private 16 | #include 17 | #undef __CLC_ADDRESS_SPACE 18 | -------------------------------------------------------------------------------- /generic/lib/math/remquo.inc: -------------------------------------------------------------------------------- 1 | // TODO: Enable half precision when the sw routine is implemented 2 | #if __CLC_FPSIZE > 16 3 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) { 4 | __CLC_INTN local_q; 5 | __CLC_GENTYPE ret = __clc_remquo(x, y, &local_q); 6 | *q = local_q; 7 | return ret; 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /generic/lib/math/rint.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // Map the llvm intrinsic to an OpenCL function. 
4 | #define __CLC_FUNCTION __clc_rint 5 | #define __CLC_INTRINSIC "llvm.rint" 6 | #include "math/unary_intrin.inc" 7 | 8 | #undef __CLC_FUNCTION 9 | #define __CLC_FUNCTION rint 10 | #include "unary_builtin.inc" 11 | -------------------------------------------------------------------------------- /generic/lib/math/rootn.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #define __CLC_BODY 6 | #include 7 | -------------------------------------------------------------------------------- /generic/lib/math/rootn.inc: -------------------------------------------------------------------------------- 1 | // TODO: Enable half precision when the sw routine is implemented 2 | #if __CLC_FPSIZE > 16 3 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rootn(__CLC_GENTYPE x, __CLC_INTN y) { 4 | return __clc_rootn(x, y); 5 | } 6 | #endif 7 | -------------------------------------------------------------------------------- /generic/lib/math/round.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // Map the llvm intrinsic to an OpenCL function. 
4 | #define __CLC_FUNCTION __clc_round 5 | #define __CLC_INTRINSIC "llvm.round" 6 | #include "math/unary_intrin.inc" 7 | 8 | #undef __CLC_FUNCTION 9 | #define __CLC_FUNCTION round 10 | #include "unary_builtin.inc" 11 | -------------------------------------------------------------------------------- /generic/lib/math/sincos.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | -------------------------------------------------------------------------------- /generic/lib/math/sincos.inc: -------------------------------------------------------------------------------- 1 | // TODO: Enable half precision when sin/cos is implemented 2 | #if __CLC_FPSIZE > 16 3 | #define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \ 4 | _CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \ 5 | *cosval = cos(x); \ 6 | return sin(x); \ 7 | } 8 | 9 | __CLC_DECLARE_SINCOS(global, __CLC_GENTYPE) 10 | __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE) 11 | __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE) 12 | 13 | #undef __CLC_DECLARE_SINCOS 14 | #endif 15 | -------------------------------------------------------------------------------- /generic/lib/math/tan.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #define __CLC_FUNC tan 6 | #define __CLC_BODY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/tanpi.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #define __CLC_FUNC tanpi 6 | #define __CLC_BODY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/math/trunc.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // Map the llvm intrinsic to an OpenCL function. 
4 | #define __CLC_FUNCTION __clc_trunc 5 | #define __CLC_INTRINSIC "llvm.trunc" 6 | #include "math/unary_intrin.inc" 7 | 8 | #undef __CLC_FUNCTION 9 | #define __CLC_FUNCTION trunc 10 | #include "unary_builtin.inc" 11 | -------------------------------------------------------------------------------- /generic/lib/math/unary_builtin.inc: -------------------------------------------------------------------------------- 1 | #include "../clcmacro.h" 2 | #include "utils.h" 3 | 4 | #ifndef __CLC_BUILTIN 5 | #define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION) 6 | #endif 7 | 8 | _CLC_DEFINE_UNARY_BUILTIN(float, __CLC_FUNCTION, __CLC_BUILTIN, float) 9 | 10 | #ifdef cl_khr_fp64 11 | 12 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 13 | 14 | _CLC_DEFINE_UNARY_BUILTIN(double, __CLC_FUNCTION, __CLC_BUILTIN, double) 15 | 16 | #endif 17 | 18 | #ifdef cl_khr_fp16 19 | 20 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 21 | 22 | _CLC_DEFINE_UNARY_BUILTIN(half, __CLC_FUNCTION, __CLC_BUILTIN, half) 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /generic/lib/relational/isfinite.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isfinite, __builtin_isfinite, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | // The scalar version of isfinite(double) returns an int, but the vector versions 11 | // return long. 12 | _CLC_DEF _CLC_OVERLOAD int isfinite(double x) { 13 | return __builtin_isfinite(x); 14 | } 15 | 16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isfinite, double) 17 | 18 | #endif 19 | #ifdef cl_khr_fp16 20 | 21 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 22 | 23 | // The scalar version of isfinite(half) returns an int, but the vector versions 24 | // return short. 
25 | _CLC_DEF _CLC_OVERLOAD int isfinite(half x) { 26 | return __builtin_isfinite(x); 27 | } 28 | 29 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isfinite, half) 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /generic/lib/relational/isgreater.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | //Note: It would be nice to use __builtin_isgreater with vector inputs, but it seems to only take scalar values as 5 | // input, which will produce incorrect output for vector input types. 6 | 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, isgreater, __builtin_isgreater, float, float) 8 | 9 | #ifdef cl_khr_fp64 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // The scalar version of isgreater(double, double) returns an int, but the vector versions 14 | // return long. 15 | 16 | _CLC_DEF _CLC_OVERLOAD int isgreater(double x, double y){ 17 | return __builtin_isgreater(x, y); 18 | } 19 | 20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreater, double, double) 21 | 22 | #endif 23 | 24 | #ifdef cl_khr_fp16 25 | 26 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 27 | 28 | // The scalar version of isgreater(half, half) returns an int, but the vector versions 29 | // return short. 
30 | 31 | _CLC_DEF _CLC_OVERLOAD int isgreater(half x, half y){ 32 | return __builtin_isgreater(x, y); 33 | } 34 | 35 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreater, half, half) 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /generic/lib/relational/isinf.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isinf, __builtin_isinf, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | // The scalar version of isinf(double) returns an int, but the vector versions 11 | // return long. 12 | _CLC_DEF _CLC_OVERLOAD int isinf(double x) { 13 | return __builtin_isinf(x); 14 | } 15 | 16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isinf, double) 17 | #endif 18 | 19 | #ifdef cl_khr_fp16 20 | 21 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 22 | 23 | // The scalar version of isinf(half) returns an int, but the vector versions 24 | // return short. 25 | _CLC_DEF _CLC_OVERLOAD int isinf(half x) { 26 | return __builtin_isinf(x); 27 | } 28 | 29 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isinf, half) 30 | #endif 31 | -------------------------------------------------------------------------------- /generic/lib/relational/isless.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | //Note: It would be nice to use __builtin_isless with vector inputs, but it seems to only take scalar values as 5 | // input, which will produce incorrect output for vector input types. 6 | 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, isless, __builtin_isless, float, float) 8 | 9 | #ifdef cl_khr_fp64 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // The scalar version of isless(double, double) returns an int, but the vector versions 14 | // return long. 
15 | 16 | _CLC_DEF _CLC_OVERLOAD int isless(double x, double y){ 17 | return __builtin_isless(x, y); 18 | } 19 | 20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isless, double, double) 21 | 22 | #endif 23 | #ifdef cl_khr_fp16 24 | 25 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 26 | 27 | // The scalar version of isless(half, half) returns an int, but the vector versions 28 | // return short. 29 | 30 | _CLC_DEF _CLC_OVERLOAD int isless(half x, half y){ 31 | return __builtin_isless(x, y); 32 | } 33 | 34 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isless, half, half) 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /generic/lib/relational/islessequal.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | //Note: It would be nice to use __builtin_islessequal with vector inputs, but it seems to only take scalar values as 5 | // input, which will produce incorrect output for vector input types. 6 | 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, islessequal, __builtin_islessequal, float, float) 8 | 9 | #ifdef cl_khr_fp64 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // The scalar version of islessequal(double, double) returns an int, but the vector versions 14 | // return long. 15 | 16 | _CLC_DEF _CLC_OVERLOAD int islessequal(double x, double y){ 17 | return __builtin_islessequal(x, y); 18 | } 19 | 20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessequal, double, double) 21 | 22 | #endif 23 | #ifdef cl_khr_fp16 24 | 25 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 26 | 27 | // The scalar version of islessequal(half, half) returns an int, but the vector versions 28 | // return short. 
29 | 30 | _CLC_DEF _CLC_OVERLOAD int islessequal(half x, half y){ 31 | return __builtin_islessequal(x, y); 32 | } 33 | 34 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessequal, half, half) 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /generic/lib/relational/islessgreater.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | //Note: It would be nice to use __builtin_islessgreater with vector inputs, but it seems to only take scalar values as 5 | // input, which will produce incorrect output for vector input types. 6 | 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, islessgreater, __builtin_islessgreater, float, float) 8 | 9 | #ifdef cl_khr_fp64 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // The scalar version of islessgreater(double, double) returns an int, but the vector versions 14 | // return long. 15 | 16 | _CLC_DEF _CLC_OVERLOAD int islessgreater(double x, double y){ 17 | return __builtin_islessgreater(x, y); 18 | } 19 | 20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessgreater, double, double) 21 | 22 | #endif 23 | #ifdef cl_khr_fp16 24 | 25 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 26 | 27 | // The scalar version of islessgreater(half, half) returns an int, but the vector versions 28 | // return short. 
29 | 30 | _CLC_DEF _CLC_OVERLOAD int islessgreater(half x, half y){ 31 | return __builtin_islessgreater(x, y); 32 | } 33 | 34 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessgreater, half, half) 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /generic/lib/relational/isnan.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isnan, __builtin_isnan, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | // The scalar version of isnan(double) returns an int, but the vector versions 11 | // return long. 12 | _CLC_DEF _CLC_OVERLOAD int isnan(double x) { 13 | return __builtin_isnan(x); 14 | } 15 | 16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnan, double) 17 | 18 | #endif 19 | 20 | #ifdef cl_khr_fp16 21 | 22 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 23 | 24 | // The scalar version of isnan(half) returns an int, but the vector versions 25 | // return short. 26 | _CLC_DEF _CLC_OVERLOAD int isnan(half x) { 27 | return __builtin_isnan(x); 28 | } 29 | 30 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnan, half) 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /generic/lib/relational/isnormal.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isnormal, __builtin_isnormal, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | // The scalar version of isnormal(double) returns an int, but the vector versions 11 | // return long. 
12 | _CLC_DEF _CLC_OVERLOAD int isnormal(double x) { 13 | return __builtin_isnormal(x); 14 | } 15 | 16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnormal, double) 17 | 18 | #endif 19 | #ifdef cl_khr_fp16 20 | 21 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 22 | 23 | // The scalar version of isnormal(half) returns an int, but the vector versions 24 | // return short. 25 | _CLC_DEF _CLC_OVERLOAD int isnormal(half x) { 26 | return __builtin_isnormal(x); 27 | } 28 | 29 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnormal, half) 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /generic/lib/relational/isnotequal.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | #define _CLC_DEFINE_ISNOTEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ 5 | _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ 6 | return (x != y); \ 7 | } \ 8 | 9 | _CLC_DEFINE_ISNOTEQUAL(int, isnotequal, float, float) 10 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isnotequal, float, float) 11 | 12 | #ifdef cl_khr_fp64 13 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 14 | 15 | // The scalar version of isnotequal(double, double) returns an int, but the vector versions 16 | // return long. 17 | 18 | _CLC_DEFINE_ISNOTEQUAL(int, isnotequal, double, double) 19 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isnotequal, double, double) 20 | 21 | #endif 22 | #ifdef cl_khr_fp16 23 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 24 | 25 | // The scalar version of isnotequal(half, half) returns an int, but the vector versions 26 | // return short. 
27 | 28 | _CLC_DEFINE_ISNOTEQUAL(int, isnotequal, half, half) 29 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isnotequal, half, half) 30 | 31 | #endif 32 | 33 | #undef _CLC_DEFINE_ISNOTEQUAL 34 | -------------------------------------------------------------------------------- /generic/lib/relational/isordered.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | #define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ 5 | _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ 6 | return isequal(x, x) && isequal(y, y); \ 7 | } \ 8 | 9 | _CLC_DEFINE_ISORDERED(int, isordered, float, float) 10 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isordered, float, float) 11 | 12 | #ifdef cl_khr_fp64 13 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 14 | 15 | // The scalar version of isordered(double, double) returns an int, but the vector versions 16 | // return long. 17 | 18 | _CLC_DEFINE_ISORDERED(int, isordered, double, double) 19 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isordered, double, double) 20 | 21 | #endif 22 | #ifdef cl_khr_fp16 23 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 24 | 25 | // The scalar version of isordered(half, half) returns an int, but the vector versions 26 | // return short. 27 | 28 | _CLC_DEFINE_ISORDERED(int, isordered, half, half) 29 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isordered, half, half) 30 | 31 | #endif 32 | 33 | #undef _CLC_DEFINE_ISORDERED 34 | -------------------------------------------------------------------------------- /generic/lib/relational/isunordered.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | //Note: It would be nice to use __builtin_isunordered with vector inputs, but it seems to only take scalar values as 5 | // input, which will produce incorrect output for vector input types. 
6 | 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, isunordered, __builtin_isunordered, float, float) 8 | 9 | #ifdef cl_khr_fp64 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // The scalar version of isunordered(double, double) returns an int, but the vector versions 14 | // return long. 15 | 16 | _CLC_DEF _CLC_OVERLOAD int isunordered(double x, double y){ 17 | return __builtin_isunordered(x, y); 18 | } 19 | 20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isunordered, double, double) 21 | 22 | #endif 23 | #ifdef cl_khr_fp16 24 | 25 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 26 | 27 | // The scalar version of isunordered(half, half) returns an int, but the vector versions 28 | // return short. 29 | 30 | _CLC_DEF _CLC_OVERLOAD int isunordered(half x, half y){ 31 | return __builtin_isunordered(x, y); 32 | } 33 | 34 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isunordered, half, half) 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /generic/lib/relational/select.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define __CLC_BODY 5 | #include 6 | #define __CLC_BODY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/relational/select.inc: -------------------------------------------------------------------------------- 1 | #ifdef __CLC_SCALAR 2 | #define __CLC_VECSIZE 3 | #endif 4 | 5 | #if __CLC_FPSIZE == 64 6 | #define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE) 7 | #define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE) 8 | #elif __CLC_FPSIZE == 32 9 | #define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE) 10 | #define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE) 11 | #elif __CLC_FPSIZE == 16 12 | #define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE) 13 | #define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE) 14 | #endif 15 | 16 | 
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_S_GENTYPE z) 17 | { 18 | return z ? y : x; 19 | } 20 | 21 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_U_GENTYPE z) 22 | { 23 | return z ? y : x; 24 | } 25 | 26 | #ifdef __CLC_FPSIZE 27 | #undef __CLC_S_GENTYPE 28 | #undef __CLC_U_GENTYPE 29 | #endif 30 | 31 | #ifdef __CLC_SCALAR 32 | #undef __CLC_VECSIZE 33 | #endif 34 | -------------------------------------------------------------------------------- /generic/lib/relational/signbit.cl: -------------------------------------------------------------------------------- 1 | #include 2 | #include "relational.h" 3 | 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, signbit, __builtin_signbitf, float) 5 | 6 | #ifdef cl_khr_fp64 7 | 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | 10 | // The scalar version of signbit(double) returns an int, but the vector versions 11 | // return long. 12 | 13 | _CLC_DEF _CLC_OVERLOAD int signbit(double x){ 14 | return __builtin_signbit(x); 15 | } 16 | 17 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, signbit, double) 18 | 19 | #endif 20 | #ifdef cl_khr_fp16 21 | 22 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable 23 | 24 | // The scalar version of signbit(half) returns an int, but the vector versions 25 | // return short. 
26 | 27 | _CLC_DEF _CLC_OVERLOAD int signbit(half x){ 28 | return __builtin_signbit(x); 29 | } 30 | 31 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, signbit, half) 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /generic/lib/shared/clamp.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | 6 | #define __CLC_BODY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/shared/clamp.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) { 2 | return (x > z ? z : (x < y ? y : x)); 3 | } 4 | 5 | #ifndef __CLC_SCALAR 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z) { 7 | return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z : (x < (__CLC_GENTYPE)y ? (__CLC_GENTYPE)y : x)); 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /generic/lib/shared/max.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | 6 | #define __CLC_BODY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/shared/max.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b) { 2 | return (a > b ? a : b); 3 | } 4 | 5 | #ifndef __CLC_SCALAR 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) { 7 | return (a > (__CLC_GENTYPE)b ? 
a : (__CLC_GENTYPE)b); 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /generic/lib/shared/min.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define __CLC_BODY 4 | #include 5 | 6 | #define __CLC_BODY 7 | #include 8 | -------------------------------------------------------------------------------- /generic/lib/shared/min.inc: -------------------------------------------------------------------------------- 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b) { 2 | return (b < a ? b : a); 3 | } 4 | 5 | #ifndef __CLC_SCALAR 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) { 7 | return (b < (__CLC_GENTYPE)a ? (__CLC_GENTYPE)b : a); 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /generic/lib/shared/vload_half.inc: -------------------------------------------------------------------------------- 1 | #if __CLC_FPSIZE == 32 2 | 3 | #ifdef __CLC_VECSIZE 4 | 5 | #if __CLC_VECSIZE == 3 6 | # define __CLC_OFFSET 4 7 | #else 8 | # define __CLC_OFFSET __CLC_VECSIZE 9 | #endif 10 | 11 | FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __private); 12 | FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __local); 13 | FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __global); 14 | FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __constant); 15 | 16 | #undef __CLC_OFFSET 17 | #else 18 | FUNC(, 1, 1, __CLC_GENTYPE, __private); 19 | FUNC(, 1, 1, __CLC_GENTYPE, __local); 20 | FUNC(, 1, 1, __CLC_GENTYPE, __global); 21 | FUNC(, 1, 1, __CLC_GENTYPE, __constant); 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /generic/lib/shared/vstore_half.inc: -------------------------------------------------------------------------------- 1 | // This does 
not exist for fp16 2 | #if __CLC_FPSIZE > 16 3 | #ifdef __CLC_VECSIZE 4 | 5 | #if __CLC_VECSIZE == 3 6 | # define __CLC_OFFSET 4 7 | #else 8 | # define __CLC_OFFSET __CLC_VECSIZE 9 | #endif 10 | 11 | FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private); 12 | FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local); 13 | FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global); 14 | 15 | #undef __CLC_OFFSET 16 | #else 17 | FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private); 18 | FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local); 19 | FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global); 20 | #endif 21 | #endif 22 | -------------------------------------------------------------------------------- /generic/lib/subnormal_disable.ll: -------------------------------------------------------------------------------- 1 | @__CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 true 2 | -------------------------------------------------------------------------------- /generic/lib/subnormal_helper_func.ll: -------------------------------------------------------------------------------- 1 | @__CLC_SUBNORMAL_DISABLE = external global i1 2 | 3 | define i1 @__clc_subnormals_disabled() #0 { 4 | %disable = load i1, i1* @__CLC_SUBNORMAL_DISABLE 5 | ret i1 %disable 6 | } 7 | 8 | attributes #0 = { alwaysinline } 9 | -------------------------------------------------------------------------------- /generic/lib/subnormal_use_default.ll: -------------------------------------------------------------------------------- 1 | @__CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 false 2 | -------------------------------------------------------------------------------- /generic/lib/workitem/get_global_id.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DEF size_t get_global_id(uint dim) { 4 | 
return get_group_id(dim) * get_local_size(dim) + get_local_id(dim) + get_global_offset(dim); 5 | } 6 | -------------------------------------------------------------------------------- /generic/lib/workitem/get_global_size.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DEF size_t get_global_size(uint dim) { 4 | return get_num_groups(dim)*get_local_size(dim); 5 | } 6 | -------------------------------------------------------------------------------- /libclc.pc.in: -------------------------------------------------------------------------------- 1 | includedir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@ 2 | libexecdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_DATADIR@/clc 3 | 4 | Name: libclc 5 | Description: Library requirements of the OpenCL C programming language 6 | Version: @PROJECT_VERSION@ 7 | Cflags: -I${includedir} 8 | Libs: -L${libexecdir} 9 | -------------------------------------------------------------------------------- /ptx-nvidiacl/lib/SOURCES: -------------------------------------------------------------------------------- 1 | mem_fence/fence.cl 2 | synchronization/barrier.cl 3 | workitem/get_global_id.cl 4 | workitem/get_group_id.cl 5 | workitem/get_local_id.cl 6 | workitem/get_local_size.cl 7 | workitem/get_num_groups.cl 8 | -------------------------------------------------------------------------------- /ptx-nvidiacl/lib/mem_fence/fence.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DEF void mem_fence(cl_mem_fence_flags flags) { 4 | if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE)) 5 | __nvvm_membar_cta(); 6 | } 7 | 8 | // We do not have separate mechanism for read and write fences. 
9 | _CLC_DEF void read_mem_fence(cl_mem_fence_flags flags) { 10 | mem_fence(flags); 11 | } 12 | /* write_mem_fence forwards to mem_fence(flags) exactly as read_mem_fence does. */ 13 | _CLC_DEF void write_mem_fence(cl_mem_fence_flags flags) { 14 | mem_fence(flags); 15 | } 16 | -------------------------------------------------------------------------------- /ptx-nvidiacl/lib/synchronization/barrier.cl: -------------------------------------------------------------------------------- 1 | #include 2 | /* Work-group barrier. NOTE(review): flags is never read; every call becomes a plain __syncthreads() regardless of which fence bits were requested. */ 3 | _CLC_DEF void barrier(cl_mem_fence_flags flags) { 4 | __syncthreads(); 5 | } 6 | 7 | -------------------------------------------------------------------------------- /ptx-nvidiacl/lib/workitem/get_global_id.cl: -------------------------------------------------------------------------------- 1 | #include 2 | /* Global id along dim, computed as group id * local size + local id. NOTE(review): generic/lib/workitem/get_global_id.cl also adds get_global_offset(dim); this target-specific version does not, and ptx-nvidiacl/lib/SOURCES lists no get_global_offset.cl. Confirm that a nonzero global work offset is unsupported on this target. */ 3 | _CLC_DEF size_t get_global_id(uint dim) { 4 | return get_group_id(dim) * get_local_size(dim) + get_local_id(dim); 5 | } 6 | -------------------------------------------------------------------------------- /ptx-nvidiacl/lib/workitem/get_group_id.cl: -------------------------------------------------------------------------------- 1 | #include 2 | /* Work-group index along dim (0, 1 or 2), read through the __nvvm_read_ptx_sreg_ctaid_{x,y,z} builtins; any other dim returns 0. */ 3 | _CLC_DEF size_t get_group_id(uint dim) { 4 | switch (dim) { 5 | case 0: return __nvvm_read_ptx_sreg_ctaid_x(); 6 | case 1: return __nvvm_read_ptx_sreg_ctaid_y(); 7 | case 2: return __nvvm_read_ptx_sreg_ctaid_z(); 8 | default: return 0; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /ptx-nvidiacl/lib/workitem/get_local_id.cl: -------------------------------------------------------------------------------- 1 | #include 2 | /* Work-item index inside its work-group along dim (0, 1 or 2), read through the __nvvm_read_ptx_sreg_tid_{x,y,z} builtins; any other dim returns 0. */ 3 | _CLC_DEF size_t get_local_id(uint dim) { 4 | switch (dim) { 5 | case 0: return __nvvm_read_ptx_sreg_tid_x(); 6 | case 1: return __nvvm_read_ptx_sreg_tid_y(); 7 | case 2: return __nvvm_read_ptx_sreg_tid_z(); 8 | default: return 0; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /ptx-nvidiacl/lib/workitem/get_local_size.cl: 
-------------------------------------------------------------------------------- 1 | #include 2 | /* Work-group size along dim (0, 1 or 2), read through the __nvvm_read_ptx_sreg_ntid_{x,y,z} builtins. Any other dim returns 1, the value the OpenCL C work-item function table specifies for an out-of-range dimindx (it was 0, which also made the generic get_global_size product collapse to 0). */ 3 | _CLC_DEF size_t get_local_size(uint dim) { 4 | switch (dim) { 5 | case 0: return __nvvm_read_ptx_sreg_ntid_x(); 6 | case 1: return __nvvm_read_ptx_sreg_ntid_y(); 7 | case 2: return __nvvm_read_ptx_sreg_ntid_z(); 8 | default: return 1; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /ptx-nvidiacl/lib/workitem/get_num_groups.cl: -------------------------------------------------------------------------------- 1 | #include 2 | /* Number of work-groups along dim (0, 1 or 2), read through the __nvvm_read_ptx_sreg_nctaid_{x,y,z} builtins. Any other dim returns 1, the value the OpenCL C work-item function table specifies for an out-of-range dimindx (it was 0). */ 3 | _CLC_DEF size_t get_num_groups(uint dim) { 4 | switch (dim) { 5 | case 0: return __nvvm_read_ptx_sreg_nctaid_x(); 6 | case 1: return __nvvm_read_ptx_sreg_nctaid_y(); 7 | case 2: return __nvvm_read_ptx_sreg_nctaid_z(); 8 | default: return 1; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /ptx/lib/SOURCES: -------------------------------------------------------------------------------- 1 | math/nextafter.cl 2 | -------------------------------------------------------------------------------- /ptx/lib/SOURCES_3.9: -------------------------------------------------------------------------------- 1 | shared/vload_half_helpers.ll 2 | shared/vstore_half_helpers.ll 3 | -------------------------------------------------------------------------------- /ptx/lib/SOURCES_4.0: -------------------------------------------------------------------------------- 1 | shared/vload_half_helpers.ll 2 | shared/vstore_half_helpers.ll 3 | -------------------------------------------------------------------------------- /ptx/lib/SOURCES_5.0: -------------------------------------------------------------------------------- 1 | shared/vload_half_helpers.ll 2 | shared/vstore_half_helpers.ll 3 | -------------------------------------------------------------------------------- /ptx/lib/math/nextafter.cl: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include "../lib/clcmacro.h" 3 | #include 4 | 5 | _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float) 6 | 7 | #ifdef cl_khr_fp64 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | _CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __clc_nextafter, double, double) 10 | #endif 11 | -------------------------------------------------------------------------------- /ptx/lib/shared/vload_half_helpers.ll: -------------------------------------------------------------------------------- 1 | define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline { 2 | %data = load half, half addrspace(0)* %ptr 3 | %res = fpext half %data to float 4 | ret float %res 5 | } 6 | 7 | define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline { 8 | %data = load half, half addrspace(1)* %ptr 9 | %res = fpext half %data to float 10 | ret float %res 11 | } 12 | 13 | define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline { 14 | %data = load half, half addrspace(3)* %ptr 15 | %res = fpext half %data to float 16 | ret float %res 17 | } 18 | 19 | define float @__clc_vload_half_float_helper__constant(half addrspace(4)* nocapture %ptr) nounwind alwaysinline { 20 | %data = load half, half addrspace(4)* %ptr 21 | %res = fpext half %data to float 22 | ret float %res 23 | } 24 | -------------------------------------------------------------------------------- /r600/lib/SOURCES: -------------------------------------------------------------------------------- 1 | math/fmax.cl 2 | math/fmin.cl 3 | synchronization/barrier.cl 4 | workitem/get_global_offset.cl 5 | workitem/get_group_id.cl 6 | workitem/get_global_size.cl 7 | workitem/get_local_id.cl 8 | workitem/get_local_size.cl 9 | workitem/get_num_groups.cl 10 | workitem/get_work_dim.cl 11 | 
-------------------------------------------------------------------------------- /r600/lib/SOURCES_3.9: -------------------------------------------------------------------------------- 1 | image/get_image_dim.cl 2 | image/get_image_width.cl 3 | image/get_image_height.cl 4 | image/get_image_depth.cl 5 | image/get_image_channel_data_type.cl 6 | image/get_image_channel_order.cl 7 | image/get_image_attributes_impl.ll 8 | image/read_imagef.cl 9 | image/read_imagei.cl 10 | image/read_imageui.cl 11 | image/read_image_impl.ll 12 | image/write_imagef.cl 13 | image/write_imagei.cl 14 | image/write_imageui.cl 15 | image/write_image_impl.ll 16 | -------------------------------------------------------------------------------- /r600/lib/image/get_image_channel_data_type.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL int __clc_get_image_channel_data_type_2d(image2d_t); 4 | _CLC_DECL int __clc_get_image_channel_data_type_3d(image3d_t); 5 | 6 | _CLC_OVERLOAD _CLC_DEF int 7 | get_image_channel_data_type(image2d_t image) { 8 | return __clc_get_image_channel_data_type_2d(image); 9 | } 10 | _CLC_OVERLOAD _CLC_DEF int 11 | get_image_channel_data_type(image3d_t image) { 12 | return __clc_get_image_channel_data_type_3d(image); 13 | } 14 | -------------------------------------------------------------------------------- /r600/lib/image/get_image_channel_order.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL int __clc_get_image_channel_order_2d(image2d_t); 4 | _CLC_DECL int __clc_get_image_channel_order_3d(image3d_t); 5 | 6 | _CLC_OVERLOAD _CLC_DEF int 7 | get_image_channel_order(image2d_t image) { 8 | return __clc_get_image_channel_order_2d(image); 9 | } 10 | _CLC_OVERLOAD _CLC_DEF int 11 | get_image_channel_order(image3d_t image) { 12 | return __clc_get_image_channel_order_3d(image); 13 | } 14 | 
-------------------------------------------------------------------------------- /r600/lib/image/get_image_depth.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL int __clc_get_image_depth_3d(image3d_t); 4 | 5 | _CLC_OVERLOAD _CLC_DEF int 6 | get_image_depth(image3d_t image) { 7 | return __clc_get_image_depth_3d(image); 8 | } 9 | -------------------------------------------------------------------------------- /r600/lib/image/get_image_dim.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_OVERLOAD _CLC_DEF int2 get_image_dim (image2d_t image) { 4 | return (int2)(get_image_width(image), get_image_height(image)); 5 | } 6 | _CLC_OVERLOAD _CLC_DEF int4 get_image_dim (image3d_t image) { 7 | return (int4)(get_image_width(image), get_image_height(image), 8 | get_image_depth(image), 0); 9 | } 10 | -------------------------------------------------------------------------------- /r600/lib/image/get_image_height.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL int __clc_get_image_height_2d(image2d_t); 4 | _CLC_DECL int __clc_get_image_height_3d(image3d_t); 5 | 6 | _CLC_OVERLOAD _CLC_DEF int 7 | get_image_height(image2d_t image) { 8 | return __clc_get_image_height_2d(image); 9 | } 10 | _CLC_OVERLOAD _CLC_DEF int 11 | get_image_height(image3d_t image) { 12 | return __clc_get_image_height_3d(image); 13 | } 14 | -------------------------------------------------------------------------------- /r600/lib/image/get_image_width.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL int __clc_get_image_width_2d(image2d_t); 4 | _CLC_DECL int __clc_get_image_width_3d(image3d_t); 5 | 6 | _CLC_OVERLOAD _CLC_DEF int 7 | get_image_width(image2d_t image) { 8 | return __clc_get_image_width_2d(image); 9 | } 10 | _CLC_OVERLOAD _CLC_DEF int 11 | 
get_image_width(image3d_t image) { 12 | return __clc_get_image_width_3d(image); 13 | } 14 | -------------------------------------------------------------------------------- /r600/lib/image/read_imagef.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); 4 | 5 | _CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, 6 | int2 coord) { 7 | float2 coord_float = (float2)(coord.x, coord.y); 8 | return __clc_read_imagef_tex(image, sampler, coord_float); 9 | } 10 | 11 | _CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, 12 | float2 coord) { 13 | return __clc_read_imagef_tex(image, sampler, coord); 14 | } 15 | -------------------------------------------------------------------------------- /r600/lib/image/read_imagei.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); 4 | 5 | int4 __clc_reinterpret_v4f_to_v4i(float4 v) { 6 | union { 7 | int4 v4i; 8 | float4 v4f; 9 | } res = { .v4f = v}; 10 | return res.v4i; 11 | } 12 | 13 | _CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, 14 | int2 coord) { 15 | float2 coord_float = (float2)(coord.x, coord.y); 16 | return __clc_reinterpret_v4f_to_v4i( 17 | __clc_read_imagef_tex(image, sampler, coord_float)); 18 | } 19 | _CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, 20 | float2 coord) { 21 | return __clc_reinterpret_v4f_to_v4i( 22 | __clc_read_imagef_tex(image, sampler, coord)); 23 | } 24 | -------------------------------------------------------------------------------- /r600/lib/image/read_imageui.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); 4 | 5 | uint4 
__clc_reinterpret_v4f_to_v4ui(float4 v) { 6 | union { 7 | uint4 v4ui; 8 | float4 v4f; 9 | } res = { .v4f = v}; 10 | return res.v4ui; 11 | } 12 | 13 | _CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, 14 | int2 coord) { 15 | float2 coord_float = (float2)(coord.x, coord.y); 16 | return __clc_reinterpret_v4f_to_v4ui( 17 | __clc_read_imagef_tex(image, sampler, coord_float)); 18 | } 19 | _CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, 20 | float2 coord) { 21 | return __clc_reinterpret_v4f_to_v4ui( 22 | __clc_read_imagef_tex(image, sampler, coord)); 23 | } 24 | -------------------------------------------------------------------------------- /r600/lib/image/write_imagef.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL void __clc_write_imagef_2d(image2d_t image, int2 coord, float4 color); 4 | 5 | _CLC_OVERLOAD _CLC_DEF void 6 | write_imagef(image2d_t image, int2 coord, float4 color) 7 | { 8 | __clc_write_imagef_2d(image, coord, color); 9 | } 10 | -------------------------------------------------------------------------------- /r600/lib/image/write_imagei.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL void __clc_write_imagei_2d(image2d_t image, int2 coord, int4 color); 4 | 5 | _CLC_OVERLOAD _CLC_DEF void 6 | write_imagei(image2d_t image, int2 coord, int4 color) 7 | { 8 | __clc_write_imagei_2d(image, coord, color); 9 | } 10 | -------------------------------------------------------------------------------- /r600/lib/image/write_imageui.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DECL void __clc_write_imageui_2d(image2d_t image, int2 coord, uint4 color); 4 | 5 | _CLC_OVERLOAD _CLC_DEF void 6 | write_imageui(image2d_t image, int2 coord, uint4 color) 7 | { 8 | __clc_write_imageui_2d(image, coord, color); 9 | } 10 | 
-------------------------------------------------------------------------------- /r600/lib/math/fmax.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../../../generic/lib/clcmacro.h" 4 | #include "../../../generic/lib/math/math.h" 5 | 6 | _CLC_DEF _CLC_OVERLOAD float fmax(float x, float y) 7 | { 8 | /* Flush denormals if not enabled. Otherwise fmax instruction flushes 9 | * the values for comparison, but outputs original denormal */ 10 | x = __clc_flush_denormal_if_not_supported(x); 11 | y = __clc_flush_denormal_if_not_supported(y); 12 | return __builtin_fmaxf(x, y); 13 | } 14 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, fmax, float, float) 15 | 16 | #ifdef cl_khr_fp64 17 | 18 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 19 | 20 | _CLC_DEF _CLC_OVERLOAD double fmax(double x, double y) 21 | { 22 | return __builtin_fmax(x, y); 23 | } 24 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmax, double, double) 25 | 26 | #endif 27 | 28 | #define __CLC_BODY <../../../generic/lib/math/fmax.inc> 29 | #include 30 | -------------------------------------------------------------------------------- /r600/lib/math/fmin.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../../../generic/lib/clcmacro.h" 4 | #include "../../../generic/lib/math/math.h" 5 | 6 | _CLC_DEF _CLC_OVERLOAD float fmin(float x, float y) 7 | { 8 | /* fcanonicalize removes sNaNs and flushes denormals if not enabled. 
9 | * Otherwise fmin instruction flushes the values for comparison, 10 | * but outputs original denormal */ 11 | x = __clc_flush_denormal_if_not_supported(x); 12 | y = __clc_flush_denormal_if_not_supported(y); 13 | return __builtin_fminf(x, y); 14 | } 15 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, fmin, float, float) 16 | 17 | #ifdef cl_khr_fp64 18 | 19 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 20 | 21 | _CLC_DEF _CLC_OVERLOAD double fmin(double x, double y) 22 | { 23 | return __builtin_fmin(x, y); 24 | } 25 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmin, double, double) 26 | 27 | #endif 28 | 29 | #define __CLC_BODY <../../../generic/lib/math/fmin.inc> 30 | #include 31 | -------------------------------------------------------------------------------- /r600/lib/synchronization/barrier.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DEF void __clc_r600_barrier(void) __asm("llvm.r600.group.barrier"); 4 | 5 | _CLC_DEF void barrier(uint flags) 6 | { 7 | // We should call mem_fence here, but that is not implemented for r600 yet 8 | __clc_r600_barrier(); 9 | } 10 | -------------------------------------------------------------------------------- /r600/lib/workitem/get_global_offset.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DEF uint get_global_offset(uint dim) 4 | { 5 | __attribute__((address_space(7))) uint * ptr = 6 | (__attribute__((address_space(7))) uint *) 7 | __builtin_r600_implicitarg_ptr(); 8 | if (dim < 3) 9 | return ptr[dim + 1]; 10 | return 0; 11 | } 12 | -------------------------------------------------------------------------------- /r600/lib/workitem/get_global_size.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | uint __clc_r600_get_global_size_x(void) __asm("llvm.r600.read.global.size.x"); 4 | uint __clc_r600_get_global_size_y(void) 
__asm("llvm.r600.read.global.size.y"); 5 | uint __clc_r600_get_global_size_z(void) __asm("llvm.r600.read.global.size.z"); 6 | 7 | _CLC_DEF size_t get_global_size(uint dim) 8 | { 9 | switch (dim) { 10 | case 0: return __clc_r600_get_global_size_x(); 11 | case 1: return __clc_r600_get_global_size_y(); 12 | case 2: return __clc_r600_get_global_size_z(); 13 | default: return 1; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /r600/lib/workitem/get_group_id.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DEF uint get_group_id(uint dim) 4 | { 5 | switch(dim) { 6 | case 0: return __builtin_r600_read_tgid_x(); 7 | case 1: return __builtin_r600_read_tgid_y(); 8 | case 2: return __builtin_r600_read_tgid_z(); 9 | default: return 1; 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /r600/lib/workitem/get_local_id.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DEF uint get_local_id(uint dim) 4 | { 5 | switch(dim) { 6 | case 0: return __builtin_r600_read_tidig_x(); 7 | case 1: return __builtin_r600_read_tidig_y(); 8 | case 2: return __builtin_r600_read_tidig_z(); 9 | default: return 1; 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /r600/lib/workitem/get_local_size.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | uint __clc_r600_get_local_size_x(void) __asm("llvm.r600.read.local.size.x"); 4 | uint __clc_r600_get_local_size_y(void) __asm("llvm.r600.read.local.size.y"); 5 | uint __clc_r600_get_local_size_z(void) __asm("llvm.r600.read.local.size.z"); 6 | 7 | _CLC_DEF size_t get_local_size(uint dim) 8 | { 9 | switch (dim) { 10 | case 0: return __clc_r600_get_local_size_x(); 11 | case 1: return __clc_r600_get_local_size_y(); 12 | case 2: return 
__clc_r600_get_local_size_z(); 13 | default: return 1; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /r600/lib/workitem/get_num_groups.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | uint __clc_r600_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x"); 4 | uint __clc_r600_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y"); 5 | uint __clc_r600_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z"); 6 | 7 | _CLC_DEF size_t get_num_groups(uint dim) 8 | { 9 | switch (dim) { 10 | case 0: return __clc_r600_get_num_groups_x(); 11 | case 1: return __clc_r600_get_num_groups_y(); 12 | case 2: return __clc_r600_get_num_groups_z(); 13 | default: return 1; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /r600/lib/workitem/get_work_dim.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | _CLC_DEF uint get_work_dim(void) 4 | { 5 | __attribute__((address_space(7))) uint * ptr = 6 | (__attribute__((address_space(7))) uint *) 7 | __builtin_r600_implicitarg_ptr(); 8 | return ptr[0]; 9 | } 10 | -------------------------------------------------------------------------------- /test/add_sat.cl: -------------------------------------------------------------------------------- 1 | __kernel void foo(__global char *a, __global char *b, __global char *c) { 2 | *a = add_sat(*b, *c); 3 | } 4 | -------------------------------------------------------------------------------- /test/as_type.cl: -------------------------------------------------------------------------------- 1 | __kernel void foo(int4 *x, float4 *y) { 2 | *x = as_int4(*y); 3 | } 4 | -------------------------------------------------------------------------------- /test/convert.cl: -------------------------------------------------------------------------------- 1 | __kernel void foo(int4 *x, float4 *y) { 2 | *x 
= convert_int4(*y); 3 | } 4 | -------------------------------------------------------------------------------- /test/cos.cl: -------------------------------------------------------------------------------- 1 | __kernel void foo(float4 *f) { 2 | *f = cos(*f); 3 | } 4 | -------------------------------------------------------------------------------- /test/cross.cl: -------------------------------------------------------------------------------- 1 | __kernel void foo(float4 *f) { 2 | *f = cross(f[0], f[1]); 3 | } 4 | -------------------------------------------------------------------------------- /test/fabs.cl: -------------------------------------------------------------------------------- 1 | __kernel void foo(float *f) { 2 | *f = fabs(*f); 3 | } 4 | -------------------------------------------------------------------------------- /test/get_group_id.cl: -------------------------------------------------------------------------------- 1 | __kernel void foo(int *i) { 2 | i[get_group_id(0)] = 1; 3 | } 4 | -------------------------------------------------------------------------------- /test/rsqrt.cl: -------------------------------------------------------------------------------- 1 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 2 | 3 | __kernel void foo(float4 *x, double4 *y) { 4 | x[1] = rsqrt(x[0]); 5 | y[1] = rsqrt(y[0]); 6 | } 7 | -------------------------------------------------------------------------------- /test/subsat.cl: -------------------------------------------------------------------------------- 1 | __kernel void test_subsat_char(char *a, char x, char y) { 2 | *a = sub_sat(x, y); 3 | return; 4 | } 5 | 6 | __kernel void test_subsat_uchar(uchar *a, uchar x, uchar y) { 7 | *a = sub_sat(x, y); 8 | return; 9 | } 10 | 11 | __kernel void test_subsat_long(long *a, long x, long y) { 12 | *a = sub_sat(x, y); 13 | return; 14 | } 15 | 16 | __kernel void test_subsat_ulong(ulong *a, ulong x, ulong y) { 17 | *a = sub_sat(x, y); 18 | return; 19 | } 
--------------------------------------------------------------------------------