├── .gitignore
├── .travis.yml
├── CMakeLists.txt
├── CREDITS.TXT
├── LICENSE.TXT
├── README.TXT
├── amdgcn-amdhsa
    └── lib
    │   ├── SOURCES
    │   └── workitem
    │       ├── get_global_size.cl
    │       ├── get_local_size.cl
    │       └── get_num_groups.cl
├── amdgcn-mesa3d
├── amdgcn
    └── lib
    │   ├── SOURCES
    │   ├── cl_khr_int64_extended_atomics
    │       └── minmax_helpers.ll
    │   ├── integer
    │       ├── popcount.cl
    │       └── popcount.inc
    │   ├── math
    │       ├── fmax.cl
    │       ├── fmin.cl
    │       └── ldexp.cl
    │   ├── mem_fence
    │       └── fence.cl
    │   ├── synchronization
    │       └── barrier.cl
    │   └── workitem
    │       ├── get_global_offset.cl
    │       ├── get_global_size.cl
    │       ├── get_group_id.cl
    │       ├── get_local_id.cl
    │       ├── get_local_size.cl
    │       ├── get_num_groups.cl
    │       └── get_work_dim.cl
├── amdgpu
    └── lib
    │   ├── SOURCES
    │   ├── SOURCES_3.9
    │   ├── SOURCES_4.0
    │   ├── SOURCES_5.0
    │   ├── math
    │       ├── half_exp.cl
    │       ├── half_exp10.cl
    │       ├── half_exp2.cl
    │       ├── half_log.cl
    │       ├── half_log10.cl
    │       ├── half_log2.cl
    │       ├── half_native_unary.inc
    │       ├── half_recip.cl
    │       ├── half_rsqrt.cl
    │       ├── half_sqrt.cl
    │       ├── native_exp.cl
    │       ├── native_exp.inc
    │       ├── native_log.cl
    │       ├── native_log.inc
    │       ├── native_log10.cl
    │       ├── native_log10.inc
    │       ├── nextafter.cl
    │       └── sqrt.cl
    │   └── shared
    │       ├── vload_half_helpers.ll
    │       └── vstore_half_helpers.ll
├── build
    ├── metabuild.py
    └── ninja_syntax.py
├── check_external_calls.sh
├── cmake
    ├── CMakeCLCCompiler.cmake.in
    ├── CMakeCLCInformation.cmake
    ├── CMakeDetermineCLCCompiler.cmake
    ├── CMakeDetermineLLAsmCompiler.cmake
    ├── CMakeLLAsmCompiler.cmake.in
    ├── CMakeLLAsmInformation.cmake
    ├── CMakeTestCLCCompiler.cmake
    └── CMakeTestLLAsmCompiler.cmake
├── compile-test.sh
├── configure.py
├── generic
    ├── include
    │   ├── clc
    │   │   ├── as_type.h
    │   │   ├── async
    │   │   │   ├── async_work_group_copy.h
    │   │   │   ├── async_work_group_copy.inc
    │   │   │   ├── async_work_group_strided_copy.h
    │   │   │   ├── async_work_group_strided_copy.inc
    │   │   │   ├── gentype.inc
    │   │   │   ├── prefetch.h
    │   │   │   ├── prefetch.inc
    │   │   │   └── wait_group_events.h
    │   │   ├── atom_decl_int32.inc
    │   │   ├── atom_decl_int64.inc
    │   │   ├── atomic
    │   │   │   ├── atomic_add.h
    │   │   │   ├── atomic_and.h
    │   │   │   ├── atomic_cmpxchg.h
    │   │   │   ├── atomic_dec.h
    │   │   │   ├── atomic_decl.inc
    │   │   │   ├── atomic_inc.h
    │   │   │   ├── atomic_max.h
    │   │   │   ├── atomic_min.h
    │   │   │   ├── atomic_or.h
    │   │   │   ├── atomic_sub.h
    │   │   │   ├── atomic_xchg.h
    │   │   │   └── atomic_xor.h
    │   │   ├── cl_khr_global_int32_base_atomics
    │   │   │   ├── atom_add.h
    │   │   │   ├── atom_cmpxchg.h
    │   │   │   ├── atom_dec.h
    │   │   │   ├── atom_inc.h
    │   │   │   ├── atom_sub.h
    │   │   │   └── atom_xchg.h
    │   │   ├── cl_khr_global_int32_extended_atomics
    │   │   │   ├── atom_and.h
    │   │   │   ├── atom_max.h
    │   │   │   ├── atom_min.h
    │   │   │   ├── atom_or.h
    │   │   │   └── atom_xor.h
    │   │   ├── cl_khr_int64_base_atomics
    │   │   │   ├── atom_add.h
    │   │   │   ├── atom_cmpxchg.h
    │   │   │   ├── atom_dec.h
    │   │   │   ├── atom_inc.h
    │   │   │   ├── atom_sub.h
    │   │   │   └── atom_xchg.h
    │   │   ├── cl_khr_int64_extended_atomics
    │   │   │   ├── atom_and.h
    │   │   │   ├── atom_max.h
    │   │   │   ├── atom_min.h
    │   │   │   ├── atom_or.h
    │   │   │   └── atom_xor.h
    │   │   ├── cl_khr_local_int32_base_atomics
    │   │   │   ├── atom_add.h
    │   │   │   ├── atom_cmpxchg.h
    │   │   │   ├── atom_dec.h
    │   │   │   ├── atom_inc.h
    │   │   │   ├── atom_sub.h
    │   │   │   └── atom_xchg.h
    │   │   ├── cl_khr_local_int32_extended_atomics
    │   │   │   ├── atom_and.h
    │   │   │   ├── atom_max.h
    │   │   │   ├── atom_min.h
    │   │   │   ├── atom_or.h
    │   │   │   └── atom_xor.h
    │   │   ├── clc.h
    │   │   ├── clcfunc.h
    │   │   ├── clcmacros.h
    │   │   ├── clctypes.h
    │   │   ├── common
    │   │   │   ├── degrees.h
    │   │   │   ├── degrees.inc
    │   │   │   ├── mix.h
    │   │   │   ├── mix.inc
    │   │   │   ├── radians.h
    │   │   │   ├── radians.inc
    │   │   │   ├── sign.h
    │   │   │   ├── smoothstep.h
    │   │   │   ├── smoothstep.inc
    │   │   │   ├── step.h
    │   │   │   └── step.inc
    │   │   ├── convert.h
    │   │   ├── explicit_fence
    │   │   │   └── explicit_memory_fence.h
    │   │   ├── float
    │   │   │   └── definitions.h
    │   │   ├── geometric
    │   │   │   ├── cross.h
    │   │   │   ├── distance.h
    │   │   │   ├── distance.inc
    │   │   │   ├── dot.h
    │   │   │   ├── dot.inc
    │   │   │   ├── fast_distance.h
    │   │   │   ├── fast_distance.inc
    │   │   │   ├── fast_length.h
    │   │   │   ├── fast_length.inc
    │   │   │   ├── fast_normalize.h
    │   │   │   ├── fast_normalize.inc
    │   │   │   ├── floatn.inc
    │   │   │   ├── length.h
    │   │   │   ├── length.inc
    │   │   │   ├── normalize.h
    │   │   │   └── normalize.inc
    │   │   ├── image
    │   │   │   ├── image.h
    │   │   │   └── image_defines.h
    │   │   ├── integer
    │   │   │   ├── abs.h
    │   │   │   ├── abs.inc
    │   │   │   ├── abs_diff.h
    │   │   │   ├── abs_diff.inc
    │   │   │   ├── add_sat.h
    │   │   │   ├── add_sat.inc
    │   │   │   ├── clz.h
    │   │   │   ├── clz.inc
    │   │   │   ├── definitions.h
    │   │   │   ├── gentype.inc
    │   │   │   ├── hadd.h
    │   │   │   ├── hadd.inc
    │   │   │   ├── integer-gentype.inc
    │   │   │   ├── mad24.h
    │   │   │   ├── mad24.inc
    │   │   │   ├── mad_hi.h
    │   │   │   ├── mad_sat.h
    │   │   │   ├── mad_sat.inc
    │   │   │   ├── mul24.h
    │   │   │   ├── mul24.inc
    │   │   │   ├── mul_hi.h
    │   │   │   ├── mul_hi.inc
    │   │   │   ├── popcount.h
    │   │   │   ├── rhadd.h
    │   │   │   ├── rhadd.inc
    │   │   │   ├── rotate.h
    │   │   │   ├── rotate.inc
    │   │   │   ├── sub_sat.h
    │   │   │   ├── sub_sat.inc
    │   │   │   ├── unary.inc
    │   │   │   └── upsample.h
    │   │   ├── math
    │   │   │   ├── acos.h
    │   │   │   ├── acosh.h
    │   │   │   ├── acospi.h
    │   │   │   ├── asin.h
    │   │   │   ├── asinh.h
    │   │   │   ├── asinpi.h
    │   │   │   ├── atan.h
    │   │   │   ├── atan2.h
    │   │   │   ├── atan2pi.h
    │   │   │   ├── atanh.h
    │   │   │   ├── atanpi.h
    │   │   │   ├── binary_decl.inc
    │   │   │   ├── binary_decl_tt.inc
    │   │   │   ├── cbrt.h
    │   │   │   ├── ceil.h
    │   │   │   ├── copysign.h
    │   │   │   ├── cos.h
    │   │   │   ├── cosh.h
    │   │   │   ├── cospi.h
    │   │   │   ├── erf.h
    │   │   │   ├── erfc.h
    │   │   │   ├── exp.h
    │   │   │   ├── exp10.h
    │   │   │   ├── exp2.h
    │   │   │   ├── expm1.h
    │   │   │   ├── fabs.h
    │   │   │   ├── fdim.h
    │   │   │   ├── floor.h
    │   │   │   ├── fma.h
    │   │   │   ├── fmax.h
    │   │   │   ├── fmin.h
    │   │   │   ├── fmod.h
    │   │   │   ├── fract.h
    │   │   │   ├── fract.inc
    │   │   │   ├── frexp.h
    │   │   │   ├── frexp.inc
    │   │   │   ├── gentype.inc
    │   │   │   ├── half_cos.h
    │   │   │   ├── half_divide.h
    │   │   │   ├── half_exp.h
    │   │   │   ├── half_exp10.h
    │   │   │   ├── half_exp2.h
    │   │   │   ├── half_log.h
    │   │   │   ├── half_log10.h
    │   │   │   ├── half_log2.h
    │   │   │   ├── half_powr.h
    │   │   │   ├── half_recip.h
    │   │   │   ├── half_rsqrt.h
    │   │   │   ├── half_sin.h
    │   │   │   ├── half_sqrt.h
    │   │   │   ├── half_tan.h
    │   │   │   ├── hypot.h
    │   │   │   ├── ilogb.h
    │   │   │   ├── ilogb.inc
    │   │   │   ├── ldexp.h
    │   │   │   ├── ldexp.inc
    │   │   │   ├── lgamma.h
    │   │   │   ├── lgamma_r.h
    │   │   │   ├── lgamma_r.inc
    │   │   │   ├── log.h
    │   │   │   ├── log10.h
    │   │   │   ├── log1p.h
    │   │   │   ├── log2.h
    │   │   │   ├── logb.h
    │   │   │   ├── mad.h
    │   │   │   ├── maxmag.h
    │   │   │   ├── minmag.h
    │   │   │   ├── modf.h
    │   │   │   ├── modf.inc
    │   │   │   ├── nan.h
    │   │   │   ├── nan.inc
    │   │   │   ├── native_cos.h
    │   │   │   ├── native_divide.h
    │   │   │   ├── native_exp.h
    │   │   │   ├── native_exp10.h
    │   │   │   ├── native_exp2.h
    │   │   │   ├── native_log.h
    │   │   │   ├── native_log10.h
    │   │   │   ├── native_log2.h
    │   │   │   ├── native_powr.h
    │   │   │   ├── native_recip.h
    │   │   │   ├── native_rsqrt.h
    │   │   │   ├── native_sin.h
    │   │   │   ├── native_sqrt.h
    │   │   │   ├── native_tan.h
    │   │   │   ├── nextafter.h
    │   │   │   ├── pow.h
    │   │   │   ├── pown.h
    │   │   │   ├── pown.inc
    │   │   │   ├── powr.h
    │   │   │   ├── remainder.h
    │   │   │   ├── remquo.h
    │   │   │   ├── remquo.inc
    │   │   │   ├── rint.h
    │   │   │   ├── rootn.h
    │   │   │   ├── rootn.inc
    │   │   │   ├── round.h
    │   │   │   ├── rsqrt.h
    │   │   │   ├── sin.h
    │   │   │   ├── sincos.h
    │   │   │   ├── sincos.inc
    │   │   │   ├── sinh.h
    │   │   │   ├── sinpi.h
    │   │   │   ├── sqrt.h
    │   │   │   ├── tan.h
    │   │   │   ├── tanh.h
    │   │   │   ├── tanpi.h
    │   │   │   ├── ternary_decl.inc
    │   │   │   ├── tgamma.h
    │   │   │   ├── trunc.h
    │   │   │   └── unary_decl.inc
    │   │   ├── misc
    │   │   │   ├── shuffle.h
    │   │   │   └── shuffle2.h
    │   │   ├── relational
    │   │   │   ├── all.h
    │   │   │   ├── any.h
    │   │   │   ├── binary_decl.inc
    │   │   │   ├── bitselect.h
    │   │   │   ├── bitselect.inc
    │   │   │   ├── floatn.inc
    │   │   │   ├── isequal.h
    │   │   │   ├── isfinite.h
    │   │   │   ├── isgreater.h
    │   │   │   ├── isgreaterequal.h
    │   │   │   ├── isinf.h
    │   │   │   ├── isless.h
    │   │   │   ├── islessequal.h
    │   │   │   ├── islessgreater.h
    │   │   │   ├── isnan.h
    │   │   │   ├── isnormal.h
    │   │   │   ├── isnotequal.h
    │   │   │   ├── isordered.h
    │   │   │   ├── isunordered.h
    │   │   │   ├── select.h
    │   │   │   ├── select.inc
    │   │   │   ├── signbit.h
    │   │   │   └── unary_decl.inc
    │   │   ├── shared
    │   │   │   ├── clamp.h
    │   │   │   ├── clamp.inc
    │   │   │   ├── max.h
    │   │   │   ├── max.inc
    │   │   │   ├── min.h
    │   │   │   ├── min.inc
    │   │   │   ├── vload.h
    │   │   │   └── vstore.h
    │   │   ├── synchronization
    │   │   │   ├── barrier.h
    │   │   │   └── cl_mem_fence_flags.h
    │   │   └── workitem
    │   │   │   ├── get_global_id.h
    │   │   │   ├── get_global_offset.h
    │   │   │   ├── get_global_size.h
    │   │   │   ├── get_group_id.h
    │   │   │   ├── get_local_id.h
    │   │   │   ├── get_local_size.h
    │   │   │   ├── get_num_groups.h
    │   │   │   └── get_work_dim.h
    │   ├── config.h
    │   ├── integer
    │   │   ├── popcount.h
    │   │   └── unary_intrin.inc
    │   ├── math
    │   │   ├── binary_intrin.inc
    │   │   ├── clc_exp10.h
    │   │   ├── clc_fma.h
    │   │   ├── clc_fmod.h
    │   │   ├── clc_hypot.h
    │   │   ├── clc_ldexp.h
    │   │   ├── clc_nextafter.h
    │   │   ├── clc_pow.h
    │   │   ├── clc_pown.h
    │   │   ├── clc_pown.inc
    │   │   ├── clc_powr.h
    │   │   ├── clc_remainder.h
    │   │   ├── clc_remquo.h
    │   │   ├── clc_rootn.h
    │   │   ├── clc_rootn.inc
    │   │   ├── clc_sqrt.h
    │   │   ├── clc_tan.h
    │   │   ├── clc_tanpi.h
    │   │   ├── ternary_intrin.inc
    │   │   └── unary_intrin.inc
    │   └── utils.h
    └── lib
    │   ├── SOURCES
    │   ├── async
    │       ├── async_work_group_copy.cl
    │       ├── async_work_group_copy.inc
    │       ├── async_work_group_strided_copy.cl
    │       ├── async_work_group_strided_copy.inc
    │       ├── prefetch.cl
    │       ├── prefetch.inc
    │       └── wait_group_events.cl
    │   ├── atom_int32_binary.inc
    │   ├── atomic
    │       ├── atomic_add.cl
    │       ├── atomic_and.cl
    │       ├── atomic_cmpxchg.cl
    │       ├── atomic_dec.cl
    │       ├── atomic_inc.cl
    │       ├── atomic_max.cl
    │       ├── atomic_min.cl
    │       ├── atomic_or.cl
    │       ├── atomic_sub.cl
    │       ├── atomic_xchg.cl
    │       └── atomic_xor.cl
    │   ├── cl_khr_global_int32_base_atomics
    │       ├── atom_add.cl
    │       ├── atom_cmpxchg.cl
    │       ├── atom_dec.cl
    │       ├── atom_inc.cl
    │       ├── atom_sub.cl
    │       └── atom_xchg.cl
    │   ├── cl_khr_global_int32_extended_atomics
    │       ├── atom_and.cl
    │       ├── atom_max.cl
    │       ├── atom_min.cl
    │       ├── atom_or.cl
    │       └── atom_xor.cl
    │   ├── cl_khr_int64_base_atomics
    │       ├── atom_add.cl
    │       ├── atom_cmpxchg.cl
    │       ├── atom_dec.cl
    │       ├── atom_inc.cl
    │       ├── atom_sub.cl
    │       └── atom_xchg.cl
    │   ├── cl_khr_int64_extended_atomics
    │       ├── atom_and.cl
    │       ├── atom_max.cl
    │       ├── atom_min.cl
    │       ├── atom_or.cl
    │       └── atom_xor.cl
    │   ├── cl_khr_local_int32_base_atomics
    │       ├── atom_add.cl
    │       ├── atom_cmpxchg.cl
    │       ├── atom_dec.cl
    │       ├── atom_inc.cl
    │       ├── atom_sub.cl
    │       └── atom_xchg.cl
    │   ├── cl_khr_local_int32_extended_atomics
    │       ├── atom_and.cl
    │       ├── atom_max.cl
    │       ├── atom_min.cl
    │       ├── atom_or.cl
    │       └── atom_xor.cl
    │   ├── clc_unary.inc
    │   ├── clcmacro.h
    │   ├── common
    │       ├── degrees.cl
    │       ├── mix.cl
    │       ├── mix.inc
    │       ├── radians.cl
    │       ├── sign.cl
    │       ├── smoothstep.cl
    │       └── step.cl
    │   ├── gen_convert.py
    │   ├── geometric
    │       ├── cross.cl
    │       ├── distance.cl
    │       ├── distance.inc
    │       ├── dot.cl
    │       ├── fast_distance.cl
    │       ├── fast_distance.inc
    │       ├── fast_length.cl
    │       ├── fast_normalize.cl
    │       ├── fast_normalize.inc
    │       ├── length.cl
    │       └── normalize.cl
    │   ├── integer
    │       ├── abs.cl
    │       ├── abs.inc
    │       ├── abs_diff.cl
    │       ├── abs_diff.inc
    │       ├── add_sat.cl
    │       ├── clz.cl
    │       ├── hadd.cl
    │       ├── hadd.inc
    │       ├── mad24.cl
    │       ├── mad24.inc
    │       ├── mad_sat.cl
    │       ├── mul24.cl
    │       ├── mul24.inc
    │       ├── mul_hi.cl
    │       ├── popcount.cl
    │       ├── rhadd.cl
    │       ├── rhadd.inc
    │       ├── rotate.cl
    │       ├── rotate.inc
    │       ├── sub_sat.cl
    │       └── upsample.cl
    │   ├── math
    │       ├── acos.cl
    │       ├── acos.inc
    │       ├── acosh.cl
    │       ├── acospi.cl
    │       ├── asin.cl
    │       ├── asin.inc
    │       ├── asinh.cl
    │       ├── asinpi.cl
    │       ├── atan.cl
    │       ├── atan2.cl
    │       ├── atan2pi.cl
    │       ├── atanh.cl
    │       ├── atanpi.cl
    │       ├── binary_impl.inc
    │       ├── cbrt.cl
    │       ├── ceil.cl
    │       ├── clc_exp10.cl
    │       ├── clc_fma.cl
    │       ├── clc_fmod.cl
    │       ├── clc_hypot.cl
    │       ├── clc_ldexp.cl
    │       ├── clc_nextafter.cl
    │       ├── clc_pow.cl
    │       ├── clc_pown.cl
    │       ├── clc_powr.cl
    │       ├── clc_remainder.cl
    │       ├── clc_remquo.cl
    │       ├── clc_rootn.cl
    │       ├── clc_sqrt.cl
    │       ├── clc_sqrt_impl.inc
    │       ├── clc_sw_binary.inc
    │       ├── clc_sw_unary.inc
    │       ├── clc_tan.cl
    │       ├── clc_tanpi.cl
    │       ├── copysign.cl
    │       ├── cos.cl
    │       ├── cosh.cl
    │       ├── cospi.cl
    │       ├── ep_log.cl
    │       ├── ep_log.h
    │       ├── erf.cl
    │       ├── erfc.cl
    │       ├── exp.cl
    │       ├── exp10.cl
    │       ├── exp2.cl
    │       ├── exp_helper.cl
    │       ├── exp_helper.h
    │       ├── expm1.cl
    │       ├── fabs.cl
    │       ├── fdim.cl
    │       ├── fdim.inc
    │       ├── floor.cl
    │       ├── fma.cl
    │       ├── fma.inc
    │       ├── fmax.cl
    │       ├── fmax.inc
    │       ├── fmin.cl
    │       ├── fmin.inc
    │       ├── fmod.cl
    │       ├── fract.cl
    │       ├── fract.inc
    │       ├── frexp.cl
    │       ├── frexp.inc
    │       ├── half_binary.inc
    │       ├── half_cos.cl
    │       ├── half_divide.cl
    │       ├── half_exp.cl
    │       ├── half_exp10.cl
    │       ├── half_exp2.cl
    │       ├── half_log.cl
    │       ├── half_log10.cl
    │       ├── half_log2.cl
    │       ├── half_powr.cl
    │       ├── half_recip.cl
    │       ├── half_rsqrt.cl
    │       ├── half_sin.cl
    │       ├── half_sqrt.cl
    │       ├── half_tan.cl
    │       ├── half_unary.inc
    │       ├── hypot.cl
    │       ├── ilogb.cl
    │       ├── ldexp.cl
    │       ├── ldexp.inc
    │       ├── lgamma.cl
    │       ├── lgamma_r.cl
    │       ├── lgamma_r.inc
    │       ├── log.cl
    │       ├── log10.cl
    │       ├── log1p.cl
    │       ├── log2.cl
    │       ├── log_base.h
    │       ├── logb.cl
    │       ├── mad.cl
    │       ├── mad.inc
    │       ├── math.h
    │       ├── maxmag.cl
    │       ├── maxmag.inc
    │       ├── minmag.cl
    │       ├── minmag.inc
    │       ├── modf.cl
    │       ├── modf.inc
    │       ├── nan.cl
    │       ├── nan.inc
    │       ├── native_cos.cl
    │       ├── native_divide.cl
    │       ├── native_divide.inc
    │       ├── native_exp.cl
    │       ├── native_exp10.cl
    │       ├── native_exp10.inc
    │       ├── native_exp2.cl
    │       ├── native_log.cl
    │       ├── native_log10.cl
    │       ├── native_log2.cl
    │       ├── native_powr.cl
    │       ├── native_powr.inc
    │       ├── native_recip.cl
    │       ├── native_recip.inc
    │       ├── native_rsqrt.cl
    │       ├── native_rsqrt.inc
    │       ├── native_sin.cl
    │       ├── native_sqrt.cl
    │       ├── native_tan.cl
    │       ├── native_tan.inc
    │       ├── native_unary_intrinsic.inc
    │       ├── nextafter.cl
    │       ├── pow.cl
    │       ├── pown.cl
    │       ├── pown.inc
    │       ├── powr.cl
    │       ├── remainder.cl
    │       ├── remquo.cl
    │       ├── remquo.inc
    │       ├── rint.cl
    │       ├── rootn.cl
    │       ├── rootn.inc
    │       ├── round.cl
    │       ├── sin.cl
    │       ├── sincos.cl
    │       ├── sincos.inc
    │       ├── sincosD_piby4.h
    │       ├── sincos_helpers.cl
    │       ├── sincos_helpers.h
    │       ├── sincospiF_piby4.h
    │       ├── sinh.cl
    │       ├── sinpi.cl
    │       ├── sqrt.cl
    │       ├── tables.cl
    │       ├── tables.h
    │       ├── tan.cl
    │       ├── tanh.cl
    │       ├── tanpi.cl
    │       ├── tgamma.cl
    │       ├── trunc.cl
    │       └── unary_builtin.inc
    │   ├── misc
    │       ├── shuffle.cl
    │       └── shuffle2.cl
    │   ├── relational
    │       ├── all.cl
    │       ├── any.cl
    │       ├── bitselect.cl
    │       ├── bitselect.inc
    │       ├── isequal.cl
    │       ├── isfinite.cl
    │       ├── isgreater.cl
    │       ├── isgreaterequal.cl
    │       ├── isinf.cl
    │       ├── isless.cl
    │       ├── islessequal.cl
    │       ├── islessgreater.cl
    │       ├── isnan.cl
    │       ├── isnormal.cl
    │       ├── isnotequal.cl
    │       ├── isordered.cl
    │       ├── isunordered.cl
    │       ├── relational.h
    │       ├── select.cl
    │       ├── select.inc
    │       └── signbit.cl
    │   ├── shared
    │       ├── clamp.cl
    │       ├── clamp.inc
    │       ├── max.cl
    │       ├── max.inc
    │       ├── min.cl
    │       ├── min.inc
    │       ├── vload.cl
    │       ├── vload_half.inc
    │       ├── vstore.cl
    │       └── vstore_half.inc
    │   ├── subnormal_config.cl
    │   ├── subnormal_disable.ll
    │   ├── subnormal_helper_func.ll
    │   ├── subnormal_use_default.ll
    │   └── workitem
    │       ├── get_global_id.cl
    │       └── get_global_size.cl
├── libclc.pc.in
├── ptx-nvidiacl
    └── lib
    │   ├── SOURCES
    │   ├── mem_fence
    │       └── fence.cl
    │   ├── synchronization
    │       └── barrier.cl
    │   └── workitem
    │       ├── get_global_id.cl
    │       ├── get_group_id.cl
    │       ├── get_local_id.cl
    │       ├── get_local_size.cl
    │       └── get_num_groups.cl
├── ptx
    └── lib
    │   ├── SOURCES
    │   ├── SOURCES_3.9
    │   ├── SOURCES_4.0
    │   ├── SOURCES_5.0
    │   ├── math
    │       └── nextafter.cl
    │   └── shared
    │       ├── vload_half_helpers.ll
    │       └── vstore_half_helpers.ll
├── r600
    └── lib
    │   ├── SOURCES
    │   ├── SOURCES_3.9
    │   ├── image
    │       ├── get_image_attributes_impl.ll
    │       ├── get_image_channel_data_type.cl
    │       ├── get_image_channel_order.cl
    │       ├── get_image_depth.cl
    │       ├── get_image_dim.cl
    │       ├── get_image_height.cl
    │       ├── get_image_width.cl
    │       ├── read_image_impl.ll
    │       ├── read_imagef.cl
    │       ├── read_imagei.cl
    │       ├── read_imageui.cl
    │       ├── write_image_impl.ll
    │       ├── write_imagef.cl
    │       ├── write_imagei.cl
    │       └── write_imageui.cl
    │   ├── math
    │       ├── fmax.cl
    │       └── fmin.cl
    │   ├── synchronization
    │       └── barrier.cl
    │   └── workitem
    │       ├── get_global_offset.cl
    │       ├── get_global_size.cl
    │       ├── get_group_id.cl
    │       ├── get_local_id.cl
    │       ├── get_local_size.cl
    │       ├── get_num_groups.cl
    │       └── get_work_dim.cl
├── test
    ├── add_sat.cl
    ├── as_type.cl
    ├── convert.cl
    ├── cos.cl
    ├── cross.cl
    ├── fabs.cl
    ├── get_group_id.cl
    ├── rsqrt.cl
    └── subsat.cl
├── utils
    └── prepare-builtins.cpp
└── www
    └── index.html


/.gitignore:
--------------------------------------------------------------------------------
 1 | Makefile
 2 | amdgcn--
 3 | amdgcn--amdhsa
 4 | amdgcn-mesa-mesa3d
 5 | build/*.pyc
 6 | built_libs/
 7 | generic--
 8 | generic/lib/convert.cl
 9 | libclc.pc
10 | nvptx--nvidiacl
11 | nvptx64--nvidiacl
12 | r600--
13 | utils/prepare-builtins
14 | utils/prepare-builtins.o
15 | utils/prepare-builtins.o.d
16 | 


--------------------------------------------------------------------------------
/CREDITS.TXT:
--------------------------------------------------------------------------------
1 | N: Peter Collingbourne
2 | E: peter@pcc.me.uk
3 | 


--------------------------------------------------------------------------------
/amdgcn-amdhsa/lib/SOURCES:
--------------------------------------------------------------------------------
1 | workitem/get_global_size.cl
2 | workitem/get_local_size.cl
3 | workitem/get_num_groups.cl
4 | 


--------------------------------------------------------------------------------
/amdgcn-amdhsa/lib/workitem/get_global_size.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #if __clang_major__ >= 8
 4 | #define CONST_AS __constant
 5 | #elif __clang_major__ >= 7
 6 | #define CONST_AS __attribute__((address_space(4)))
 7 | #else
 8 | #define CONST_AS __attribute__((address_space(2)))
 9 | #endif
10 | 
11 | #if __clang_major__ >= 6
12 | #define __dispatch_ptr __builtin_amdgcn_dispatch_ptr
13 | #else
14 | #define __dispatch_ptr __clc_amdgcn_dispatch_ptr
15 | CONST_AS uchar * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
16 | #endif
17 | 
18 | /*
19 |  * Returns the global work size for dimension `dim`.
20 |  *
21 |  * The value is the 32-bit word at index 3 + dim of the buffer returned by
22 |  * __dispatch_ptr(); any dim >= 3 yields 1.
23 |  * NOTE(review): the buffer is presumably the HSA kernel dispatch packet, with
24 |  * the grid sizes in words 3..5 -- confirm against the HSA packet layout.
25 |  */
26 | _CLC_DEF size_t get_global_size(uint dim)
27 | {
28 | 	CONST_AS uint * dispatch_words = (CONST_AS uint *) __dispatch_ptr();
29 | 	return (dim < 3) ? dispatch_words[3 + dim] : 1;
30 | }
31 | 


--------------------------------------------------------------------------------
/amdgcn-amdhsa/lib/workitem/get_local_size.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #if __clang_major__ >= 8
 4 | #define CONST_AS __constant
 5 | #elif __clang_major__ >= 7
 6 | #define CONST_AS __attribute__((address_space(4)))
 7 | #else
 8 | #define CONST_AS __attribute__((address_space(2)))
 9 | #endif
10 | 
11 | #if __clang_major__ >= 6
12 | #define __dispatch_ptr __builtin_amdgcn_dispatch_ptr
13 | #else
14 | #define __dispatch_ptr __clc_amdgcn_dispatch_ptr
15 | CONST_AS char * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
16 | #endif
17 | 
18 | /*
19 |  * Returns the work-group size for dimension `dim`.
20 |  *
21 |  * The sizes are unpacked as 16-bit fields from 32-bit words 1 and 2 of the
22 |  * buffer returned by __dispatch_ptr(): x is the low half of word 1, y the
23 |  * high half of word 1, and z the low half of word 2. Any dim >= 3 yields 1.
24 |  * NOTE(review): presumably this buffer is the HSA kernel dispatch packet --
25 |  * confirm the field layout against the HSA specification.
26 |  */
27 | _CLC_DEF size_t get_local_size(uint dim)
28 | {
29 | 	CONST_AS uint * words = (CONST_AS uint *) __dispatch_ptr();
30 | 
31 | 	if (dim == 0)
32 | 		return words[1] & 0xffffu;
33 | 	if (dim == 1)
34 | 		return words[1] >> 16;
35 | 	if (dim == 2)
36 | 		return words[2] & 0xffffu;
37 | 	return 1;
38 | }
39 | 


--------------------------------------------------------------------------------
/amdgcn-amdhsa/lib/workitem/get_num_groups.cl:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <clc/clc.h>
 3 | 
 4 | /*
 5 |  * Returns the number of work-groups in dimension `dim`, computed as
 6 |  * get_global_size(dim) divided by get_local_size(dim) and rounded up so a
 7 |  * trailing partial group is counted.
 8 |  */
 9 | _CLC_DEF size_t get_num_groups(uint dim) {
10 |   const size_t total = get_global_size(dim);
11 |   const size_t per_group = get_local_size(dim);
12 |   /* The comparison evaluates to 1 exactly when a remainder is left over. */
13 |   return total / per_group + (total % per_group != 0);
14 | }
15 | 


--------------------------------------------------------------------------------
/amdgcn-mesa3d:
--------------------------------------------------------------------------------
1 | amdgcn-amdhsa


--------------------------------------------------------------------------------
/amdgcn/lib/SOURCES:
--------------------------------------------------------------------------------
 1 | cl_khr_int64_extended_atomics/minmax_helpers.ll
 2 | integer/popcount.cl
 3 | math/fmax.cl
 4 | math/fmin.cl
 5 | math/ldexp.cl
 6 | mem_fence/fence.cl
 7 | synchronization/barrier.cl
 8 | workitem/get_global_offset.cl
 9 | workitem/get_group_id.cl
10 | workitem/get_global_size.cl
11 | workitem/get_local_id.cl
12 | workitem/get_local_size.cl
13 | workitem/get_num_groups.cl
14 | workitem/get_work_dim.cl
15 | 


--------------------------------------------------------------------------------
/amdgcn/lib/integer/popcount.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | #include <utils.h>
3 | #include <integer/popcount.h>
4 | 
5 | #define __CLC_BODY "popcount.inc"
6 | #include <clc/integer/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/amdgcn/lib/integer/popcount.inc:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * popcount for the current __CLC_GENTYPE, forwarding to __clc_native_popcount.
 3 |  *
 4 |  * LLVM-4+ implements i16 ops for VI+ ASICs, but the ctpop implementation was
 5 |  * missing until r326535. For clang majors 4 through 6, element types narrower
 6 |  * than 32 bits are therefore widened to uint before counting, as a workaround.
 7 |  */
 8 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE popcount(__CLC_GENTYPE x) {
 9 | #if __clang_major__ > 3 && __clang_major__ < 7 && __CLC_GENSIZE < 32
10 | 	/* Reinterpret as unsigned first so the widening does not sign-extend. */
11 | 	const __CLC_U_GENTYPE ux = __CLC_XCONCAT(as_, __CLC_U_GENTYPE)(x);
12 | 	/* Widen each element to uint. */
13 | 	const __CLC_XCONCAT(uint, __CLC_VECSIZE) wide = __CLC_XCONCAT(convert_uint, __CLC_VECSIZE)(ux);
14 | 	/* Count bits on the uint type. */
15 | 	const __CLC_XCONCAT(uint, __CLC_VECSIZE) bits = __clc_native_popcount(wide);
16 | 	/* Narrow the count back to the caller's gentype. */
17 | 	return __CLC_XCONCAT(convert_, __CLC_GENTYPE)(bits);
18 | #else
19 | 	return __clc_native_popcount(x);
20 | #endif
21 | }
22 | 


--------------------------------------------------------------------------------
/amdgcn/lib/synchronization/barrier.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | /* Issues mem_fence(flags), then executes the AMDGCN s_barrier builtin. */
4 | _CLC_DEF void barrier(cl_mem_fence_flags flags) {
5 | 	mem_fence(flags);
6 | 	__builtin_amdgcn_s_barrier();
7 | }
8 | 


--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_global_offset.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #if __clang_major__ >= 8
 4 | #define CONST_AS __constant
 5 | #elif __clang_major__ >= 7
 6 | #define CONST_AS __attribute__((address_space(4)))
 7 | #else
 8 | #define CONST_AS __attribute__((address_space(2)))
 9 | #endif
10 | 
11 | /*
12 |  * Returns the global offset for dimension `dim`.
13 |  *
14 |  * The value is the 32-bit word at index dim + 1 of the buffer returned by
15 |  * __builtin_amdgcn_implicitarg_ptr(); any dim >= 3 yields 0.
16 |  */
17 | _CLC_DEF size_t get_global_offset(uint dim)
18 | {
19 | 	CONST_AS uint * implicit_args =
20 | 		(CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
21 | 
22 | 	/* Out-of-range dimensions have no offset. */
23 | 	if (dim >= 3)
24 | 		return 0;
25 | 	return implicit_args[dim + 1];
26 | }
27 | 


--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_global_size.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | uint __clc_amdgcn_get_global_size_x(void) __asm("llvm.r600.read.global.size.x");
 4 | uint __clc_amdgcn_get_global_size_y(void) __asm("llvm.r600.read.global.size.y");
 5 | uint __clc_amdgcn_get_global_size_z(void) __asm("llvm.r600.read.global.size.z");
 6 | 
 7 | /*
 8 |  * Returns the global work size for dimension `dim`, read through the
 9 |  * llvm.r600.read.global.size.{x,y,z} intrinsics declared above.
10 |  * Any dim >= 3 yields 1.
11 |  */
12 | _CLC_DEF size_t get_global_size(uint dim)
13 | {
14 | 	if (dim == 0)
15 | 		return __clc_amdgcn_get_global_size_x();
16 | 	if (dim == 1)
17 | 		return __clc_amdgcn_get_global_size_y();
18 | 	if (dim == 2)
19 | 		return __clc_amdgcn_get_global_size_z();
20 | 	return 1;
21 | }
22 | 


--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_group_id.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /*
 4 |  * Returns the work-group ID for dimension `dim` via the
 5 |  * __builtin_amdgcn_workgroup_id_{x,y,z} builtins.
 6 |  *
 7 |  * For dim >= 3 the OpenCL C specification requires get_group_id() to
 8 |  * return 0 (unlike the size queries, which return 1 for invalid dimensions).
 9 |  */
10 | _CLC_DEF size_t get_group_id(uint dim)
11 | {
12 | 	switch(dim) {
13 | 	case 0: return __builtin_amdgcn_workgroup_id_x();
14 | 	case 1: return __builtin_amdgcn_workgroup_id_y();
15 | 	case 2: return __builtin_amdgcn_workgroup_id_z();
16 | 	default: return 0;
17 | 	}
18 | }
19 | 


--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_local_id.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /*
 4 |  * Returns the work-item's local ID for dimension `dim` via the
 5 |  * __builtin_amdgcn_workitem_id_{x,y,z} builtins.
 6 |  *
 7 |  * For dim >= 3 the OpenCL C specification requires get_local_id() to
 8 |  * return 0 (unlike the size queries, which return 1 for invalid dimensions).
 9 |  */
10 | _CLC_DEF size_t get_local_id(uint dim)
11 | {
12 | 	switch(dim) {
13 | 	case 0: return __builtin_amdgcn_workitem_id_x();
14 | 	case 1: return __builtin_amdgcn_workitem_id_y();
15 | 	case 2: return __builtin_amdgcn_workitem_id_z();
16 | 	default: return 0;
17 | 	}
18 | }
19 | 


--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_local_size.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | uint __clc_amdgcn_get_local_size_x(void) __asm("llvm.r600.read.local.size.x");
 4 | uint __clc_amdgcn_get_local_size_y(void) __asm("llvm.r600.read.local.size.y");
 5 | uint __clc_amdgcn_get_local_size_z(void) __asm("llvm.r600.read.local.size.z");
 6 | 
 7 | /*
 8 |  * Returns the work-group size for dimension `dim`, read through the
 9 |  * llvm.r600.read.local.size.{x,y,z} intrinsics declared above.
10 |  * Any dim >= 3 yields 1.
11 |  */
12 | _CLC_DEF size_t get_local_size(uint dim)
13 | {
14 | 	/* Default covers every dimension index outside 0..2. */
15 | 	size_t size = 1;
16 | 
17 | 	switch (dim) {
18 | 	case 0:
19 | 		size = __clc_amdgcn_get_local_size_x();
20 | 		break;
21 | 	case 1:
22 | 		size = __clc_amdgcn_get_local_size_y();
23 | 		break;
24 | 	case 2:
25 | 		size = __clc_amdgcn_get_local_size_z();
26 | 		break;
27 | 	}
28 | 	return size;
29 | }
30 | 


--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_num_groups.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | uint __clc_amdgcn_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x");
 4 | uint __clc_amdgcn_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y");
 5 | uint __clc_amdgcn_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z");
 6 | 
 7 | _CLC_DEF size_t get_num_groups(uint dim)
 8 | {
 9 | 	/* Dimensions 0..2 map to the x/y/z readers; any other index reports 1. */
10 | 	if (dim == 0)
11 | 		return __clc_amdgcn_get_num_groups_x();
12 | 	if (dim == 1)
13 | 		return __clc_amdgcn_get_num_groups_y();
14 | 	if (dim == 2)
15 | 		return __clc_amdgcn_get_num_groups_z();
16 | 	return 1;
17 | }
18 | 


--------------------------------------------------------------------------------
/amdgcn/lib/workitem/get_work_dim.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #if __clang_major__ >= 8
 4 | #define CONST_AS __constant
 5 | #elif __clang_major__ >= 7
 6 | #define CONST_AS __attribute__((address_space(4)))
 7 | #else
 8 | #define CONST_AS __attribute__((address_space(2)))
 9 | #endif
10 | 
11 | _CLC_DEF uint get_work_dim(void)
12 | {
13 | 	/* The result is the first uint found at the implicit-argument pointer. */
14 | 	CONST_AS uint *implicit_args = (CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
15 | 	return *implicit_args;
16 | }
17 | 


--------------------------------------------------------------------------------
/amdgpu/lib/SOURCES:
--------------------------------------------------------------------------------
 1 | math/native_exp.cl
 2 | math/native_log.cl
 3 | math/native_log10.cl
 4 | math/half_exp.cl
 5 | math/half_exp10.cl
 6 | math/half_exp2.cl
 7 | math/half_log.cl
 8 | math/half_log10.cl
 9 | math/half_log2.cl
10 | math/half_recip.cl
11 | math/half_rsqrt.cl
12 | math/half_sqrt.cl
13 | math/nextafter.cl
14 | math/sqrt.cl
15 | 


--------------------------------------------------------------------------------
/amdgpu/lib/SOURCES_3.9:
--------------------------------------------------------------------------------
1 | shared/vload_half_helpers.ll
2 | shared/vstore_half_helpers.ll
3 | 


--------------------------------------------------------------------------------
/amdgpu/lib/SOURCES_4.0:
--------------------------------------------------------------------------------
1 | shared/vload_half_helpers.ll
2 | shared/vstore_half_helpers.ll
3 | 


--------------------------------------------------------------------------------
/amdgpu/lib/SOURCES_5.0:
--------------------------------------------------------------------------------
1 | shared/vload_half_helpers.ll
2 | shared/vstore_half_helpers.ll
3 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/half_exp.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Builds half_exp from half_native_unary.inc, whose body forwards to native_exp. */
3 | #define __CLC_FUNC exp
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY <half_native_unary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/half_exp10.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Builds half_exp10 from half_native_unary.inc, whose body forwards to native_exp10. */
3 | #define __CLC_FUNC exp10
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY <half_native_unary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/half_exp2.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Builds half_exp2 from half_native_unary.inc, whose body forwards to native_exp2. */
3 | #define __CLC_FUNC exp2
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY <half_native_unary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/half_log.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Builds half_log from half_native_unary.inc, whose body forwards to native_log. */
3 | #define __CLC_FUNC log
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY <half_native_unary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/half_log10.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Builds half_log10 from half_native_unary.inc, whose body forwards to native_log10. */
3 | #define __CLC_FUNC log10
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY <half_native_unary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/half_log2.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Builds half_log2 from half_native_unary.inc, whose body forwards to native_log2. */
3 | #define __CLC_FUNC log2
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY <half_native_unary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/half_native_unary.inc:
--------------------------------------------------------------------------------
 1 | #include <utils.h>
 2 | /* Name builders: prefix the function name with half_ or native_ (assumes __CLC_CONCAT token-pastes - TODO confirm). */
 3 | #define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x)
 4 | #define __CLC_NATIVE_FUNC(x) __CLC_CONCAT(native_, x)
 5 | /* The half_ variant of __CLC_FUNC just passes its argument to the native_ variant. */
 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_HALF_FUNC(__CLC_FUNC)(__CLC_GENTYPE val) {
 7 | 	return __CLC_NATIVE_FUNC(__CLC_FUNC)(val);
 8 | }
 9 | /* Undefine the name builders so this file can be included again. */
10 | #undef __CLC_NATIVE_FUNC
11 | #undef __CLC_HALF_FUNC
12 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/half_recip.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Builds half_recip from half_native_unary.inc, whose body forwards to native_recip. */
3 | #define __CLC_FUNC recip
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY <half_native_unary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/half_rsqrt.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Builds half_rsqrt from half_native_unary.inc, whose body forwards to native_rsqrt. */
3 | #define __CLC_FUNC rsqrt
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY <half_native_unary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/half_sqrt.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Builds half_sqrt from half_native_unary.inc, whose body forwards to native_sqrt. */
3 | #define __CLC_FUNC sqrt
4 | #define __FLOAT_ONLY
5 | #define __CLC_BODY <half_native_unary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/native_exp.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Uses native_exp.inc as the gentype body; __FLOAT_ONLY presumably limits it to float types - TODO confirm. */
3 | #define __CLC_BODY <native_exp.inc>
4 | #define __FLOAT_ONLY
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/native_exp.inc:
--------------------------------------------------------------------------------
1 | /* native_exp(val) is evaluated as native_exp2(val * M_LOG2E_F). */
2 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_exp(__CLC_GENTYPE val) {
3 |   const __CLC_GENTYPE scaled = val * M_LOG2E_F;
4 |   return native_exp2(scaled);
5 | }
6 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/native_log.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Uses native_log.inc as the gentype body; __FLOAT_ONLY presumably limits it to float types - TODO confirm. */
3 | #define __CLC_BODY <native_log.inc>
4 | #define __FLOAT_ONLY
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/native_log.inc:
--------------------------------------------------------------------------------
1 | /* native_log(val) is native_log2(val) scaled by the constant 1 / M_LOG2E_F. */
2 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log(__CLC_GENTYPE val) {
3 |   const __CLC_GENTYPE log2_val = native_log2(val);
4 |   return log2_val * (1.0f / M_LOG2E_F);
5 | }
6 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/native_log10.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Uses native_log10.inc as the gentype body; __FLOAT_ONLY presumably limits it to float types - TODO confirm. */
3 | #define __CLC_BODY <native_log10.inc>
4 | #define __FLOAT_ONLY
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/native_log10.inc:
--------------------------------------------------------------------------------
1 | /* native_log10(val) is native_log2(val) scaled by the constant M_LN2_F / M_LN10_F. */
2 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log10(__CLC_GENTYPE val) {
3 |   const __CLC_GENTYPE log2_val = native_log2(val);
4 |   return log2_val * (M_LN2_F / M_LN10_F);
5 | }
6 | 


--------------------------------------------------------------------------------
/amdgpu/lib/math/nextafter.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "../lib/clcmacro.h"
 3 | #include <math/clc_nextafter.h>
 4 | /* float nextafter; __clc_nextafter is handed to the macro, presumably as the implementation - TODO confirm. */
 5 | _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float)
 6 | /* double variant, emitted only when cl_khr_fp64 is defined. */
 7 | #ifdef cl_khr_fp64
 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 9 | _CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __clc_nextafter, double, double)
10 | #endif
11 | /* half variant, emitted only when cl_khr_fp16 is defined. */
12 | #ifdef cl_khr_fp16
13 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
14 | _CLC_DEFINE_BINARY_BUILTIN(half, nextafter, __clc_nextafter, half, half)
15 | #endif
16 | 


--------------------------------------------------------------------------------
/amdgpu/lib/shared/vload_half_helpers.ll:
--------------------------------------------------------------------------------
 1 | define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
 2 |   %h = load half, half addrspace(0)* %ptr ; read one half through the addrspace(0) pointer
 3 |   %widened = fpext half %h to float ; extend it to float
 4 |   ret float %widened
 5 | }
 6 | 
 7 | define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 8 |   %h = load half, half addrspace(1)* %ptr ; read one half through the addrspace(1) pointer
 9 |   %widened = fpext half %h to float ; extend it to float
10 |   ret float %widened
11 | }
12 | 
13 | define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
14 |   %h = load half, half addrspace(3)* %ptr ; read one half through the addrspace(3) pointer
15 |   %widened = fpext half %h to float ; extend it to float
16 |   ret float %widened
17 | }
18 | 
19 | define float @__clc_vload_half_float_helper__constant(half addrspace(2)* nocapture %ptr) nounwind alwaysinline {
20 |   %h = load half, half addrspace(2)* %ptr ; read one half through the addrspace(2) pointer
21 |   %widened = fpext half %h to float ; extend it to float
22 |   ret float %widened
23 | }
24 | 


--------------------------------------------------------------------------------
/check_external_calls.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #
 3 | # Usage: check_external_calls.sh <bitcode-file>
 4 | #
 5 | # Disassembles the given file with llvm-dis and reports every line containing
 6 | # a call whose text does not mention @llvm (calls to LLVM intrinsics are OK).
 7 | #
 8 | # Exit status:
 9 | #   0  no such calls found
10 | #   1  at least one such call found
11 | #   2  no such calls found, but $LLVM_CONFIG was unset (llvm-config from $PATH used)
12 | #   3  the argument is not a file, llvm-dis is not executable, or mktemp failed
13 | 
14 | FILE=$1
15 | # Quote the operand: unquoted, an empty argument turns this into `[ ! -f ]`,
16 | # which is false, so the script would carry on without an input file.
17 | if [ ! -f "$FILE" ]; then
18 | 	echo "ERROR: Not a file: $FILE"
19 | 	exit 3
20 | fi
21 | ret=0
22 | if [ "x$LLVM_CONFIG" = "x" ]; then
23 | 	LLVM_CONFIG=llvm-config
24 | 	echo 'WARNING: $LLVM_CONFIG not set, falling back to $PATH llvm-config'
25 | 	ret=2
26 | fi
27 | 
28 | 
29 | BIN_DIR=$($LLVM_CONFIG --bindir)
30 | DIS="$BIN_DIR/llvm-dis"
31 | if [ ! -x "$DIS" ]; then
32 | 	echo "ERROR: Disassembler '$DIS' is not executable"
33 | 	exit 3
34 | fi
35 | 
36 | TMP_FILE=$(mktemp) || {
37 | 	echo "ERROR: Could not create a temporary file"
38 | 	exit 3
39 | }
40 | # Remove the scratch file on every exit path so runs do not leave files behind.
41 | trap 'rm -f "$TMP_FILE"' EXIT
42 | 
43 | # Check for calls. Calls to llvm intrinsics are OK
44 | "$DIS" < "$FILE" | grep ' call ' | grep -v '@llvm' > "$TMP_FILE"
45 | COUNT=$(wc -l < "$TMP_FILE")
46 | 
47 | if [ "$COUNT" -ne "0" ]; then
48 | 	echo "ERROR: $COUNT unresolved calls detected in $FILE"
49 | 	cat "$TMP_FILE"
50 | 	ret=1
51 | else
52 | 	echo "File $FILE is OK"
53 | fi
54 | exit $ret
55 | 


--------------------------------------------------------------------------------
/cmake/CMakeCLCCompiler.cmake.in:
--------------------------------------------------------------------------------
 1 | set(CMAKE_CLC_COMPILER "@CMAKE_CLC_COMPILER@")
 2 | set(CMAKE_CLC_COMPILER_LOADED 1)
 3 | # CLC sources use the cl extension; object files and static libraries both get a .bc suffix and no prefix.
 4 | set(CMAKE_CLC_SOURCE_FILE_EXTENSIONS cl)
 5 | set(CMAKE_CLC_OUTPUT_EXTENSION .bc)
 6 | set(CMAKE_CLC_OUTPUT_EXTENSION_REPLACE 1)
 7 | set(CMAKE_STATIC_LIBRARY_PREFIX_CLC "")
 8 | set(CMAKE_STATIC_LIBRARY_SUFFIX_CLC ".bc")
 9 | set(CMAKE_CLC_COMPILER_ENV_VAR "CLC_COMPILER")
10 | 


--------------------------------------------------------------------------------
/cmake/CMakeCLCInformation.cmake:
--------------------------------------------------------------------------------
 1 | if(NOT CMAKE_CLC_COMPILE_OBJECT)
 2 |   set(CMAKE_CLC_COMPILE_OBJECT
 3 |     "<CMAKE_CLC_COMPILER> <DEFINES> <INCLUDES> <FLAGS> -o <OBJECT> -c <SOURCE> -emit-llvm")
 4 | endif()
 5 | # The rule above compiles one source with -emit-llvm; the rule below combines the objects with the archive tool.
 6 | if(NOT CMAKE_CLC_CREATE_STATIC_LIBRARY)
 7 |   set(CMAKE_CLC_CREATE_STATIC_LIBRARY
 8 |     "<CMAKE_CLC_ARCHIVE> <LINK_FLAGS> -o <TARGET> <OBJECTS>")
 9 | endif()
10 | # Include directories are passed to the compiler with -I.
11 | set(CMAKE_INCLUDE_FLAG_CLC "-I")
12 | 


--------------------------------------------------------------------------------
/cmake/CMakeDetermineCLCCompiler.cmake:
--------------------------------------------------------------------------------
 1 | include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake)
 2 | # Look for clang as the CLC compiler unless CMAKE_CLC_COMPILER is already set.
 3 | if(NOT CMAKE_CLC_COMPILER)
 4 |   find_program(CMAKE_CLC_COMPILER NAMES clang)
 5 | endif()
 6 | mark_as_advanced(CMAKE_CLC_COMPILER)
 7 | # Look for llvm-link as the archive tool unless CMAKE_CLC_ARCHIVE is already set.
 8 | if(NOT CMAKE_CLC_ARCHIVE)
 9 |   find_program(CMAKE_CLC_ARCHIVE NAMES llvm-link)
10 | endif()
11 | mark_as_advanced(CMAKE_CLC_ARCHIVE)
12 | # Record the environment variable names, then generate CMakeCLCCompiler.cmake from its .in template.
13 | set(CMAKE_CLC_COMPILER_ENV_VAR "CLC_COMPILER")
14 | set(CMAKE_CLC_ARCHIVE_ENV_VAR "CLC_LINKER")
15 | find_file(clc_comp_in CMakeCLCCompiler.cmake.in PATHS ${CMAKE_ROOT}/Modules ${CMAKE_MODULE_PATH})
16 | # configure all variables set in this file
17 | configure_file(${clc_comp_in} ${CMAKE_PLATFORM_INFO_DIR}/CMakeCLCCompiler.cmake @ONLY)
18 | mark_as_advanced(clc_comp_in)
19 | 


--------------------------------------------------------------------------------
/cmake/CMakeDetermineLLAsmCompiler.cmake:
--------------------------------------------------------------------------------
 1 | include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake)
 2 | 
 3 | # Tool discovery for the LLAsm language. Each tool is searched for only when the
 4 | # caller has not already set it, and each cache entry is marked advanced.
 5 | 
 6 | # Preprocessor: clang.
 7 | if(NOT CMAKE_LLAsm_PREPROCESSOR)
 8 |   find_program(CMAKE_LLAsm_PREPROCESSOR NAMES clang)
 9 | endif()
10 | mark_as_advanced(CMAKE_LLAsm_PREPROCESSOR)
11 | 
12 | # Assembler: llvm-as. The variable found here is CMAKE_LLAsm_COMPILER, so that
13 | # is the entry to mark advanced (no CMAKE_LLAsm_ASSEMBLER is set in this file).
14 | if(NOT CMAKE_LLAsm_COMPILER)
15 |   find_program(CMAKE_LLAsm_COMPILER NAMES llvm-as)
16 | endif()
17 | mark_as_advanced(CMAKE_LLAsm_COMPILER)
18 | 
19 | # Archiver: llvm-link.
20 | if(NOT CMAKE_LLAsm_ARCHIVE)
21 |   find_program(CMAKE_LLAsm_ARCHIVE NAMES llvm-link)
22 | endif()
23 | mark_as_advanced(CMAKE_LLAsm_ARCHIVE)
24 | 
25 | # Environment variable names associated with each tool.
26 | set(CMAKE_LLAsm_PREPROCESSOR_ENV_VAR "LL_PREPROCESSOR")
27 | set(CMAKE_LLAsm_COMPILER_ENV_VAR "LL_ASSEMBLER")
28 | set(CMAKE_LLAsm_ARCHIVE_ENV_VAR "LL_LINKER")
29 | find_file(ll_comp_in CMakeLLAsmCompiler.cmake.in PATHS ${CMAKE_ROOT}/Modules ${CMAKE_MODULE_PATH})
30 | # configure all variables set in this file
31 | configure_file(${ll_comp_in} ${CMAKE_PLATFORM_INFO_DIR}/CMakeLLAsmCompiler.cmake @ONLY)
32 | mark_as_advanced(ll_comp_in)
33 | 


--------------------------------------------------------------------------------
/cmake/CMakeLLAsmCompiler.cmake.in:
--------------------------------------------------------------------------------
 1 | set(CMAKE_LLAsm_PREPROCESSOR "@CMAKE_LLAsm_PREPROCESSOR@")
 2 | set(CMAKE_LLAsm_COMPILER "@CMAKE_LLAsm_COMPILER@")
 3 | set(CMAKE_LLAsm_ARCHIVE "@CMAKE_LLAsm_ARCHIVE@")
 4 | set(CMAKE_LLAsm_COMPILER_LOADED 1)
 5 | # LLAsm sources use the ll extension; object files and static libraries both get a .bc suffix and no prefix.
 6 | set(CMAKE_LLAsm_SOURCE_FILE_EXTENSIONS ll)
 7 | set(CMAKE_LLAsm_OUTPUT_EXTENSION .bc)
 8 | set(CMAKE_LLAsm_OUTPUT_EXTENSION_REPLACE 1)
 9 | set(CMAKE_STATIC_LIBRARY_PREFIX_LLAsm "")
10 | set(CMAKE_STATIC_LIBRARY_SUFFIX_LLAsm ".bc")
11 | 


--------------------------------------------------------------------------------
/cmake/CMakeLLAsmInformation.cmake:
--------------------------------------------------------------------------------
 1 | if(NOT CMAKE_LLAsm_COMPILE_OBJECT)
 2 |   set(CMAKE_LLAsm_COMPILE_OBJECT
 3 | 	  "${CMAKE_LLAsm_PREPROCESSOR} -E -P  <DEFINES> <INCLUDES> <FLAGS> -x cl  <SOURCE> -o - | <CMAKE_LLAsm_COMPILER> -o <OBJECT> ")
 4 | endif()
 5 | # The rule above preprocesses the source (-E -P, treated as cl) and pipes the result into the LLAsm compiler; the rule below combines objects.
 6 | if(NOT CMAKE_LLAsm_CREATE_STATIC_LIBRARY)
 7 |   set(CMAKE_LLAsm_CREATE_STATIC_LIBRARY
 8 |     "<CMAKE_LLAsm_ARCHIVE> <LINK_FLAGS> -o <TARGET> <OBJECTS>")
 9 | endif()
10 | # Include directories are passed to the preprocessor with -I.
11 | set(CMAKE_INCLUDE_FLAG_LLAsm "-I")
12 | 


--------------------------------------------------------------------------------
/compile-test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Runs clang for the nvptx--nvidiacl target with builtins.bc linked in and clc/clc.h pre-included; all script arguments are passed through.
3 | clang -target nvptx--nvidiacl -Iptx-nvidiacl/include -Igeneric/include -Xclang -mlink-bitcode-file -Xclang nvptx--nvidiacl/lib/builtins.bc -include clc/clc.h -Dcl_clang_storage_class_specifiers -Dcl_khr_fp64 "$@"
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/async/async_work_group_copy.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_DST_ADDR_SPACE local
 2 | #define __CLC_SRC_ADDR_SPACE global
 3 | #define __CLC_BODY <clc/async/async_work_group_copy.inc>
 4 | #include <clc/async/gentype.inc>
 5 | #undef __CLC_DST_ADDR_SPACE
 6 | #undef __CLC_SRC_ADDR_SPACE
 7 | #undef __CLC_BODY
 8 | /* Above: declarations copying global to local.  Below: the same body with local as source and global as destination. */
 9 | #define __CLC_DST_ADDR_SPACE global
10 | #define __CLC_SRC_ADDR_SPACE local
11 | #define __CLC_BODY <clc/async/async_work_group_copy.inc>
12 | #include <clc/async/gentype.inc>
13 | #undef __CLC_DST_ADDR_SPACE
14 | #undef __CLC_SRC_ADDR_SPACE
15 | #undef __CLC_BODY
16 | 


--------------------------------------------------------------------------------
/generic/include/clc/async/async_work_group_copy.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL event_t async_work_group_copy(
2 |   __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
3 |   const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
4 |   size_t num_gentypes,
5 |   event_t event);
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/async/async_work_group_strided_copy.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_DST_ADDR_SPACE local
 2 | #define __CLC_SRC_ADDR_SPACE global
 3 | #define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
 4 | #include <clc/async/gentype.inc>
 5 | #undef __CLC_DST_ADDR_SPACE
 6 | #undef __CLC_SRC_ADDR_SPACE
 7 | #undef __CLC_BODY
 8 | /* Above: declarations copying global to local.  Below: the same body with local as source and global as destination. */
 9 | #define __CLC_DST_ADDR_SPACE global
10 | #define __CLC_SRC_ADDR_SPACE local
11 | #define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
12 | #include <clc/async/gentype.inc>
13 | #undef __CLC_DST_ADDR_SPACE
14 | #undef __CLC_SRC_ADDR_SPACE
15 | #undef __CLC_BODY
16 | 


--------------------------------------------------------------------------------
/generic/include/clc/async/async_work_group_strided_copy.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL event_t async_work_group_strided_copy(
2 |   __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
3 |   const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
4 |   size_t num_gentypes,
5 |   size_t stride,
6 |   event_t event);
7 | 


--------------------------------------------------------------------------------
/generic/include/clc/async/prefetch.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/async/prefetch.inc>
2 | #include <clc/async/gentype.inc>
3 | #undef __CLC_BODY
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/async/prefetch.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL void prefetch(const global __CLC_GENTYPE *p, size_t num_gentypes);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/async/wait_group_events.h:
--------------------------------------------------------------------------------
1 | void wait_group_events(int num_events, event_t *event_list);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/atom_decl_int32.inc:
--------------------------------------------------------------------------------
 1 | #define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \
 2 | 	_CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE);
 3 | /* Declare the int and uint overloads of __CLC_FUNCTION for pointers in __CLC_ADDRESS_SPACE. */
 4 | __CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, int)
 5 | __CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, uint)
 6 | /* Undefine the helper and both includer-supplied macros so the next includer can set them again. */
 7 | #undef __CLC_DECLARE_ATOM
 8 | 
 9 | #undef __CLC_FUNCTION
10 | #undef __CLC_ADDRESS_SPACE
11 | 


--------------------------------------------------------------------------------
/generic/include/clc/atom_decl_int64.inc:
--------------------------------------------------------------------------------
 1 | #define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \
 2 | 	_CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE);
 3 | /* Declare the long and ulong overloads of __CLC_FUNCTION for both local and global pointers. */
 4 | __CLC_DECLARE_ATOM(local, long)
 5 | __CLC_DECLARE_ATOM(local, ulong)
 6 | __CLC_DECLARE_ATOM(global, long)
 7 | __CLC_DECLARE_ATOM(global, ulong)
 8 | /* Undefine the helper and the includer-supplied function name so the next includer can set it again. */
 9 | #undef __CLC_DECLARE_ATOM
10 | 
11 | #undef __CLC_FUNCTION
12 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_add.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atomic_add
2 | #include <clc/atomic/atomic_decl.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_and.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atomic_and
2 | #include <clc/atomic/atomic_decl.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_cmpxchg.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL int atomic_cmpxchg (volatile local int *, int, int);
2 | _CLC_OVERLOAD _CLC_DECL int atomic_cmpxchg (volatile global int *, int, int);
3 | _CLC_OVERLOAD _CLC_DECL uint atomic_cmpxchg (volatile local uint *, uint, uint);
4 | _CLC_OVERLOAD _CLC_DECL uint atomic_cmpxchg (volatile global uint *, uint, uint);
5 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_dec.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL int atomic_dec (volatile local int *);
2 | _CLC_OVERLOAD _CLC_DECL int atomic_dec (volatile global int *);
3 | _CLC_OVERLOAD _CLC_DECL uint atomic_dec (volatile local uint *);
4 | _CLC_OVERLOAD _CLC_DECL uint atomic_dec (volatile global uint *);
5 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_decl.inc:
--------------------------------------------------------------------------------
 1 | #define __CLC_DECLARE_ATOMIC(ADDRSPACE, TYPE) \
 2 | 	_CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE);
 3 | /* Expands to the global and local declarations of __CLC_FUNCTION for one TYPE. */
 4 | #define __CLC_DECLARE_ATOMIC_ADDRSPACE(TYPE) \
 5 | 	__CLC_DECLARE_ATOMIC(global, TYPE) \
 6 | 	__CLC_DECLARE_ATOMIC(local, TYPE)
 7 | /* Declare the int and uint overloads in both address spaces. */
 8 | __CLC_DECLARE_ATOMIC_ADDRSPACE(int)
 9 | __CLC_DECLARE_ATOMIC_ADDRSPACE(uint)
10 | /* Undefine the helpers and the includer-supplied function name so the next includer can set it again. */
11 | #undef __CLC_DECLARE_ATOMIC_ADDRSPACE
12 | #undef __CLC_DECLARE_ATOMIC
13 | 
14 | #undef __CLC_FUNCTION
15 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_inc.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL int atomic_inc (volatile local int *);
2 | _CLC_OVERLOAD _CLC_DECL int atomic_inc (volatile global int *);
3 | _CLC_OVERLOAD _CLC_DECL uint atomic_inc (volatile local uint *);
4 | _CLC_OVERLOAD _CLC_DECL uint atomic_inc (volatile global uint *);
5 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_max.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atomic_max
2 | #include <clc/atomic/atomic_decl.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_min.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atomic_min
2 | #include <clc/atomic/atomic_decl.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_or.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atomic_or
2 | #include <clc/atomic/atomic_decl.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_sub.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atomic_sub
2 | #include <clc/atomic/atomic_decl.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_xchg.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atomic_xchg
2 | /* The float overloads are declared here; atomic_decl.inc then adds int/uint and undefines __CLC_FUNCTION. */
3 | _CLC_OVERLOAD _CLC_DECL float __CLC_FUNCTION (volatile local float *, float);
4 | _CLC_OVERLOAD _CLC_DECL float __CLC_FUNCTION (volatile global float *, float);
5 | #include <clc/atomic/atomic_decl.inc>
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/atomic/atomic_xor.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atomic_xor
2 | #include <clc/atomic/atomic_decl.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_add
2 | #define __CLC_ADDRESS_SPACE global
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(volatile global int *p, int cmp, int val);
2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(volatile global unsigned int *p, unsigned int cmp, unsigned int val);
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL int atom_dec(volatile global int *p);
2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(volatile global unsigned int *p);
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL int atom_inc(volatile global int *p);
2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(volatile global unsigned int *p);
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_sub
2 | #define __CLC_ADDRESS_SPACE global
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_xchg
2 | #define __CLC_ADDRESS_SPACE global
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_and
2 | #define __CLC_ADDRESS_SPACE global
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_max
2 | #define __CLC_ADDRESS_SPACE global
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_min
2 | #define __CLC_ADDRESS_SPACE global
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_or
2 | #define __CLC_ADDRESS_SPACE global
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_xor
2 | #define __CLC_ADDRESS_SPACE global
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_int64_base_atomics/atom_add.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_add
2 | #include <clc/atom_decl_int64.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_int64_base_atomics/atom_cmpxchg.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL long atom_cmpxchg(volatile global long *p, long cmp, long val);
2 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_cmpxchg(volatile global unsigned long *p, unsigned long cmp, unsigned long val);
3 | _CLC_OVERLOAD _CLC_DECL long atom_cmpxchg(volatile local long *p, long cmp, long val);
4 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_cmpxchg(volatile local unsigned long *p, unsigned long cmp, unsigned long val);
5 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_int64_base_atomics/atom_dec.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL long atom_dec(volatile global long *p);
2 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_dec(volatile global unsigned long *p);
3 | _CLC_OVERLOAD _CLC_DECL long atom_dec(volatile local long *p);
4 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_dec(volatile local unsigned long *p);
5 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_int64_base_atomics/atom_inc.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL long atom_inc(volatile global long *p);
2 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_inc(volatile global unsigned long *p);
3 | _CLC_OVERLOAD _CLC_DECL long atom_inc(volatile local long *p);
4 | _CLC_OVERLOAD _CLC_DECL unsigned long atom_inc(volatile local unsigned long *p);
5 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_sub
2 | #include <clc/atom_decl_int64.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_xchg
2 | #include <clc/atom_decl_int64.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_and
2 | #include <clc/atom_decl_int64.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_max
2 | #include <clc/atom_decl_int64.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_min
2 | #include <clc/atom_decl_int64.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_or
2 | #include <clc/atom_decl_int64.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_xor
2 | #include <clc/atom_decl_int64.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_add
2 | #define __CLC_ADDRESS_SPACE local
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(volatile local int *p, int cmp, int val);
2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(volatile local unsigned int *p, unsigned int cmp, unsigned int val);
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL int atom_dec(volatile local int *p);
2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(volatile local unsigned int *p);
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL int atom_inc(volatile local int *p);
2 | _CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(volatile local unsigned int *p);
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_sub
2 | #define __CLC_ADDRESS_SPACE local
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_xchg
2 | #define __CLC_ADDRESS_SPACE local
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_and
2 | #define __CLC_ADDRESS_SPACE local
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_max
2 | #define __CLC_ADDRESS_SPACE local
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_min
2 | #define __CLC_ADDRESS_SPACE local
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_or
2 | #define __CLC_ADDRESS_SPACE local
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION atom_xor
2 | #define __CLC_ADDRESS_SPACE local
3 | #include <clc/atom_decl_int32.inc>
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/clcfunc.h:
--------------------------------------------------------------------------------
1 | #define _CLC_OVERLOAD __attribute__((overloadable))
2 | #define _CLC_DECL
3 | #define _CLC_DEF __attribute__((always_inline))
4 | #define _CLC_INLINE __attribute__((always_inline)) inline
5 | 


--------------------------------------------------------------------------------
/generic/include/clc/clcmacros.h:
--------------------------------------------------------------------------------
 1 | /* 6.9 Preprocessor Directives and Macros
 2 |  * Some of these are handled by clang or passed by clover */
 3 | #if __OPENCL_VERSION__ >= 110
 4 | #define CLC_VERSION_1_0 100
 5 | #define CLC_VERSION_1_1 110
 6 | #endif
 7 | 
 8 | #if __OPENCL_VERSION__ >= 120
 9 | #define CLC_VERSION_1_2 120
10 | #endif
11 | 
12 | #define NULL ((void*)0)
13 | 
14 | #define __kernel_exec(X, typen) __kernel \
15 |                                 __attribute__((work_group_size_hint(X, 1, 1))) \
16 |                                 __attribute__((vec_type_hint(typen)))
17 | 
18 | #define kernel_exec(X, typen) __kernel_exec(X, typen)
19 | 


--------------------------------------------------------------------------------
/generic/include/clc/common/mix.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/common/mix.inc>
2 | #include <clc/math/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/common/mix.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mix(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c);
2 | 
3 | #ifndef __CLC_SCALAR
4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mix(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_SCALAR_GENTYPE c);
5 | #endif
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/common/sign.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION sign
2 | #define __CLC_BODY <clc/math/unary_decl.inc>
3 | #include <clc/math/gentype.inc>
4 | #undef __CLC_FUNCTION
5 | #undef __CLC_BODY
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/explicit_fence/explicit_memory_fence.h:
--------------------------------------------------------------------------------
1 | _CLC_DECL void mem_fence(cl_mem_fence_flags flags);
2 | _CLC_DECL void read_mem_fence(cl_mem_fence_flags flags);
3 | _CLC_DECL void write_mem_fence(cl_mem_fence_flags flags);
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/geometric/cross.h:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL float3 cross(float3 p0, float3 p1);
2 | _CLC_OVERLOAD _CLC_DECL float4 cross(float4 p0, float4 p1);
3 | 
4 | #ifdef cl_khr_fp64
5 | _CLC_OVERLOAD _CLC_DECL double3 cross(double3 p0, double3 p1);
6 | _CLC_OVERLOAD _CLC_DECL double4 cross(double4 p0, double4 p1);
7 | #endif
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/geometric/distance.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/geometric/distance.inc>
2 | #include <clc/geometric/floatn.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/geometric/dot.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/geometric/dot.inc>
2 | #include <clc/geometric/floatn.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/geometric/dot.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_FLOAT dot(__CLC_FLOATN p0, __CLC_FLOATN p1);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/geometric/length.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/geometric/length.inc>
2 | #include <clc/geometric/floatn.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/geometric/length.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_FLOAT length(__CLC_FLOATN p0);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/geometric/normalize.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/geometric/normalize.inc>
2 | #include <clc/geometric/floatn.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/geometric/normalize.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_FLOATN normalize(__CLC_FLOATN p);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/abs.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/abs.inc>
2 | #include <clc/integer/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/abs.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE abs(__CLC_GENTYPE x);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/abs_diff.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/abs_diff.inc>
2 | #include <clc/integer/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/abs_diff.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/add_sat.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/add_sat.inc>
2 | #include <clc/integer/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/add_sat.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE add_sat(__CLC_GENTYPE x, __CLC_GENTYPE y);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/clz.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/clz.inc>
2 | #include <clc/integer/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/clz.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clz(__CLC_GENTYPE x);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/definitions.h:
--------------------------------------------------------------------------------
 1 | #define CHAR_BIT 8
 2 | #define INT_MAX 2147483647
 3 | #define INT_MIN (-2147483647 - 1)
 4 | #define LONG_MAX  0x7fffffffffffffffL
 5 | #define LONG_MIN (-0x7fffffffffffffffL - 1)
 6 | #define CHAR_MAX SCHAR_MAX
 7 | #define CHAR_MIN SCHAR_MIN
 8 | #define SCHAR_MAX 127
 9 | #define SCHAR_MIN (-127 - 1)
10 | #define SHRT_MAX 32767
11 | #define SHRT_MIN (-32767 - 1)
12 | #define UCHAR_MAX 255
13 | #define USHRT_MAX 65535
14 | #define UINT_MAX 0xffffffff
15 | #define ULONG_MAX 0xffffffffffffffffUL
16 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/hadd.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/hadd.inc>
2 | #include <clc/integer/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/hadd.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/integer-gentype.inc:
--------------------------------------------------------------------------------
 1 | #define __CLC_GENTYPE int
 2 | #include __CLC_BODY
 3 | #undef __CLC_GENTYPE
 4 | 
 5 | #define __CLC_GENTYPE int2
 6 | #include __CLC_BODY
 7 | #undef __CLC_GENTYPE
 8 | 
 9 | #define __CLC_GENTYPE int3
10 | #include __CLC_BODY
11 | #undef __CLC_GENTYPE
12 | 
13 | #define __CLC_GENTYPE int4
14 | #include __CLC_BODY
15 | #undef __CLC_GENTYPE
16 | 
17 | #define __CLC_GENTYPE int8
18 | #include __CLC_BODY
19 | #undef __CLC_GENTYPE
20 | 
21 | #define __CLC_GENTYPE int16
22 | #include __CLC_BODY
23 | #undef __CLC_GENTYPE
24 | 
25 | #define __CLC_GENTYPE uint
26 | #include __CLC_BODY
27 | #undef __CLC_GENTYPE
28 | 
29 | #define __CLC_GENTYPE uint2
30 | #include __CLC_BODY
31 | #undef __CLC_GENTYPE
32 | 
33 | #define __CLC_GENTYPE uint3
34 | #include __CLC_BODY
35 | #undef __CLC_GENTYPE
36 | 
37 | #define __CLC_GENTYPE uint4
38 | #include __CLC_BODY
39 | #undef __CLC_GENTYPE
40 | 
41 | #define __CLC_GENTYPE uint8
42 | #include __CLC_BODY
43 | #undef __CLC_GENTYPE
44 | 
45 | #define __CLC_GENTYPE uint16
46 | #include __CLC_BODY
47 | #undef __CLC_GENTYPE
48 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/mad24.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/mad24.inc>
2 | #include <clc/integer/integer-gentype.inc>
3 | #undef __CLC_BODY
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/mad24.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/mad_hi.h:
--------------------------------------------------------------------------------
1 | #define mad_hi(a, b, c) (mul_hi((a),(b))+(c))
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/mad_sat.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/mad_sat.inc>
2 | #include <clc/integer/gentype.inc>
3 | #undef __CLC_BODY
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/mad_sat.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad_sat(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/mul24.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/mul24.inc>
2 | #include <clc/integer/integer-gentype.inc>
3 | #undef __CLC_BODY
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/mul24.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/mul_hi.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/mul_hi.inc>
2 | #include <clc/integer/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/mul_hi.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul_hi(__CLC_GENTYPE x, __CLC_GENTYPE y);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/popcount.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION popcount
2 | #define __CLC_BODY <clc/integer/unary.inc>
3 | #include <clc/integer/gentype.inc>
4 | #undef __CLC_FUNCTION
5 | #undef __CLC_BODY
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/rhadd.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/rhadd.inc>
2 | #include <clc/integer/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/rhadd.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/rotate.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/rotate.inc>
2 | #include <clc/integer/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/rotate.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rotate(__CLC_GENTYPE x, __CLC_GENTYPE y);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/sub_sat.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/integer/sub_sat.inc>
2 | #include <clc/integer/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/sub_sat.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sub_sat(__CLC_GENTYPE x, __CLC_GENTYPE y);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/unary.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/integer/upsample.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \
 2 |     _CLC_OVERLOAD _CLC_DECL BGENTYPE upsample(GENTYPE hi, UGENTYPE lo);
 3 | 
 4 | #define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) \
 5 |     __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \
 6 |     __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \
 7 |     __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \
 8 |     __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \
 9 |     __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) \
10 |     __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16) \
11 | 
12 | #define __CLC_UPSAMPLE_TYPES() \
13 |     __CLC_UPSAMPLE_VEC(short, char, uchar) \
14 |     __CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \
15 |     __CLC_UPSAMPLE_VEC(int, short, ushort) \
16 |     __CLC_UPSAMPLE_VEC(uint, ushort, ushort) \
17 |     __CLC_UPSAMPLE_VEC(long, int, uint) \
18 |     __CLC_UPSAMPLE_VEC(ulong, uint, uint) \
19 | 
20 | __CLC_UPSAMPLE_TYPES()
21 | 
22 | #undef __CLC_UPSAMPLE_TYPES
23 | #undef __CLC_UPSAMPLE_DECL
24 | #undef __CLC_UPSAMPLE_VEC
25 | 
26 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/acos.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION acos
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/asin.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION asin
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/binary_decl.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b);
2 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b);
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/binary_decl_tt.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b);
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/ceil.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION ceil
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/copysign.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION copysign
2 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/cos.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION cos
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/cospi.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION cospi
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/erf.h:
--------------------------------------------------------------------------------
 1 | /* Clear any macro named erf so __CLC_FUNCTION below names the function itself. */
 2 | #undef erf
 3 | 
 4 | #define __CLC_BODY <clc/math/unary_decl.inc>
 5 | #define __CLC_FUNCTION erf
 6 | 
 7 | #include <clc/math/gentype.inc>
 8 | 
 9 | #undef __CLC_BODY
10 | #undef __CLC_FUNCTION
11 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/erfc.h:
--------------------------------------------------------------------------------
 1 | #undef erfc
 2 | 
 3 | #define __CLC_BODY <clc/math/unary_decl.inc>
 4 | #define __CLC_FUNCTION erfc
 5 | 
 6 | #include <clc/math/gentype.inc>
 7 | 
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/exp.h:
--------------------------------------------------------------------------------
 1 | #undef exp
 2 | 
 3 | #define __CLC_BODY <clc/math/unary_decl.inc>
 4 | #define __CLC_FUNCTION exp
 5 | 
 6 | #include <clc/math/gentype.inc>
 7 | 
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/exp10.h:
--------------------------------------------------------------------------------
 1 | #undef exp10
 2 | 
 3 | #define __CLC_BODY <clc/math/unary_decl.inc>
 4 | #define __CLC_FUNCTION exp10
 5 | 
 6 | #include <clc/math/gentype.inc>
 7 | 
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/expm1.h:
--------------------------------------------------------------------------------
 1 | /* Clear any macro named expm1 so __CLC_FUNCTION below names the function itself. */
 2 | #undef expm1
 3 | 
 4 | #define __CLC_BODY <clc/math/unary_decl.inc>
 5 | #define __CLC_FUNCTION expm1
 6 | 
 7 | #include <clc/math/gentype.inc>
 8 | 
 9 | #undef __CLC_BODY
10 | #undef __CLC_FUNCTION
11 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/fabs.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION fabs
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/fdim.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION fdim
2 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/floor.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION floor
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/fma.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/ternary_decl.inc>
2 | #define __CLC_FUNCTION fma
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/fmax.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/binary_decl.inc>
2 | #define __CLC_FUNCTION fmax
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 
9 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/fmin.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/binary_decl.inc>
2 | #define __CLC_FUNCTION fmin
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 
9 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/fmod.h:
--------------------------------------------------------------------------------
1 | /* Declares fmod(a, b) through binary_decl_tt.inc, where both operands share __CLC_GENTYPE. */
2 | #define __CLC_FUNCTION fmod
3 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
4 | 
5 | #include <clc/math/gentype.inc>
6 | 
7 | /* Release both helper macros, as copysign.h and fdim.h do, so nothing leaks into later headers. */
8 | #undef __CLC_BODY
9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/frexp.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/frexp.inc>
2 | #include <clc/math/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/frexp.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, global __CLC_INTN *iptr);
2 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, local __CLC_INTN *iptr);
3 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, private __CLC_INTN *iptr);
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_cos.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION half_cos
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_divide.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
2 | #define __CLC_FUNCTION half_divide
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_exp.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION half_exp
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_exp10.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION half_exp10
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_exp2.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION half_exp2
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_log.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION half_log
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_log10.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION half_log10
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_log2.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION half_log2
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_powr.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
2 | #define __CLC_FUNCTION half_powr
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_recip.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION half_recip
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_sin.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION half_sin
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/half_tan.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION half_tan
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/hypot.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION hypot
2 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/ilogb.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/ilogb.inc>
2 | 
3 | #include <clc/math/gentype.inc>
4 | 
5 | #undef __CLC_BODY
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/ilogb.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_INTN ilogb(__CLC_GENTYPE x);
2 | /* ilogb returns the integer type __CLC_INTN, not __CLC_GENTYPE, so it cannot reuse unary_decl.inc. */


--------------------------------------------------------------------------------
/generic/include/clc/math/lgamma.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION lgamma
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/lgamma_r.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/lgamma_r.inc>
2 | #include <clc/math/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/lgamma_r.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, global __CLC_INTN *iptr);
2 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, local __CLC_INTN *iptr);
3 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, private __CLC_INTN *iptr);
4 | /* The lgamma_r prototypes above are identical except for the address space (global, local, private) of iptr. */


--------------------------------------------------------------------------------
/generic/include/clc/math/log10.h:
--------------------------------------------------------------------------------
 1 | #undef log10
 2 | 
 3 | #define __CLC_BODY <clc/math/unary_decl.inc>
 4 | #define __CLC_FUNCTION log10
 5 | 
 6 | #include <clc/math/gentype.inc>
 7 | 
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/logb.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION logb
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/mad.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/ternary_decl.inc>
2 | #define __CLC_FUNCTION mad
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/maxmag.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
2 | #define __CLC_FUNCTION maxmag
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/minmag.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
2 | #define __CLC_FUNCTION minmag
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/nan.h:
--------------------------------------------------------------------------------
1 | #define __CLC_CONCAT(x, y) x ## y
2 | #define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y)
3 | /* __CLC_XCONCAT adds one level of indirection so its arguments are macro-expanded before ## pastes them. */
4 | #define __CLC_BODY <clc/math/nan.inc>
5 | #include <clc/math/gentype.inc>
6 | /* nan.inc is the user of __CLC_XCONCAT; drop both helpers once the include is done. */
7 | #undef __CLC_XCONCAT
8 | #undef __CLC_CONCAT
9 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/nan.inc:
--------------------------------------------------------------------------------
 1 | #ifdef __CLC_SCALAR
 2 | #define __CLC_VECSIZE
 3 | #endif
 4 | /* With __CLC_VECSIZE empty (scalar case) the pastes below yield the plain scalar type; __CLC_FPSIZE picks ulong/uint/ushort. */
 5 | #if __CLC_FPSIZE == 64
 6 | #define __CLC_NATN __CLC_XCONCAT(ulong, __CLC_VECSIZE)
 7 | #elif __CLC_FPSIZE == 32
 8 | #define __CLC_NATN __CLC_XCONCAT(uint, __CLC_VECSIZE)
 9 | #elif __CLC_FPSIZE == 16
10 | #define __CLC_NATN __CLC_XCONCAT(ushort, __CLC_VECSIZE)
11 | #endif
12 | /* nan() takes its code argument as the unsigned integer type __CLC_NATN selected above. */
13 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE nan(__CLC_NATN code);
14 | /* Undo the definitions made in this file; __CLC_VECSIZE is only removed when it was defined here (scalar case). */
15 | #undef __CLC_NATN
16 | #ifdef __CLC_SCALAR
17 | #undef __CLC_VECSIZE
18 | #endif
19 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_cos.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION native_cos
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_divide.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
2 | #define __CLC_FUNCTION native_divide
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_exp.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION native_exp
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_exp10.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION native_exp10
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_exp2.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION native_exp2
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_log10.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION native_log10
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_powr.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
2 | #define __CLC_FUNCTION native_powr
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_recip.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION native_recip
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_rsqrt.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION native_rsqrt
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_sin.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION native_sin
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_sqrt.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION native_sqrt
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/native_tan.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_BODY <clc/math/unary_decl.inc>
 2 | #define __CLC_FUNCTION native_tan
 3 | #define __FLOAT_ONLY
 4 | 
 5 | #include <clc/math/gentype.inc>
 6 | 
 7 | #undef __FLOAT_ONLY
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/nextafter.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION nextafter
2 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/pow.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION pow
2 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
3 | #include <clc/math/gentype.inc>
4 | #undef __CLC_BODY
5 | #undef __CLC_FUNCTION
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/pown.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/pown.inc>
2 | #include <clc/math/gentype.inc>
3 | #undef __CLC_BODY
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/pown.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE pown(__CLC_GENTYPE a, __CLC_INTN b);
2 | /* pown's second operand b has the integer type __CLC_INTN rather than __CLC_GENTYPE. */


--------------------------------------------------------------------------------
/generic/include/clc/math/powr.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION powr
2 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
3 | #include <clc/math/gentype.inc>
4 | #undef __CLC_BODY
5 | #undef __CLC_FUNCTION
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/remainder.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION remainder
2 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
3 | #include <clc/math/gentype.inc>
4 | #undef __CLC_FUNCTION
5 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/remquo.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_FUNCTION remquo
 2 | /* remquo.inc is instantiated three times below, once per address space (global, local, private) of the q pointer. */
 3 | #define __CLC_BODY <clc/math/remquo.inc>
 4 | #define __CLC_ADDRESS_SPACE global
 5 | #include <clc/math/gentype.inc>
 6 | #undef __CLC_ADDRESS_SPACE
 7 | /* NOTE(review): __CLC_BODY is defined again before each include - presumably gentype.inc undefines it; confirm. */
 8 | #define __CLC_BODY <clc/math/remquo.inc>
 9 | #define __CLC_ADDRESS_SPACE local
10 | #include <clc/math/gentype.inc>
11 | #undef __CLC_ADDRESS_SPACE
12 | 
13 | #define __CLC_BODY <clc/math/remquo.inc>
14 | #define __CLC_ADDRESS_SPACE private
15 | #include <clc/math/gentype.inc>
16 | #undef __CLC_ADDRESS_SPACE
17 | 
18 | #undef __CLC_FUNCTION
19 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/remquo.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q);
2 | /* __CLC_FUNCTION and __CLC_ADDRESS_SPACE come from the including header (remquo.h sets remquo and global/local/private). */


--------------------------------------------------------------------------------
/generic/include/clc/math/rint.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION rint
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/rootn.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/rootn.inc>
2 | #include <clc/math/gentype.inc>
3 | #undef __CLC_BODY
4 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/rootn.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rootn(__CLC_GENTYPE a, __CLC_INTN b);
2 | /* rootn's second operand b has the integer type __CLC_INTN rather than __CLC_GENTYPE. */


--------------------------------------------------------------------------------
/generic/include/clc/math/round.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION round
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/rsqrt.h:
--------------------------------------------------------------------------------
1 | #define rsqrt(x) (1.f/sqrt(x))
2 | /* rsqrt above is a function-like macro, not a declared function: it expands textually to 1.f divided by sqrt(x). */


--------------------------------------------------------------------------------
/generic/include/clc/math/sin.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION sin
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/sincos.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/sincos.inc>
2 | #include <clc/math/gentype.inc>
3 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/sincos.inc:
--------------------------------------------------------------------------------
1 |  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, global __CLC_GENTYPE * cosval);
2 |  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, local __CLC_GENTYPE * cosval);
3 |  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, private __CLC_GENTYPE * cosval);
4 | /* The sincos prototypes above are identical except for the address space (global, local, private) of cosval. */


--------------------------------------------------------------------------------
/generic/include/clc/math/sinpi.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION sinpi
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/sqrt.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION sqrt
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/tan.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION tan
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/tanpi.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION tanpi
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/ternary_decl.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c);
2 | /* Generic three-operand prototype; __CLC_FUNCTION is defined by the including header (e.g. mad.h). */


--------------------------------------------------------------------------------
/generic/include/clc/math/tgamma.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION tgamma
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/trunc.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/math/unary_decl.inc>
2 | #define __CLC_FUNCTION trunc
3 | 
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/math/unary_decl.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x);
2 | /* Generic one-operand prototype; __CLC_FUNCTION is defined by the including header (e.g. sin.h, sqrt.h). */


--------------------------------------------------------------------------------
/generic/include/clc/relational/all.h:
--------------------------------------------------------------------------------
 1 | /* Emits one all() prototype: takes ARG_TYPE, always returns int. */
 2 | #define _CLC_ALL_DECL(ARG_TYPE) _CLC_OVERLOAD _CLC_DECL int all(ARG_TYPE v);
 3 | 
 4 | /* Emits all() prototypes for BASE itself and for its 2-, 3-, 4-, 8- and 16-wide vector forms. */
 5 | #define _CLC_VECTOR_ALL_DECL(BASE) \
 6 |   _CLC_ALL_DECL(BASE)              \
 7 |   _CLC_ALL_DECL(BASE##2)           \
 8 |   _CLC_ALL_DECL(BASE##3)           \
 9 |   _CLC_ALL_DECL(BASE##4)           \
10 |   _CLC_ALL_DECL(BASE##8)           \
11 |   _CLC_ALL_DECL(BASE##16)
12 | 
13 | _CLC_VECTOR_ALL_DECL(char)
14 | _CLC_VECTOR_ALL_DECL(short)
15 | _CLC_VECTOR_ALL_DECL(int)
16 | _CLC_VECTOR_ALL_DECL(long)
17 | 
18 | /* The helper macros are private to this header; remove them again. */
19 | #undef _CLC_VECTOR_ALL_DECL
20 | #undef _CLC_ALL_DECL
21 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/any.h:
--------------------------------------------------------------------------------
 1 | /* Prototypes for any(): one overload per signed integer type and vector width, each returning int. */
 2 | #define _CLC_ANY_DECL(TYPE) \
 3 |   _CLC_OVERLOAD _CLC_DECL int any(TYPE v);
 4 | 
 5 | #define _CLC_VECTOR_ANY_DECL(TYPE) \
 6 |   _CLC_ANY_DECL(TYPE)     \
 7 |   _CLC_ANY_DECL(TYPE##2)  \
 8 |   _CLC_ANY_DECL(TYPE##3)  \
 9 |   _CLC_ANY_DECL(TYPE##4)  \
10 |   _CLC_ANY_DECL(TYPE##8)  \
11 |   _CLC_ANY_DECL(TYPE##16)
12 | 
13 | _CLC_VECTOR_ANY_DECL(char)
14 | _CLC_VECTOR_ANY_DECL(short)
15 | _CLC_VECTOR_ANY_DECL(int)
16 | _CLC_VECTOR_ANY_DECL(long)
17 | 
18 | /* Do not leak the helper macros into code that includes this header (all.h cleans up the same way). */
19 | #undef _CLC_ANY_DECL
20 | #undef _CLC_VECTOR_ANY_DECL
21 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/binary_decl.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN a, __CLC_FLOATN b);
2 | /* Two-operand relational prototype: __CLC_FLOATN operands, __CLC_INTN result; __CLC_FUNCTION comes from the including header (e.g. isgreater.h). */


--------------------------------------------------------------------------------
/generic/include/clc/relational/isequal.h:
--------------------------------------------------------------------------------
 1 | #define _CLC_ISEQUAL_DECL(TYPE, RETTYPE) \
 2 |   _CLC_OVERLOAD _CLC_DECL RETTYPE isequal(TYPE x, TYPE y);
 3 | /* Vector overloads: the 2/3/4/8/16 width suffix is pasted onto both TYPE and RETTYPE. */
 4 | #define _CLC_VECTOR_ISEQUAL_DECL(TYPE, RETTYPE) \
 5 |   _CLC_ISEQUAL_DECL(TYPE##2, RETTYPE##2)  \
 6 |   _CLC_ISEQUAL_DECL(TYPE##3, RETTYPE##3)  \
 7 |   _CLC_ISEQUAL_DECL(TYPE##4, RETTYPE##4)  \
 8 |   _CLC_ISEQUAL_DECL(TYPE##8, RETTYPE##8)  \
 9 |   _CLC_ISEQUAL_DECL(TYPE##16, RETTYPE##16)
10 | /* float: the scalar overload returns int and the vector overloads return int vectors. */
11 | _CLC_ISEQUAL_DECL(float, int)
12 | _CLC_VECTOR_ISEQUAL_DECL(float, int)
13 | /* double / half: scalar overloads still return int; vector overloads return long / short vectors. */
14 | #ifdef cl_khr_fp64
15 | _CLC_ISEQUAL_DECL(double, int)
16 | _CLC_VECTOR_ISEQUAL_DECL(double, long)
17 | #endif
18 | #ifdef cl_khr_fp16
19 | _CLC_ISEQUAL_DECL(half, int)
20 | _CLC_VECTOR_ISEQUAL_DECL(half, short)
21 | #endif
22 | /* Remove both helper macros so neither leaks out of this header. */
23 | #undef _CLC_ISEQUAL_DECL
24 | #undef _CLC_VECTOR_ISEQUAL_DECL
25 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/isfinite.h:
--------------------------------------------------------------------------------
 1 | #undef isfinite
 2 | 
 3 | #define __CLC_FUNCTION isfinite
 4 | #define __CLC_BODY <clc/relational/unary_decl.inc>
 5 | 
 6 | #include <clc/relational/floatn.inc>
 7 | 
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/isgreater.h:
--------------------------------------------------------------------------------
 1 | #undef isgreater
 2 | 
 3 | #define __CLC_FUNCTION isgreater
 4 | #define __CLC_BODY <clc/relational/binary_decl.inc>
 5 | 
 6 | #include <clc/relational/floatn.inc>
 7 | 
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/isgreaterequal.h:
--------------------------------------------------------------------------------
 1 | #undef isgreaterequal
 2 | 
 3 | #define __CLC_FUNCTION isgreaterequal
 4 | #define __CLC_BODY <clc/relational/binary_decl.inc>
 5 | 
 6 | #include <clc/relational/floatn.inc>
 7 | 
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/isinf.h:
--------------------------------------------------------------------------------
 1 | /* Emits one isinf() prototype: takes OPERAND, returns RESULT. */
 2 | #define _CLC_ISINF_DECL(RESULT, OPERAND) _CLC_OVERLOAD _CLC_DECL RESULT isinf(OPERAND);
 3 | 
 4 | /* Emits the 2-, 3-, 4-, 8- and 16-wide vector prototypes by pasting the width onto both type names. */
 5 | #define _CLC_VECTOR_ISINF_DECL(RESULT, OPERAND) \
 6 |   _CLC_ISINF_DECL(RESULT##2, OPERAND##2)        \
 7 |   _CLC_ISINF_DECL(RESULT##3, OPERAND##3)        \
 8 |   _CLC_ISINF_DECL(RESULT##4, OPERAND##4)        \
 9 |   _CLC_ISINF_DECL(RESULT##8, OPERAND##8)        \
10 |   _CLC_ISINF_DECL(RESULT##16, OPERAND##16)
11 | 
12 | /* float: int result for the scalar form, int vectors for the vector forms. */
13 | _CLC_ISINF_DECL(int, float)
14 | _CLC_VECTOR_ISINF_DECL(int, float)
15 | 
16 | /* double: int result for the scalar form, long vectors for the vector forms. */
17 | #ifdef cl_khr_fp64
18 | _CLC_ISINF_DECL(int, double)
19 | _CLC_VECTOR_ISINF_DECL(long, double)
20 | #endif
21 | 
22 | /* half: int result for the scalar form, short vectors for the vector forms. */
23 | #ifdef cl_khr_fp16
24 | _CLC_ISINF_DECL(int, half)
25 | _CLC_VECTOR_ISINF_DECL(short, half)
26 | #endif
27 | 
28 | /* The helper macros are private to this header; remove them again. */
29 | #undef _CLC_VECTOR_ISINF_DECL
30 | #undef _CLC_ISINF_DECL
31 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/isless.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION isless
2 | #define __CLC_BODY <clc/relational/binary_decl.inc>
3 | 
4 | #include <clc/relational/floatn.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/islessequal.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION islessequal
2 | #define __CLC_BODY <clc/relational/binary_decl.inc>
3 | 
4 | #include <clc/relational/floatn.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/islessgreater.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION islessgreater
2 | #define __CLC_BODY <clc/relational/binary_decl.inc>
3 | 
4 | #include <clc/relational/floatn.inc>
5 | 
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/isnan.h:
--------------------------------------------------------------------------------
 1 | /* Emits one isnan() prototype: takes OPERAND, returns RESULT. */
 2 | #define _CLC_ISNAN_DECL(RESULT, OPERAND) _CLC_OVERLOAD _CLC_DECL RESULT isnan(OPERAND);
 3 | 
 4 | /* Emits the 2-, 3-, 4-, 8- and 16-wide vector prototypes by pasting the width onto both type names. */
 5 | #define _CLC_VECTOR_ISNAN_DECL(RESULT, OPERAND) \
 6 |   _CLC_ISNAN_DECL(RESULT##2, OPERAND##2)        \
 7 |   _CLC_ISNAN_DECL(RESULT##3, OPERAND##3)        \
 8 |   _CLC_ISNAN_DECL(RESULT##4, OPERAND##4)        \
 9 |   _CLC_ISNAN_DECL(RESULT##8, OPERAND##8)        \
10 |   _CLC_ISNAN_DECL(RESULT##16, OPERAND##16)
11 | 
12 | /* float: int result for the scalar form, int vectors for the vector forms. */
13 | _CLC_ISNAN_DECL(int, float)
14 | _CLC_VECTOR_ISNAN_DECL(int, float)
15 | 
16 | /* double: int result for the scalar form, long vectors for the vector forms. */
17 | #ifdef cl_khr_fp64
18 | _CLC_ISNAN_DECL(int, double)
19 | _CLC_VECTOR_ISNAN_DECL(long, double)
20 | #endif
21 | 
22 | /* half: int result for the scalar form, short vectors for the vector forms. */
23 | #ifdef cl_khr_fp16
24 | _CLC_ISNAN_DECL(int, half)
25 | _CLC_VECTOR_ISNAN_DECL(short, half)
26 | #endif
27 | 
28 | /* The helper macros are private to this header; remove them again. */
29 | #undef _CLC_VECTOR_ISNAN_DECL
30 | #undef _CLC_ISNAN_DECL
31 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/isnormal.h:
--------------------------------------------------------------------------------
 1 | #undef isnormal
 2 | 
 3 | #define __CLC_FUNCTION isnormal
 4 | #define __CLC_BODY <clc/relational/unary_decl.inc>
 5 | 
 6 | #include <clc/relational/floatn.inc>
 7 | 
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/isnotequal.h:
--------------------------------------------------------------------------------
 1 | #undef isnotequal
 2 | 
 3 | #define __CLC_FUNCTION isnotequal
 4 | #define __CLC_BODY <clc/relational/binary_decl.inc>
 5 | 
 6 | #include <clc/relational/floatn.inc>
 7 | 
 8 | #undef __CLC_BODY
 9 | #undef __CLC_FUNCTION
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/isordered.h:
--------------------------------------------------------------------------------
 1 | #undef isordered
 2 | /* Declare the isordered overloads: floatn.inc is handed binary_decl.inc as the per-type body. */
 3 | #define __CLC_BODY <clc/relational/binary_decl.inc>
 4 | #define __CLC_FUNCTION isordered
 5 | 
 6 | #include <clc/relational/floatn.inc>
 7 | /* Drop the template parameters so they cannot leak into later headers. */
 8 | #undef __CLC_FUNCTION
 9 | #undef __CLC_BODY
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/isunordered.h:
--------------------------------------------------------------------------------
 1 | #undef isunordered
 2 | /* Declare the isunordered overloads: floatn.inc is handed binary_decl.inc as the per-type body. */
 3 | #define __CLC_BODY <clc/relational/binary_decl.inc>
 4 | #define __CLC_FUNCTION isunordered
 5 | 
 6 | #include <clc/relational/floatn.inc>
 7 | /* Drop the template parameters so they cannot leak into later headers. */
 8 | #undef __CLC_FUNCTION
 9 | #undef __CLC_BODY
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/select.h:
--------------------------------------------------------------------------------
 1 | /* Duplicate these so we don't have to distribute utils.h */
 2 | #define __CLC_CONCAT(x, y) x ## y
 3 | #define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y)
 4 | /* Expand select.inc twice: once through math/gentype.inc, once through integer/gentype.inc. */
 5 | #define __CLC_BODY <clc/relational/select.inc>
 6 | #include <clc/math/gentype.inc>
 7 | #define __CLC_BODY <clc/relational/select.inc>
 8 | #include <clc/integer/gentype.inc>
 9 | /* The pasting helpers are private to this header. */
10 | #undef __CLC_CONCAT
11 | #undef __CLC_XCONCAT
12 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/select.inc:
--------------------------------------------------------------------------------
 1 | #if defined(__CLC_SCALAR)
 2 | #define __CLC_VECSIZE /* empty suffix: a scalar has no vector width to append */
 3 | #endif
 4 | /* For a floating-point gentype, pick the signed/unsigned integer types of the same bit width. */
 5 | #if __CLC_FPSIZE == 16
 6 | #define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE)
 7 | #define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE)
 8 | #elif __CLC_FPSIZE == 32
 9 | #define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE)
10 | #define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE)
11 | #elif __CLC_FPSIZE == 64
12 | #define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE)
13 | #define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE)
14 | #endif
15 | /* select(x, y, z) is declared once with a signed and once with an unsigned selector z. */
16 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_S_GENTYPE z);
17 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_U_GENTYPE z);
18 | /* Selector types are removed only when __CLC_FPSIZE is defined, i.e. when this file set them. */
19 | #if defined(__CLC_FPSIZE)
20 | #undef __CLC_U_GENTYPE
21 | #undef __CLC_S_GENTYPE
22 | #endif
23 | #if defined(__CLC_SCALAR)
24 | #undef __CLC_VECSIZE
25 | #endif
26 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/signbit.h:
--------------------------------------------------------------------------------
 1 | #undef signbit
 2 | /* Declare the signbit overloads: floatn.inc is handed unary_decl.inc as the per-type body. */
 3 | #define __CLC_BODY <clc/relational/unary_decl.inc>
 4 | #define __CLC_FUNCTION signbit
 5 | 
 6 | #include <clc/relational/floatn.inc>
 7 | /* Drop the template parameters so they cannot leak into later headers. */
 8 | #undef __CLC_FUNCTION
 9 | #undef __CLC_BODY
10 | 


--------------------------------------------------------------------------------
/generic/include/clc/relational/unary_decl.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN x); /* template line: one overload of __CLC_FUNCTION taking __CLC_FLOATN and returning __CLC_INTN; the macros come from the including headers */
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/shared/clamp.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/shared/clamp.inc>
2 | #include <clc/integer/gentype.inc>
3 | /* Same clamp.inc body again, this time expanded through math/gentype.inc. */
4 | #define __CLC_BODY <clc/shared/clamp.inc>
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/shared/clamp.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE minval, __CLC_GENTYPE maxval);
2 | /* Non-scalar gentypes also get an overload whose two bounds are of the scalar element type. */
3 | #if !defined(__CLC_SCALAR)
4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE minval, __CLC_SCALAR_GENTYPE maxval);
5 | #endif
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/shared/max.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/shared/max.inc>
2 | #include <clc/integer/gentype.inc>
3 | /* Same max.inc body again, this time expanded through math/gentype.inc. */
4 | #define __CLC_BODY <clc/shared/max.inc>
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/shared/max.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE max(__CLC_GENTYPE x, __CLC_GENTYPE y);
2 | /* Non-scalar gentypes also get an overload whose second operand is of the scalar element type. */
3 | #if !defined(__CLC_SCALAR)
4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE max(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y);
5 | #endif
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/shared/min.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <clc/shared/min.inc>
2 | #include <clc/integer/gentype.inc>
3 | /* Same min.inc body again, this time expanded through math/gentype.inc. */
4 | #define __CLC_BODY <clc/shared/min.inc>
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/shared/min.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE min(__CLC_GENTYPE x, __CLC_GENTYPE y);
2 | /* Non-scalar gentypes also get an overload whose second operand is of the scalar element type. */
3 | #if !defined(__CLC_SCALAR)
4 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE min(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y);
5 | #endif
6 | 


--------------------------------------------------------------------------------
/generic/include/clc/synchronization/barrier.h:
--------------------------------------------------------------------------------
1 | _CLC_DECL void barrier(cl_mem_fence_flags flags); /* declaration only; flags is a cl_mem_fence_flags bit mask (CLK_*_MEM_FENCE values) */
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/synchronization/cl_mem_fence_flags.h:
--------------------------------------------------------------------------------
1 | typedef uint cl_mem_fence_flags;
2 | /* Flag values are distinct bits (1 and 2), so they can be OR-ed together as wait_group_events does. */
3 | #define CLK_LOCAL_MEM_FENCE 1
4 | #define CLK_GLOBAL_MEM_FENCE 2
5 | 


--------------------------------------------------------------------------------
/generic/include/clc/workitem/get_global_id.h:
--------------------------------------------------------------------------------
1 | _CLC_DECL size_t get_global_id(uint dim); /* declaration of the OpenCL work-item built-in; no definition in this header */
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/workitem/get_global_offset.h:
--------------------------------------------------------------------------------
1 | _CLC_DECL size_t get_global_offset(uint dim); /* declaration of the OpenCL work-item built-in; no definition in this header */
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/workitem/get_global_size.h:
--------------------------------------------------------------------------------
1 | _CLC_DECL size_t get_global_size(uint dim); /* declaration of the OpenCL work-item built-in; no definition in this header */
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/workitem/get_group_id.h:
--------------------------------------------------------------------------------
1 | _CLC_DECL size_t get_group_id(uint dim); /* declaration of the OpenCL work-item built-in; no definition in this header */
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/workitem/get_local_id.h:
--------------------------------------------------------------------------------
1 | _CLC_DECL size_t get_local_id(uint dim); /* declaration of the OpenCL work-item built-in; no definition in this header */
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/workitem/get_local_size.h:
--------------------------------------------------------------------------------
1 | _CLC_DECL size_t get_local_size(uint dim); /* declaration of the OpenCL work-item built-in; no definition in this header */
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/workitem/get_num_groups.h:
--------------------------------------------------------------------------------
1 | _CLC_DECL size_t get_num_groups(uint dim); /* declaration of the OpenCL work-item built-in; no definition in this header */
2 | 


--------------------------------------------------------------------------------
/generic/include/clc/workitem/get_work_dim.h:
--------------------------------------------------------------------------------
1 | _CLC_DECL uint get_work_dim(void); /* declaration of the OpenCL work-item built-in; takes no argument and returns uint */
2 | 


--------------------------------------------------------------------------------
/generic/include/integer/popcount.h:
--------------------------------------------------------------------------------
1 | /* Bind __clc_native_popcount to the llvm.ctpop intrinsic through the unary_intrin.inc template. */
2 | #define __CLC_INTRINSIC "llvm.ctpop"
3 | #define __CLC_FUNCTION __clc_native_popcount
4 | #include <integer/unary_intrin.inc>
5 | 


--------------------------------------------------------------------------------
/generic/include/integer/unary_intrin.inc:
--------------------------------------------------------------------------------
 1 | #define __CLC_INTRINSIC_DEF(T, BITS) /* scalar + 2/3/4/8/16-wide overloads, each aliased via __asm to the suffixed intrinsic name */ \
 2 | _CLC_OVERLOAD T __CLC_FUNCTION(T x) __asm(__CLC_INTRINSIC ".i" BITS); \
 3 | _CLC_OVERLOAD T##2 __CLC_FUNCTION(T##2 x) __asm(__CLC_INTRINSIC ".v2i" BITS); \
 4 | _CLC_OVERLOAD T##3 __CLC_FUNCTION(T##3 x) __asm(__CLC_INTRINSIC ".v3i" BITS); \
 5 | _CLC_OVERLOAD T##4 __CLC_FUNCTION(T##4 x) __asm(__CLC_INTRINSIC ".v4i" BITS); \
 6 | _CLC_OVERLOAD T##8 __CLC_FUNCTION(T##8 x) __asm(__CLC_INTRINSIC ".v8i" BITS); \
 7 | _CLC_OVERLOAD T##16 __CLC_FUNCTION(T##16 x) __asm(__CLC_INTRINSIC ".v16i" BITS);
 8 | /* Signed and unsigned types of equal width share the same intrinsic suffix. */
 9 | __CLC_INTRINSIC_DEF(char, "8")
10 | __CLC_INTRINSIC_DEF(short, "16")
11 | __CLC_INTRINSIC_DEF(int, "32")
12 | __CLC_INTRINSIC_DEF(long, "64")
13 | __CLC_INTRINSIC_DEF(uchar, "8")
14 | __CLC_INTRINSIC_DEF(ushort, "16")
15 | __CLC_INTRINSIC_DEF(uint, "32")
16 | __CLC_INTRINSIC_DEF(ulong, "64")
17 | /* Consume the includer's parameters so the next includer starts clean. */
18 | #undef __CLC_INTRINSIC_DEF
19 | #undef __CLC_INTRINSIC
20 | #undef __CLC_FUNCTION
21 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_exp10.h:
--------------------------------------------------------------------------------
1 | /* Declares the __clc_exp10 overloads by expanding unary_decl.inc through math/gentype.inc. */
2 | #define __CLC_FUNCTION __clc_exp10
3 | #define __CLC_BODY <clc/math/unary_decl.inc>
4 | #include <clc/math/gentype.inc>
5 | /* Undefine both template parameters, as the sibling clc_*.h headers do (harmless if already undefined). */
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_fma.h:
--------------------------------------------------------------------------------
 1 | #define __CLC_INTRINSIC "llvm.fma"
 2 | #define __CLC_FUNCTION __clc_fma
 3 | #include "math/ternary_intrin.inc" /* presumably maps __clc_fma onto the intrinsic, as unary_intrin.inc does for unary functions - confirm */
 4 | /* __clc_sw_fma: declared via ternary_decl.inc with __FLOAT_ONLY defined while gentype.inc expands. */
 5 | #define __CLC_BODY <clc/math/ternary_decl.inc>
 6 | #define __CLC_FUNCTION __clc_sw_fma
 7 | #define __FLOAT_ONLY
 8 | #include <clc/math/gentype.inc>
 9 | #undef __FLOAT_ONLY
10 | #undef __CLC_FUNCTION
11 | #undef __CLC_BODY
12 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_fmod.h:
--------------------------------------------------------------------------------
1 | /* Declares the __clc_fmod overloads by expanding binary_decl_tt.inc through math/gentype.inc. */
2 | #define __CLC_FUNCTION __clc_fmod
3 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
4 | #include <clc/math/gentype.inc>
5 | /* Undefine both template parameters, as the sibling clc_*.h headers do (harmless if already undefined). */
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_hypot.h:
--------------------------------------------------------------------------------
1 | /* Declares the __clc_hypot overloads by expanding binary_decl_tt.inc through math/gentype.inc. */
2 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
3 | #define __CLC_FUNCTION __clc_hypot
4 | #include <clc/math/gentype.inc>
5 | #undef __CLC_FUNCTION
6 | #undef __CLC_BODY
7 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_ldexp.h:
--------------------------------------------------------------------------------
 1 | _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float, int); /* float overload, always declared */
 2 | /* double overload: only when the cl_khr_fp64 macro is defined; the extension is enabled first. */
 3 | #ifdef cl_khr_fp64
 4 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 5 | _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int);
 6 | #endif
 7 | /* half overload: only with cl_khr_fp16. NOTE(review): it is declared to return float, whereas the other overloads return their argument type - confirm against the definition. */
 8 | #ifdef cl_khr_fp16
 9 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
10 | _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(half, int);
11 | #endif
12 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_nextafter.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION __clc_nextafter
2 | #define __CLC_BODY <clc/math/binary_decl.inc>
3 | /* Declares the __clc_nextafter overloads by expanding binary_decl.inc through math/gentype.inc. */
4 | #include <clc/math/gentype.inc>
5 | 
6 | #undef __CLC_FUNCTION
7 | #undef __CLC_BODY
8 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_pow.h:
--------------------------------------------------------------------------------
1 | /* Declares the __clc_pow overloads by expanding binary_decl_tt.inc through math/gentype.inc. */
2 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
3 | #define __CLC_FUNCTION __clc_pow
4 | #include <clc/math/gentype.inc>
5 | #undef __CLC_FUNCTION
6 | #undef __CLC_BODY
7 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_pown.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <math/clc_pown.inc>
2 | #include <clc/math/gentype.inc> /* expands clc_pown.inc, which declares __clc_pown(__CLC_GENTYPE, __CLC_INTN) */
3 | #undef __CLC_BODY
4 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_pown.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_pown(__CLC_GENTYPE a, __CLC_INTN b); /* template line: gentype base a, integer-typed b (__CLC_INTN is supplied by the includer) */
2 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_powr.h:
--------------------------------------------------------------------------------
1 | /* Declares the __clc_powr overloads by expanding binary_decl_tt.inc through math/gentype.inc. */
2 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
3 | #define __CLC_FUNCTION __clc_powr
4 | #include <clc/math/gentype.inc>
5 | #undef __CLC_FUNCTION
6 | #undef __CLC_BODY
7 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_remainder.h:
--------------------------------------------------------------------------------
1 | /* Declares the __clc_remainder overloads by expanding binary_decl_tt.inc through math/gentype.inc. */
2 | #define __CLC_FUNCTION __clc_remainder
3 | #define __CLC_BODY <clc/math/binary_decl_tt.inc>
4 | #include <clc/math/gentype.inc>
5 | /* Undefine both template parameters, as the sibling clc_*.h headers do (harmless if already undefined). */
6 | #undef __CLC_BODY
7 | #undef __CLC_FUNCTION
8 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_remquo.h:
--------------------------------------------------------------------------------
1 | #define __CLC_FUNCTION __clc_remquo
2 | /* Declare __clc_remquo via remquo.inc with __CLC_ADDRESS_SPACE set to private. */
3 | #define __CLC_BODY <clc/math/remquo.inc>
4 | #define __CLC_ADDRESS_SPACE private
5 | #include <clc/math/gentype.inc>
6 | #undef __CLC_ADDRESS_SPACE
7 | #undef __CLC_BODY
8 | #undef __CLC_FUNCTION
9 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_rootn.h:
--------------------------------------------------------------------------------
1 | #define __CLC_BODY <math/clc_rootn.inc>
2 | #include <clc/math/gentype.inc> /* expands clc_rootn.inc, which declares __clc_rootn(__CLC_GENTYPE, __CLC_INTN) */
3 | #undef __CLC_BODY
4 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_rootn.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_rootn(__CLC_GENTYPE a, __CLC_INTN b); /* template line: gentype a, integer-typed b (__CLC_INTN is supplied by the includer) */
2 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_sqrt.h:
--------------------------------------------------------------------------------
1 | /* Declares the __clc_sqrt overloads by expanding unary_decl.inc through math/gentype.inc. */
2 | #define __CLC_BODY <clc/math/unary_decl.inc>
3 | #define __CLC_FUNCTION __clc_sqrt
4 | #include <clc/math/gentype.inc>
5 | #undef __CLC_FUNCTION
6 | #undef __CLC_BODY
7 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_tan.h:
--------------------------------------------------------------------------------
1 | /* Declares the __clc_tan overloads by expanding unary_decl.inc through math/gentype.inc. */
2 | #define __CLC_BODY <clc/math/unary_decl.inc>
3 | #define __CLC_FUNCTION __clc_tan
4 | #include <clc/math/gentype.inc>
5 | #undef __CLC_FUNCTION
6 | #undef __CLC_BODY
7 | 


--------------------------------------------------------------------------------
/generic/include/math/clc_tanpi.h:
--------------------------------------------------------------------------------
1 | /* Declares the __clc_tanpi overloads by expanding unary_decl.inc through math/gentype.inc. */
2 | #define __CLC_BODY <clc/math/unary_decl.inc>
3 | #define __CLC_FUNCTION __clc_tanpi
4 | #include <clc/math/gentype.inc>
5 | #undef __CLC_FUNCTION
6 | #undef __CLC_BODY
7 | 


--------------------------------------------------------------------------------
/generic/include/utils.h:
--------------------------------------------------------------------------------
 1 | #if !defined(__CLC_UTILS_H_)
 2 | #define __CLC_UTILS_H_
 3 | /* Stringification; the X variant lets its argument be macro-expanded before it is quoted. */
 4 | #define __CLC_STR(x) #x
 5 | #define __CLC_XSTR(x) __CLC_STR(x)
 6 | /* Token pasting; the X variant lets both arguments be macro-expanded before they are joined. */
 7 | #define __CLC_CONCAT(x, y) x ## y
 8 | #define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y)
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/generic/lib/async/async_work_group_copy.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Instantiate async_work_group_copy.inc through async/gentype.inc. */
3 | #define __CLC_BODY <async_work_group_copy.inc>
4 | #include <clc/async/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/async/async_work_group_copy.inc:
--------------------------------------------------------------------------------
 1 | /* Contiguous global -> local copy: forwards to the strided variant with a stride of 1. */
 2 | _CLC_OVERLOAD _CLC_DEF event_t
 3 | async_work_group_copy(local __CLC_GENTYPE *dst, const global __CLC_GENTYPE *src,
 4 |                       size_t num_gentypes, event_t event) {
 5 |   return async_work_group_strided_copy(dst, src, num_gentypes, 1, event);
 6 | }
 7 | 
 8 | /* Contiguous local -> global copy: same forwarding, opposite direction. */
 9 | _CLC_OVERLOAD _CLC_DEF event_t
10 | async_work_group_copy(global __CLC_GENTYPE *dst, const local __CLC_GENTYPE *src,
11 |                       size_t num_gentypes, event_t event) {
12 |   return async_work_group_strided_copy(dst, src, num_gentypes, 1, event);
13 | }
14 | 


--------------------------------------------------------------------------------
/generic/lib/async/async_work_group_strided_copy.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Instantiate async_work_group_strided_copy.inc through async/gentype.inc. */
3 | #define __CLC_BODY <async_work_group_strided_copy.inc>
4 | #include <clc/async/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/async/prefetch.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | /* Instantiate prefetch.inc through async/gentype.inc. */
3 | #define __CLC_BODY <prefetch.inc>
4 | #include <clc/async/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/async/prefetch.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DEF void prefetch(const global __CLC_GENTYPE *p, size_t num_gentypes) { } /* intentionally empty body: this implementation ignores both arguments */
2 | 


--------------------------------------------------------------------------------
/generic/lib/async/wait_group_events.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | /* Ignores both arguments and issues a barrier with the local and global fence flags set. */
4 | _CLC_DEF void wait_group_events(int num_events, event_t *event_list) {
5 |   const cl_mem_fence_flags all_fences = CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE;
6 |   barrier(all_fences);
7 | }
8 | 


--------------------------------------------------------------------------------
/generic/lib/atom_int32_binary.inc:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "utils.h"
 3 | /* Template: defines atom_<op> as a thin forwarder to atomic_<op>; the includer sets __CLC_ATOMIC_OP and __CLC_ATOMIC_ADDRESS_SPACE. */
 4 | #define __CLC_ATOM_IMPL(SPACE, T) \
 5 | _CLC_OVERLOAD _CLC_DEF T __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP) (volatile SPACE T *p, T val) { \
 6 |   return __CLC_XCONCAT(atomic_, __CLC_ATOMIC_OP) (p, val); \
 7 | }
 8 | /* One forwarder each for uint and int in the requested address space. */
 9 | __CLC_ATOM_IMPL(__CLC_ATOMIC_ADDRESS_SPACE, uint)
10 | __CLC_ATOM_IMPL(__CLC_ATOMIC_ADDRESS_SPACE, int)
11 | /* Consume the includer's parameters along with the local helper. */
12 | #undef __CLC_ATOMIC_ADDRESS_SPACE
13 | #undef __CLC_ATOMIC_OP
14 | #undef __CLC_ATOM_IMPL
15 | 


--------------------------------------------------------------------------------
/generic/lib/atomic/atomic_add.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atomic_add for int and unsigned int in global and local memory, built on __sync_fetch_and_add. */
 3 | #define IMPL(T, SPACE) \
 4 | _CLC_OVERLOAD _CLC_DEF T atomic_add(volatile SPACE T *p, T val) { \
 5 |   return __sync_fetch_and_add(p, val); \
 6 | }
 7 | /* One overload per (type, address space) pair. */
 8 | IMPL(int, global)
 9 | IMPL(int, local)
10 | IMPL(unsigned int, global)
11 | IMPL(unsigned int, local)
12 | #undef IMPL
13 | 


--------------------------------------------------------------------------------
/generic/lib/atomic/atomic_and.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atomic_and for int and unsigned int in global and local memory, built on __sync_fetch_and_and. */
 3 | #define IMPL(T, SPACE) \
 4 | _CLC_OVERLOAD _CLC_DEF T atomic_and(volatile SPACE T *p, T val) { \
 5 |   return __sync_fetch_and_and(p, val); \
 6 | }
 7 | /* One overload per (type, address space) pair. */
 8 | IMPL(int, global)
 9 | IMPL(int, local)
10 | IMPL(unsigned int, global)
11 | IMPL(unsigned int, local)
12 | #undef IMPL
13 | 


--------------------------------------------------------------------------------
/generic/lib/atomic/atomic_cmpxchg.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atomic_cmpxchg for int and unsigned int in global and local memory, built on __sync_val_compare_and_swap. */
 3 | #define IMPL(T, SPACE) \
 4 | _CLC_OVERLOAD _CLC_DEF T atomic_cmpxchg(volatile SPACE T *p, T cmp, T val) { \
 5 |   return __sync_val_compare_and_swap(p, cmp, val); \
 6 | }
 7 | /* One overload per (type, address space) pair. */
 8 | IMPL(int, global)
 9 | IMPL(int, local)
10 | IMPL(unsigned int, global)
11 | IMPL(unsigned int, local)
12 | #undef IMPL
13 | 


--------------------------------------------------------------------------------
/generic/lib/atomic/atomic_dec.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atomic_dec: __sync_fetch_and_sub with a constant 1 cast to the element type. */
 3 | #define IMPL(T, SPACE) \
 4 | _CLC_OVERLOAD _CLC_DEF T atomic_dec(volatile SPACE T *p) { \
 5 |   return __sync_fetch_and_sub(p, (T)1); \
 6 | }
 7 | /* One overload per (type, address space) pair. */
 8 | IMPL(int, global)
 9 | IMPL(int, local)
10 | IMPL(unsigned int, global)
11 | IMPL(unsigned int, local)
12 | #undef IMPL
13 | 


--------------------------------------------------------------------------------
/generic/lib/atomic/atomic_inc.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atomic_inc: __sync_fetch_and_add with a constant 1 cast to the element type. */
 3 | #define IMPL(T, SPACE) \
 4 | _CLC_OVERLOAD _CLC_DEF T atomic_inc(volatile SPACE T *p) { \
 5 |   return __sync_fetch_and_add(p, (T)1); \
 6 | }
 7 | /* One overload per (type, address space) pair. */
 8 | IMPL(int, global)
 9 | IMPL(int, local)
10 | IMPL(unsigned int, global)
11 | IMPL(unsigned int, local)
12 | #undef IMPL
13 | 


--------------------------------------------------------------------------------
/generic/lib/atomic/atomic_max.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atomic_max: SUFFIX picks the builtin, __sync_fetch_and_max for int and __sync_fetch_and_umax for unsigned int. */
 3 | #define IMPL(T, SPACE, SUFFIX) \
 4 | _CLC_OVERLOAD _CLC_DEF T atomic_max(volatile SPACE T *p, T val) { \
 5 |   return __sync_fetch_and_##SUFFIX(p, val); \
 6 | }
 7 | /* One overload per (type, address space) pair. */
 8 | IMPL(int, global, max)
 9 | IMPL(int, local, max)
10 | IMPL(unsigned int, global, umax)
11 | IMPL(unsigned int, local, umax)
12 | #undef IMPL
13 | 


--------------------------------------------------------------------------------
/generic/lib/atomic/atomic_min.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atomic_min: SUFFIX picks the builtin, __sync_fetch_and_min for int and __sync_fetch_and_umin for unsigned int. */
 3 | #define IMPL(T, SPACE, SUFFIX) \
 4 | _CLC_OVERLOAD _CLC_DEF T atomic_min(volatile SPACE T *p, T val) { \
 5 |   return __sync_fetch_and_##SUFFIX(p, val); \
 6 | }
 7 | /* One overload per (type, address space) pair. */
 8 | IMPL(int, global, min)
 9 | IMPL(int, local, min)
10 | IMPL(unsigned int, global, umin)
11 | IMPL(unsigned int, local, umin)
12 | #undef IMPL
13 | 


--------------------------------------------------------------------------------
/generic/lib/atomic/atomic_or.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atomic_or for int and unsigned int in global and local memory, built on __sync_fetch_and_or. */
 3 | #define IMPL(T, SPACE) \
 4 | _CLC_OVERLOAD _CLC_DEF T atomic_or(volatile SPACE T *p, T val) { \
 5 |   return __sync_fetch_and_or(p, val); \
 6 | }
 7 | /* One overload per (type, address space) pair. */
 8 | IMPL(int, global)
 9 | IMPL(int, local)
10 | IMPL(unsigned int, global)
11 | IMPL(unsigned int, local)
12 | #undef IMPL
13 | 


--------------------------------------------------------------------------------
/generic/lib/atomic/atomic_sub.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atomic_sub for int and unsigned int in global and local memory, built on __sync_fetch_and_sub. */
 3 | #define IMPL(T, SPACE) \
 4 | _CLC_OVERLOAD _CLC_DEF T atomic_sub(volatile SPACE T *p, T val) { \
 5 |   return __sync_fetch_and_sub(p, val); \
 6 | }
 7 | /* One overload per (type, address space) pair. */
 8 | IMPL(int, global)
 9 | IMPL(int, local)
10 | IMPL(unsigned int, global)
11 | IMPL(unsigned int, local)
12 | #undef IMPL
13 | 


--------------------------------------------------------------------------------
/generic/lib/atomic/atomic_xchg.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* float exchange reuses the uint overloads: the value's bits are reinterpreted, not converted. */
 4 | _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) {
 5 |   volatile global uint *word = (volatile global uint *)p;
 6 |   return as_float(atomic_xchg(word, as_uint(val)));
 7 | }
 8 | 
 9 | _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) {
10 |   volatile local uint *word = (volatile local uint *)p;
11 |   return as_float(atomic_xchg(word, as_uint(val)));
12 | }
13 | 
14 | /* Integer overloads map straight onto the __sync_swap_4 builtin. */
15 | #define IMPL(T, SPACE) \
16 | _CLC_OVERLOAD _CLC_DEF T atomic_xchg(volatile SPACE T *p, T val) { \
17 |   return __sync_swap_4(p, val); \
18 | }
19 | 
20 | IMPL(int, global)
21 | IMPL(int, local)
22 | IMPL(unsigned int, global)
23 | IMPL(unsigned int, local)
24 | #undef IMPL
25 | 


--------------------------------------------------------------------------------
/generic/lib/atomic/atomic_xor.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atomic_xor for int and unsigned int in global and local memory, built on __sync_fetch_and_xor. */
 3 | #define IMPL(T, SPACE) \
 4 | _CLC_OVERLOAD _CLC_DEF T atomic_xor(volatile SPACE T *p, T val) { \
 5 |   return __sync_fetch_and_xor(p, val); \
 6 | }
 7 | /* One overload per (type, address space) pair. */
 8 | IMPL(int, global)
 9 | IMPL(int, local)
10 | IMPL(unsigned int, global)
11 | IMPL(unsigned int, local)
12 | #undef IMPL
13 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl:
--------------------------------------------------------------------------------
1 | /* atom_add on global memory: the shared template forwards to atomic_add for int and uint. */
2 | #define __CLC_ATOMIC_ADDRESS_SPACE global
3 | #define __CLC_ATOMIC_OP add
4 | #include "../atom_int32_binary.inc"
5 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atom_cmpxchg on global memory simply forwards to atomic_cmpxchg. */
 3 | #define IMPL(TYPE) \
 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile global TYPE *p, TYPE cmp, TYPE val) { \
 5 |   return atomic_cmpxchg(p, cmp, val); \
 6 | }
 7 | 
 8 | IMPL(int)
 9 | IMPL(unsigned int)
10 | #undef IMPL
11 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atom_dec on global memory simply forwards to atomic_dec. */
 3 | #define IMPL(TYPE) \
 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile global TYPE *p) { \
 5 |   return atomic_dec(p); \
 6 | }
 7 | 
 8 | IMPL(int)
 9 | IMPL(unsigned int)
10 | #undef IMPL
11 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* atom_inc on global memory simply forwards to atomic_inc. */
 3 | #define IMPL(TYPE) \
 4 | _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile global TYPE *p) { \
 5 |   return atomic_inc(p); \
 6 | }
 7 | 
 8 | IMPL(int)
 9 | IMPL(unsigned int)
10 | #undef IMPL
11 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl:
--------------------------------------------------------------------------------
1 | /* atom_sub on global memory: the shared template forwards to atomic_sub for int and uint. */
2 | #define __CLC_ATOMIC_ADDRESS_SPACE global
3 | #define __CLC_ATOMIC_OP sub
4 | #include "../atom_int32_binary.inc"
5 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl:
--------------------------------------------------------------------------------
1 | /* atom_xchg on global memory: the shared template forwards to atomic_xchg for int and uint. */
2 | #define __CLC_ATOMIC_ADDRESS_SPACE global
3 | #define __CLC_ATOMIC_OP xchg
4 | #include "../atom_int32_binary.inc"
5 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_global_int32_extended_atomics/atom_and.cl:
--------------------------------------------------------------------------------
1 | /* atom_and on global memory: the shared template forwards to atomic_and for int and uint. */
2 | #define __CLC_ATOMIC_ADDRESS_SPACE global
3 | #define __CLC_ATOMIC_OP and
4 | #include "../atom_int32_binary.inc"
5 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_global_int32_extended_atomics/atom_max.cl:
--------------------------------------------------------------------------------
1 | /* atom_max on global memory: the shared template forwards to atomic_max for int and uint. */
2 | #define __CLC_ATOMIC_ADDRESS_SPACE global
3 | #define __CLC_ATOMIC_OP max
4 | #include "../atom_int32_binary.inc"
5 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_global_int32_extended_atomics/atom_min.cl:
--------------------------------------------------------------------------------
1 | /* atom_min on global memory: the shared template forwards to atomic_min for int and uint. */
2 | #define __CLC_ATOMIC_ADDRESS_SPACE global
3 | #define __CLC_ATOMIC_OP min
4 | #include "../atom_int32_binary.inc"
5 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_global_int32_extended_atomics/atom_or.cl:
--------------------------------------------------------------------------------
1 | /* atom_or on global memory: the shared template forwards to atomic_or for int and uint. */
2 | #define __CLC_ATOMIC_ADDRESS_SPACE global
3 | #define __CLC_ATOMIC_OP or
4 | #include "../atom_int32_binary.inc"
5 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_global_int32_extended_atomics/atom_xor.cl:
--------------------------------------------------------------------------------
1 | /* atom_xor on global memory: the shared template forwards to atomic_xor for int and uint. */
2 | #define __CLC_ATOMIC_ADDRESS_SPACE global
3 | #define __CLC_ATOMIC_OP xor
4 | #include "../atom_int32_binary.inc"
5 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_int64_base_atomics/atom_add.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* 64-bit atom_add; compiled only when the cl_khr_int64_base_atomics macro is defined. */
 3 | #if defined(cl_khr_int64_base_atomics)
 4 | 
 5 | #define IMPL(SPACE, T) \
 6 | _CLC_OVERLOAD _CLC_DEF T atom_add(volatile SPACE T *p, T val) { \
 7 |   return __sync_fetch_and_add_8(p, val); \
 8 | }
 9 | /* long and unsigned long, each in local and global memory. */
10 | IMPL(local, long)
11 | IMPL(local, unsigned long)
12 | IMPL(global, long)
13 | IMPL(global, unsigned long)
14 | #undef IMPL
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* 64-bit atom_cmpxchg; compiled only when the cl_khr_int64_base_atomics macro is defined. */
 3 | #if defined(cl_khr_int64_base_atomics)
 4 | 
 5 | #define IMPL(SPACE, T) \
 6 | _CLC_OVERLOAD _CLC_DEF T atom_cmpxchg(volatile SPACE T *p, T cmp, T val) { \
 7 |   return __sync_val_compare_and_swap_8(p, cmp, val); \
 8 | }
 9 | /* long and unsigned long, each in local and global memory. */
10 | IMPL(local, long)
11 | IMPL(local, unsigned long)
12 | IMPL(global, long)
13 | IMPL(global, unsigned long)
14 | #undef IMPL
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_int64_base_atomics/atom_dec.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | /* 64-bit atom_dec, expressed as atom_sub of 1; compiled only when cl_khr_int64_base_atomics is defined. */
 3 | #if defined(cl_khr_int64_base_atomics)
 4 | 
 5 | #define IMPL(SPACE, T) \
 6 | _CLC_OVERLOAD _CLC_DEF T atom_dec(volatile SPACE T *p) { \
 7 |   return atom_sub(p, (T)1); \
 8 | }
 9 | /* long and unsigned long, each in local and global memory. */
10 | IMPL(local, long)
11 | IMPL(local, unsigned long)
12 | IMPL(global, long)
13 | IMPL(global, unsigned long)
14 | #undef IMPL
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_int64_base_atomics/atom_inc.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #ifdef cl_khr_int64_base_atomics
 4 | 
 5 | /* 64-bit atom_inc: expressed as atom_add with a constant operand of 1. */
 6 | #define IMPL(SPACE, T) \
 7 | _CLC_OVERLOAD _CLC_DEF T atom_inc(volatile SPACE T *p) \
 8 | { return atom_add(p, (T)1); }
 9 | 
10 | IMPL(global, long)
11 | IMPL(local, long)
12 | IMPL(global, unsigned long)
13 | IMPL(local, unsigned long)
14 | #undef IMPL
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #ifdef cl_khr_int64_base_atomics
 4 | 
 5 | /* 64-bit atom_sub for global and local pointers; forwards to __sync_fetch_and_sub_8. */
 6 | #define IMPL(SPACE, T) \
 7 | _CLC_OVERLOAD _CLC_DEF T atom_sub(volatile SPACE T *p, T val) \
 8 | { return __sync_fetch_and_sub_8(p, val); }
 9 | 
10 | IMPL(global, long)
11 | IMPL(local, long)
12 | IMPL(global, unsigned long)
13 | IMPL(local, unsigned long)
14 | #undef IMPL
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #ifdef cl_khr_int64_base_atomics
 4 | 
 5 | /* 64-bit atom_xchg for global and local pointers; forwards to __sync_swap_8. */
 6 | #define IMPL(SPACE, T) \
 7 | _CLC_OVERLOAD _CLC_DEF T atom_xchg(volatile SPACE T *p, T val) \
 8 | { return __sync_swap_8(p, val); }
 9 | 
10 | IMPL(global, long)
11 | IMPL(local, long)
12 | IMPL(global, unsigned long)
13 | IMPL(local, unsigned long)
14 | #undef IMPL
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #ifdef cl_khr_int64_extended_atomics
 4 | 
 5 | /* 64-bit atom_and for global and local pointers; forwards to __sync_fetch_and_and_8. */
 6 | #define IMPL(SPACE, T) \
 7 | _CLC_OVERLOAD _CLC_DEF T atom_and(volatile SPACE T *p, T val) \
 8 | { return __sync_fetch_and_and_8(p, val); }
 9 | 
10 | IMPL(global, long)
11 | IMPL(local, long)
12 | IMPL(global, unsigned long)
13 | IMPL(local, unsigned long)
14 | #undef IMPL
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #ifdef cl_khr_int64_extended_atomics
 4 | 
 5 | unsigned long __clc__sync_fetch_and_max_global_8(volatile global long *, long);
 6 | unsigned long __clc__sync_fetch_and_max_local_8(volatile local long *, long);
 7 | unsigned long __clc__sync_fetch_and_umax_global_8(volatile global unsigned long *, unsigned long);
 8 | unsigned long __clc__sync_fetch_and_umax_local_8(volatile local unsigned long *, unsigned long);
 9 | /* Helpers are only declared here. NOTE(review): signed ones are typed as returning unsigned long. */
10 | #define IMPL(SPACE, T, OP) \
11 | _CLC_OVERLOAD _CLC_DEF T atom_max(volatile SPACE T *p, T val) \
12 | { return __clc__sync_fetch_and_##OP##_##SPACE##_8(p, val); }
13 | 
14 | /* OP and SPACE are pasted into the helper name: max for long, umax for unsigned long. */
15 | IMPL(global, long, max)
16 | IMPL(local, long, max)
17 | IMPL(global, unsigned long, umax)
18 | IMPL(local, unsigned long, umax)
19 | #undef IMPL
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #ifdef cl_khr_int64_extended_atomics
 4 | 
 5 | unsigned long __clc__sync_fetch_and_min_global_8(volatile global long *, long);
 6 | unsigned long __clc__sync_fetch_and_min_local_8(volatile local long *, long);
 7 | unsigned long __clc__sync_fetch_and_umin_global_8(volatile global unsigned long *, unsigned long);
 8 | unsigned long __clc__sync_fetch_and_umin_local_8(volatile local unsigned long *, unsigned long);
 9 | /* Helpers are only declared here. NOTE(review): signed ones are typed as returning unsigned long. */
10 | #define IMPL(SPACE, T, OP) \
11 | _CLC_OVERLOAD _CLC_DEF T atom_min(volatile SPACE T *p, T val) \
12 | { return __clc__sync_fetch_and_##OP##_##SPACE##_8(p, val); }
13 | 
14 | /* OP and SPACE are pasted into the helper name: min for long, umin for unsigned long. */
15 | IMPL(global, long, min)
16 | IMPL(local, long, min)
17 | IMPL(global, unsigned long, umin)
18 | IMPL(local, unsigned long, umin)
19 | #undef IMPL
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #ifdef cl_khr_int64_extended_atomics
 4 | 
 5 | /* 64-bit atom_or for global and local pointers; forwards to __sync_fetch_and_or_8. */
 6 | #define IMPL(SPACE, T) \
 7 | _CLC_OVERLOAD _CLC_DEF T atom_or(volatile SPACE T *p, T val) \
 8 | { return __sync_fetch_and_or_8(p, val); }
 9 | 
10 | IMPL(global, long)
11 | IMPL(local, long)
12 | IMPL(global, unsigned long)
13 | IMPL(local, unsigned long)
14 | #undef IMPL
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #ifdef cl_khr_int64_extended_atomics
 4 | 
 5 | /* 64-bit atom_xor for global and local pointers; forwards to __sync_fetch_and_xor_8. */
 6 | #define IMPL(SPACE, T) \
 7 | _CLC_OVERLOAD _CLC_DEF T atom_xor(volatile SPACE T *p, T val) \
 8 | { return __sync_fetch_and_xor_8(p, val); }
 9 | 
10 | IMPL(global, long)
11 | IMPL(local, long)
12 | IMPL(global, unsigned long)
13 | IMPL(local, unsigned long)
14 | #undef IMPL
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl:
--------------------------------------------------------------------------------
1 | #define __CLC_ATOMIC_OP add
2 | #define __CLC_ATOMIC_ADDRESS_SPACE local
3 | #include "../atom_int32_binary.inc"
4 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* atom_cmpxchg on local int/uint: a straight alias for atomic_cmpxchg. */
 4 | #define IMPL(T) \
 5 | _CLC_OVERLOAD _CLC_DEF T atom_cmpxchg(volatile local T *p, T cmp, T val) \
 6 | { return atomic_cmpxchg(p, cmp, val); }
 7 | 
 8 | IMPL(int)
 9 | IMPL(unsigned int)
10 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* atom_dec on local int/uint: a straight alias for atomic_dec. */
 4 | #define IMPL(T) \
 5 | _CLC_OVERLOAD _CLC_DEF T atom_dec(volatile local T *p) \
 6 | { return atomic_dec(p); }
 7 | 
 8 | IMPL(int)
 9 | IMPL(unsigned int)
10 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* atom_inc on local int/uint: a straight alias for atomic_inc. */
 4 | #define IMPL(T) \
 5 | _CLC_OVERLOAD _CLC_DEF T atom_inc(volatile local T *p) \
 6 | { return atomic_inc(p); }
 7 | 
 8 | IMPL(int)
 9 | IMPL(unsigned int)
10 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl:
--------------------------------------------------------------------------------
1 | #define __CLC_ATOMIC_OP sub
2 | #define __CLC_ATOMIC_ADDRESS_SPACE local
3 | #include "../atom_int32_binary.inc"
4 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl:
--------------------------------------------------------------------------------
1 | #define __CLC_ATOMIC_OP xchg
2 | #define __CLC_ATOMIC_ADDRESS_SPACE local
3 | #include "../atom_int32_binary.inc"
4 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_local_int32_extended_atomics/atom_and.cl:
--------------------------------------------------------------------------------
1 | #define __CLC_ATOMIC_OP and
2 | #define __CLC_ATOMIC_ADDRESS_SPACE local
3 | #include "../atom_int32_binary.inc"
4 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_local_int32_extended_atomics/atom_max.cl:
--------------------------------------------------------------------------------
1 | #define __CLC_ATOMIC_OP max
2 | #define __CLC_ATOMIC_ADDRESS_SPACE local
3 | #include "../atom_int32_binary.inc"
4 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_local_int32_extended_atomics/atom_min.cl:
--------------------------------------------------------------------------------
1 | #define __CLC_ATOMIC_OP min
2 | #define __CLC_ATOMIC_ADDRESS_SPACE local
3 | #include "../atom_int32_binary.inc"
4 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl:
--------------------------------------------------------------------------------
1 | #define __CLC_ATOMIC_OP or
2 | #define __CLC_ATOMIC_ADDRESS_SPACE local
3 | #include "../atom_int32_binary.inc"
4 | 


--------------------------------------------------------------------------------
/generic/lib/cl_khr_local_int32_extended_atomics/atom_xor.cl:
--------------------------------------------------------------------------------
1 | #define __CLC_ATOMIC_OP xor
2 | #define __CLC_ATOMIC_ADDRESS_SPACE local
3 | #include "../atom_int32_binary.inc"
4 | 


--------------------------------------------------------------------------------
/generic/lib/clc_unary.inc:
--------------------------------------------------------------------------------
1 | /* Generic unary wrapper: the including file picks __CLC_FUNC and __CLC_IMPL_FUNC. */
2 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
3 |   const __CLC_GENTYPE result = __CLC_IMPL_FUNC(x);
4 |   return result;
5 | }
6 | 


--------------------------------------------------------------------------------
/generic/lib/common/mix.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <mix.inc>
4 | #include <clc/math/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/common/mix.inc:
--------------------------------------------------------------------------------
 1 | // mix(x, y, a): blends x towards y by a, evaluated as a single mad(y - x, a, x).
 2 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mix(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE a)
 3 | { return mad(y - x, a, x); }
 4 | 
 5 | #ifndef __CLC_SCALAR
 6 | // Scalar weight with non-scalar x/y: cast the weight to the gentype and reuse the overload above.
 7 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mix(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_SCALAR_GENTYPE a)
 8 | { return mix(x, y, (__CLC_GENTYPE)a); }
 9 | #endif
10 | 


--------------------------------------------------------------------------------
/generic/lib/common/sign.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "../clcmacro.h"
 3 | 
 4 | /* sign(x): 1 for x > 0, -1 for x < 0, 0 for NaN; a zero input is returned unchanged. */
 5 | #define SIGN(TYPE, F) \
 6 | _CLC_DEF _CLC_OVERLOAD TYPE sign(TYPE x) { \
 7 |   /* NaN is handled before the ordered comparisons below. */ \
 8 |   if (isnan(x)) \
 9 |     return 0.0F; \
10 |   if (x > 0.0F) \
11 |     return 1.0F; \
12 |   if (x < 0.0F) \
13 |     return -1.0F; \
14 |   /* Only -0.0 or +0.0 get this far; hand the input back as-is. */ \
15 |   return x; \
16 | }
17 | 
18 | SIGN(float, f)
19 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sign, float)
20 | 
21 | #ifdef cl_khr_fp64
22 | 
23 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
24 | /* Same definition instantiated for double; the F argument is not used by the macro body. */
25 | SIGN(double, )
26 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sign, double)
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/generic/lib/geometric/cross.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | // 3-component cross product in swizzle form: p0.yzx * p1.zxy - p0.zxy * p1.yzx.
 4 | _CLC_OVERLOAD _CLC_DEF float3 cross(float3 p0, float3 p1) {
 5 |   return p0.yzx * p1.zxy - p0.zxy * p1.yzx;
 6 | }
 7 | 
 8 | // 4-component form: cross product of the xyz parts, with w set to zero.
 9 | _CLC_OVERLOAD _CLC_DEF float4 cross(float4 p0, float4 p1) {
10 |   return (float4)(p0.yzx * p1.zxy - p0.zxy * p1.yzx, 0.f);
11 | }
12 | 
13 | #ifdef cl_khr_fp64
14 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
15 | 
16 | // Double-precision counterparts of the two overloads above.
17 | _CLC_OVERLOAD _CLC_DEF double3 cross(double3 p0, double3 p1) {
18 |   return p0.yzx * p1.zxy - p0.zxy * p1.yzx;
19 | }
20 | 
21 | _CLC_OVERLOAD _CLC_DEF double4 cross(double4 p0, double4 p1) {
22 |   const double3 xyz = p0.yzx * p1.zxy - p0.zxy * p1.yzx;
23 |   return (double4)(xyz, 0.0);
24 | }
25 | #endif
26 | 


--------------------------------------------------------------------------------
/generic/lib/integer/abs.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <abs.inc>
4 | #include <clc/integer/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/integer/abs.inc:
--------------------------------------------------------------------------------
1 | // abs(x): picks x or -x, then reinterprets the bits as the unsigned gentype.
2 | _CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs(__CLC_GENTYPE x) {
3 |   const __CLC_GENTYPE magnitude = x > (__CLC_GENTYPE)(0) ? x : -x;
4 |   return __builtin_astype(magnitude, __CLC_U_GENTYPE);
5 | }
6 | 


--------------------------------------------------------------------------------
/generic/lib/integer/abs_diff.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <abs_diff.inc>
4 | #include <clc/integer/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/integer/abs_diff.inc:
--------------------------------------------------------------------------------
1 | // abs_diff(x, y): larger operand minus smaller one, reinterpreted as the unsigned gentype.
2 | _CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y) {
3 |   const __CLC_GENTYPE delta = x > y ? x-y : y-x;
4 |   return __builtin_astype(delta, __CLC_U_GENTYPE);
5 | }
6 | 


--------------------------------------------------------------------------------
/generic/lib/integer/hadd.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <hadd.inc>
4 | #include <clc/integer/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/integer/hadd.inc:
--------------------------------------------------------------------------------
1 | // hadd(x, y) == (x + y) >> 1 without forming the sum, so no overflow checks are needed:
2 | // halve both operands, then add 1 back when both of them have their lowest bit set.
3 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y) {
4 |     const __CLC_GENTYPE one = (__CLC_GENTYPE)1;
5 |     return (x >> one) + (y >> one) + (x & y & one);
6 | }
7 | 


--------------------------------------------------------------------------------
/generic/lib/integer/mad24.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <mad24.inc>
4 | #include <clc/integer/integer-gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/integer/mad24.inc:
--------------------------------------------------------------------------------
1 | // mad24(x, y, z): the result of mul24(x, y) with z added on top.
2 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z){
3 |   const __CLC_GENTYPE product = mul24(x, y);
4 |   return product + z;
5 | }
6 | 


--------------------------------------------------------------------------------
/generic/lib/integer/mul24.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <mul24.inc>
4 | #include <clc/integer/integer-gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/integer/mul24.inc:
--------------------------------------------------------------------------------
 1 | 
 2 | // Shifts (rather than a mask) are used so that signed integers maintain
 3 | // their sign bit. For unsigned integers the compiler should optimize this
 4 | // to (x & 0x00FFFFFF).
 5 | #define CONVERT_TO_24BIT(x) (((x) << 8) >> 8)
 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y){
 7 |   const __CLC_GENTYPE lhs = CONVERT_TO_24BIT(x);
 8 |   const __CLC_GENTYPE rhs = CONVERT_TO_24BIT(y);
 9 |   return lhs * rhs;
10 | }
11 | #undef CONVERT_TO_24BIT
12 | 


--------------------------------------------------------------------------------
/generic/lib/integer/popcount.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | #include <integer/popcount.h>
3 | 
4 | #define __CLC_FUNC popcount
5 | #define __CLC_IMPL_FUNC __clc_native_popcount
6 | 
7 | #define __CLC_BODY "../clc_unary.inc"
8 | #include <clc/integer/gentype.inc>
9 | 


--------------------------------------------------------------------------------
/generic/lib/integer/rhadd.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <rhadd.inc>
4 | #include <clc/integer/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/integer/rhadd.inc:
--------------------------------------------------------------------------------
1 | // rhadd(x, y) == (x + y + 1) >> 1 without forming the sum, so no overflow checks are needed:
2 | // halve both operands, then add 1 back when either of them has its lowest bit set.
3 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y) {
4 |     const __CLC_GENTYPE one = (__CLC_GENTYPE)1;
5 |     return (x >> one) + (y >> one) + ((x | y) & one);
6 | }
7 | 


--------------------------------------------------------------------------------
/generic/lib/integer/rotate.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <rotate.inc>
4 | #include <clc/integer/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/math/acos.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <acos.inc>
4 | #include <clc/math/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/math/acos.inc:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * There are multiple formulas for calculating arccosine of x:
 3 |  * 1) acos(x) = (pi/2) + i * ln(i*x + sqrt(1-x^2)) (notice the 'i'...)
 4 |  * 2) acos(x) = pi/2 + asin(-x) (asin isn't implemented yet)
 5 |  * 3) acos(x) = pi/2 - asin(x) (ditto)
 6 |  * 4) acos(x) = 2*atan2(sqrt(1-x), sqrt(1+x))
 7 |  * 5) acos(x) = pi/2 - atan2(x, ( sqrt(1-x^2) ) )
 8 |  *
 9 |  * Options 1-3 are not currently usable, #5 generates more concise radeonsi
10 |  * bitcode and assembly than #4 (134 vs 132 instructions on radeonsi), but
11 |  * precision of #4 may be better.
12 |  */
13 | 
14 | // TODO: Enable half precision when atan2 is implemented
15 | #if __CLC_FPSIZE > 16
16 | 
17 | // __CLC_CONST appends the literal suffix that matches the current __CLC_FPSIZE.
18 | #if __CLC_FPSIZE == 64
19 | #define __CLC_CONST(x) x
20 | #elif __CLC_FPSIZE == 32
21 | #define __CLC_CONST(x) x ## f
22 | #elif __CLC_FPSIZE == 16 // not reached while the enclosing guard requires > 16
23 | #define __CLC_CONST(x) x ## h
24 | #endif
25 | 
26 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) {
27 |   // acos(x) = 2 * atan2(sqrt(1 - x), sqrt(1 + x))
28 |   const __CLC_GENTYPE one = (__CLC_GENTYPE) __CLC_CONST(1.0);
29 |   const __CLC_GENTYPE root_minus = sqrt(one - x);
30 |   const __CLC_GENTYPE root_plus = sqrt(one + x);
31 |   return (__CLC_GENTYPE) __CLC_CONST(2.0) * atan2(root_minus, root_plus);
32 | }
33 | 
34 | #undef __CLC_CONST
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/generic/lib/math/asin.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <asin.inc>
4 | #include <clc/math/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/math/asin.inc:
--------------------------------------------------------------------------------
 1 | // TODO: Enable half precision when atan2 is implemented
 2 | #if __CLC_FPSIZE > 16
 3 | 
 4 | #if __CLC_FPSIZE == 64
 5 | #define __CLC_CONST(x) x
 6 | #elif __CLC_FPSIZE == 32
 7 | #define __CLC_CONST(x) x ## f
 8 | #elif __CLC_FPSIZE == 16 // not reached while the enclosing guard requires > 16
 9 | #define __CLC_CONST(x) x ## h
10 | #endif
11 | // asin(x) = atan2(x, sqrt(1 - x*x))
12 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE asin(__CLC_GENTYPE x) {
13 |   const __CLC_GENTYPE adjacent = sqrt( (__CLC_GENTYPE)__CLC_CONST(1.0) - (x*x) );
14 |   return atan2(x, adjacent);
15 | }
16 | #undef __CLC_CONST
17 | 
18 | #endif
19 | 


--------------------------------------------------------------------------------
/generic/lib/math/binary_impl.inc:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef __CLC_SCALAR
 3 | // Both operands already have the gentype: forward them untouched.
 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, __CLC_GENTYPE y) {
 5 |   return FUNCTION_IMPL(x, y);
 6 | }
 7 | 
 8 | #endif
 9 | // float second operand: cast it to the gentype before calling the implementation.
10 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, float y) {
11 |   __CLC_GENTYPE widened = (__CLC_GENTYPE) (y);
12 |   return FUNCTION_IMPL(x, widened);
13 | }
14 | 
15 | #ifdef cl_khr_fp64
16 | // double second operand: same conversion, only built when cl_khr_fp64 is defined.
17 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, double y) {
18 |   __CLC_GENTYPE widened = (__CLC_GENTYPE) (y);
19 |   return FUNCTION_IMPL(x, widened);
20 | }
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/generic/lib/math/ceil.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "../clcmacro.h"
 3 | 
 4 | // Map the llvm intrinsic to an OpenCL function.
 5 | #define __CLC_FUNCTION __clc_ceil
 6 | #define __CLC_INTRINSIC "llvm.ceil"
 7 | #include "math/unary_intrin.inc"
 8 | 
 9 | #undef __CLC_FUNCTION
10 | #define __CLC_FUNCTION ceil
11 | #include "unary_builtin.inc"
12 | 


--------------------------------------------------------------------------------
/generic/lib/math/clc_sw_binary.inc:
--------------------------------------------------------------------------------
 1 | #include <utils.h>
 2 | 
 3 | // Builds the name of the software routine by prefixing __clc_ (presumably fmod -> __clc_fmod).
 4 | #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
 5 | 
 6 | // TODO: Enable half precision when the sw routine is implemented
 7 | #if __CLC_FPSIZE > 16
 8 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE y)
 9 | { return __CLC_SW_FUNC(__CLC_FUNC)(x, y); }
10 | #endif
11 | 
12 | #undef __CLC_SW_FUNC
13 | 


--------------------------------------------------------------------------------
/generic/lib/math/clc_sw_unary.inc:
--------------------------------------------------------------------------------
 1 | #include <utils.h>
 2 | 
 3 | // Builds the name of the software routine by prefixing __clc_ (presumably exp10 -> __clc_exp10).
 4 | #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
 5 | 
 6 | // TODO: Enable half precision when the sw routine is implemented
 7 | #if __CLC_FPSIZE > 16
 8 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x)
 9 | { return __CLC_SW_FUNC(__CLC_FUNC)(x); }
10 | #endif
11 | 
12 | #undef __CLC_SW_FUNC
13 | 


--------------------------------------------------------------------------------
/generic/lib/math/copysign.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "../clcmacro.h"
 3 | // float copysign, generated by _CLC_DEFINE_BINARY_BUILTIN on top of __builtin_copysignf.
 4 | _CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float)
 5 | 
 6 | #ifdef cl_khr_fp64
 7 | 
 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 9 | // double copysign on top of __builtin_copysign; only compiled when cl_khr_fp64 is defined.
10 | _CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double)
11 | 
12 | #endif
13 | 
14 | #ifdef cl_khr_fp16
15 | 
16 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
17 | 
18 | // Half-precision copysign(x, y): the magnitude of x combined with the sign bit of y.
19 | _CLC_DEF _CLC_OVERLOAD half copysign(half x, half y)
20 | {
21 |    ushort magnitude_x = as_ushort(x) & 0x7fffu; // low 15 bits: everything except the sign
22 |    ushort sign_y = as_ushort(y) & 0x8000u;      // bit 15: the sign bit of a 16-bit half
23 |    return as_half((ushort)(sign_y | magnitude_x));
24 | }
25 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half)
26 | 
27 | #endif
28 | 


--------------------------------------------------------------------------------
/generic/lib/math/exp10.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | #include <math/clc_exp10.h>
3 | 
4 | #define __CLC_FUNC exp10
5 | #define __CLC_BODY <clc_sw_unary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/fabs.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "../clcmacro.h"
 3 | 
 4 | // Map the llvm intrinsic to an OpenCL function.
 5 | #define __CLC_FUNCTION __clc_fabs
 6 | #define __CLC_INTRINSIC "llvm.fabs"
 7 | #include "math/unary_intrin.inc"
 8 | 
 9 | #undef __CLC_FUNCTION
10 | #define __CLC_FUNCTION fabs
11 | #include "unary_builtin.inc"
12 | 


--------------------------------------------------------------------------------
/generic/lib/math/fdim.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #include "math.h"
4 | 
5 | #define __CLC_BODY <fdim.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/floor.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "../clcmacro.h"
 3 | 
 4 | // Map the llvm intrinsic to an OpenCL function.
 5 | #define __CLC_FUNCTION __clc_floor
 6 | #define __CLC_INTRINSIC "llvm.floor"
 7 | #include "math/unary_intrin.inc"
 8 | 
 9 | #undef __CLC_FUNCTION
10 | #define __CLC_FUNCTION floor
11 | #include "unary_builtin.inc"
12 | 


--------------------------------------------------------------------------------
/generic/lib/math/fma.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #include "math.h"
4 | #include "math/clc_fma.h"
5 | 
6 | #define __CLC_BODY <fma.inc>
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/fma.inc:
--------------------------------------------------------------------------------
1 | _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE fma(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) {
2 | #if __CLC_FPSIZE != 32 || HAVE_HW_FMA32() != 0
3 | 	return __clc_fma(a, b, c);    /* every case except the one below */
4 | #else
5 | 	return __clc_sw_fma(a, b, c); /* __CLC_FPSIZE == 32 and HAVE_HW_FMA32() == 0 */
6 | #endif
7 | }
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/fmax.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #include "../clcmacro.h"
 4 | // float fmax, generated by _CLC_DEFINE_BINARY_BUILTIN on top of __builtin_fmaxf.
 5 | _CLC_DEFINE_BINARY_BUILTIN(float, fmax, __builtin_fmaxf, float, float);
 6 | 
 7 | #ifdef cl_khr_fp64
 8 | 
 9 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
10 | // double fmax on top of __builtin_fmax; only compiled when cl_khr_fp64 is defined.
11 | _CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double);
12 | 
13 | #endif
14 | 
15 | #ifdef cl_khr_fp16
16 | 
17 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
18 | 
19 | _CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
20 | {
21 |    // If x is NaN the result is y; otherwise if y is NaN the result is x.
22 |    if (isnan(x)) return y;
23 |    if (isnan(y)) return x;
24 |    if (x < y) return y;
25 |    return x;
26 | }
27 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
28 | 
29 | #endif
30 | 
31 | #define __CLC_BODY <fmax.inc>
32 | #include <clc/math/gentype.inc>
33 | 


--------------------------------------------------------------------------------
/generic/lib/math/fmax.inc:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef __CLC_SCALAR
 3 | // Non-scalar x with a scalar y: convert y to the element type, cast to the gentype, forward.
 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, float y) {
 5 |   const __CLC_SCALAR_GENTYPE elem = (__CLC_SCALAR_GENTYPE)y;
 6 |   return fmax(x, (__CLC_GENTYPE)elem);
 7 | }
 8 | #ifdef cl_khr_fp64
 9 | 
10 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
11 | 
12 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, double y) {
13 |   const __CLC_SCALAR_GENTYPE elem = (__CLC_SCALAR_GENTYPE)y;
14 |   return fmax(x, (__CLC_GENTYPE)elem);
15 | }
16 | #endif // ifdef cl_khr_fp64
17 | 
18 | #ifdef cl_khr_fp16
19 | 
20 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
21 | 
22 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, half y) {
23 |   const __CLC_SCALAR_GENTYPE elem = (__CLC_SCALAR_GENTYPE)y;
24 |   return fmax(x, (__CLC_GENTYPE)elem);
25 | }
26 | #endif // ifdef cl_khr_fp16
27 | 
28 | #endif // !defined(__CLC_SCALAR)
29 | 


--------------------------------------------------------------------------------
/generic/lib/math/fmin.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #include "../clcmacro.h"
 4 | // float fmin, generated by _CLC_DEFINE_BINARY_BUILTIN on top of __builtin_fminf.
 5 | _CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float);
 6 | 
 7 | #ifdef cl_khr_fp64
 8 | 
 9 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
10 | // double fmin on top of __builtin_fmin; only compiled when cl_khr_fp64 is defined.
11 | _CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double);
12 | 
13 | #endif
14 | #ifdef cl_khr_fp16
15 | 
16 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
17 | 
18 | _CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
19 | {
20 |    // If x is NaN the result is y; otherwise if y is NaN the result is x.
21 |    if (isnan(x)) return y;
22 |    if (isnan(y)) return x;
23 |    if (y < x) return y;
24 |    return x;
25 | }
26 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
27 | 
28 | #endif
29 | 
30 | #define __CLC_BODY <fmin.inc>
31 | #include <clc/math/gentype.inc>
32 | 


--------------------------------------------------------------------------------
/generic/lib/math/fmin.inc:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef __CLC_SCALAR
 3 | // Non-scalar x with a scalar y: convert y to the element type, cast to the gentype, forward.
 4 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, float y) {
 5 |   const __CLC_SCALAR_GENTYPE elem = (__CLC_SCALAR_GENTYPE)y;
 6 |   return fmin(x, (__CLC_GENTYPE)elem);
 7 | }
 8 | #ifdef cl_khr_fp64
 9 | 
10 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
11 | 
12 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, double y) {
13 |   const __CLC_SCALAR_GENTYPE elem = (__CLC_SCALAR_GENTYPE)y;
14 |   return fmin(x, (__CLC_GENTYPE)elem);
15 | }
16 | #endif // ifdef cl_khr_fp64
17 | 
18 | #ifdef cl_khr_fp16
19 | 
20 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
21 | 
22 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, half y) {
23 |   const __CLC_SCALAR_GENTYPE elem = (__CLC_SCALAR_GENTYPE)y;
24 |   return fmin(x, (__CLC_GENTYPE)elem);
25 | }
26 | #endif // ifdef cl_khr_fp16
27 | 
28 | #endif // !defined(__CLC_SCALAR)
29 | 


--------------------------------------------------------------------------------
/generic/lib/math/fmod.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | #include <math/clc_fmod.h>
3 | 
4 | #define __CLC_FUNC fmod
5 | #define __CLC_BODY <clc_sw_binary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/frexp.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include <utils.h>
 3 | 
 4 | #define __CLC_BODY <frexp.inc>
 5 | #define __CLC_ADDRESS_SPACE private
 6 | #include <clc/math/gentype.inc>
 7 | #undef __CLC_ADDRESS_SPACE
 8 | 
 9 | #define __CLC_BODY <frexp.inc>
10 | #define __CLC_ADDRESS_SPACE global
11 | #include <clc/math/gentype.inc>
12 | #undef __CLC_ADDRESS_SPACE
13 | 
14 | #define __CLC_BODY <frexp.inc>
15 | #define __CLC_ADDRESS_SPACE local
16 | #include <clc/math/gentype.inc>
17 | #undef __CLC_ADDRESS_SPACE
18 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_binary.inc:
--------------------------------------------------------------------------------
 1 | #include <utils.h>
 2 | // Template body: defines half_<__CLC_FUNC>(x, y) for the current __CLC_GENTYPE.
 3 | #define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x)
 4 | // The half_ variant simply forwards both operands to __CLC_FUNC.
 5 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_HALF_FUNC(__CLC_FUNC)(__CLC_GENTYPE x, __CLC_GENTYPE y) {
 6 |   return __CLC_FUNC(x, y);
 7 | }
 8 | // The naming helper is local to this template, so drop it again.
 9 | #undef __CLC_HALF_FUNC
10 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_cos.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC cos
4 | #define __CLC_BODY <half_unary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_divide.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | // half_divide is produced by half_binary.inc with a local "divide" macro as __CLC_FUNC.
 3 | #define divide(x,y) ((x)/(y))
 4 | // Both arguments are parenthesised so the macro stays correct for any operand expression.
 5 | #define __CLC_FUNC divide
 6 | #define __CLC_BODY <half_binary.inc>
 7 | #define __FLOAT_ONLY
 8 | #include <clc/math/gentype.inc>
 9 | #undef divide
10 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_exp.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC exp
4 | #define __CLC_BODY <half_unary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_exp10.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC exp10
4 | #define __CLC_BODY <half_unary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_exp2.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC exp2
4 | #define __CLC_BODY <half_unary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_log.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC log
4 | #define __CLC_BODY <half_unary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_log10.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC log10
4 | #define __CLC_BODY <half_unary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_log2.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC log2
4 | #define __CLC_BODY <half_unary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_powr.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC powr
4 | #define __CLC_BODY <half_binary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_recip.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | // half_recip is produced by half_unary.inc with a local "recip" macro as __CLC_FUNC.
 3 | #define recip(x) (1.0f/(x))
 4 | // The argument is parenthesised so the macro stays correct for any operand expression.
 5 | #define __CLC_FUNC recip
 6 | #define __CLC_BODY <half_unary.inc>
 7 | #define __FLOAT_ONLY
 8 | #include <clc/math/gentype.inc>
 9 | 
10 | #undef recip
11 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_rsqrt.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC rsqrt
4 | #define __CLC_BODY <half_unary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_sin.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC sin
4 | #define __CLC_BODY <half_unary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_sqrt.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC sqrt
4 | #define __CLC_BODY <half_unary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_tan.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_FUNC tan
4 | #define __CLC_BODY <half_unary.inc>
5 | #define __FLOAT_ONLY
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/half_unary.inc:
--------------------------------------------------------------------------------
 1 | #include <utils.h>
 2 | // Template body: defines half_<__CLC_FUNC>(val) for the current __CLC_GENTYPE.
 3 | #define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x)
 4 | // The half_ variant simply forwards its operand to __CLC_FUNC.
 5 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_HALF_FUNC(__CLC_FUNC)(__CLC_GENTYPE val) {
 6 |   return __CLC_FUNC(val);
 7 | }
 8 | // The naming helper is local to this template, so drop it again.
 9 | #undef __CLC_HALF_FUNC
10 | 


--------------------------------------------------------------------------------
/generic/lib/math/hypot.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #include <math/clc_hypot.h>
4 | 
5 | #define __CLC_FUNC hypot
6 | #define __CLC_BODY <clc_sw_binary.inc>
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/log.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "../clcmacro.h"
 3 | 
 4 | /*
 5 |  * Natural logarithm derived from the base-2 logarithm:
 6 |  *   log(x) = log2(x) * (1 / log2(e))
 7 |  */
 8 | 
 9 | _CLC_OVERLOAD _CLC_DEF float log(float x)
10 | {
11 |     const float inv_log2e = 1.0f / M_LOG2E_F;
12 |     return log2(x) * inv_log2e;
13 | }
14 | 
15 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log, float);
16 | 
17 | #ifdef cl_khr_fp64
18 | 
19 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
20 | 
21 | _CLC_OVERLOAD _CLC_DEF double log(double x)
22 | {
23 |     const double inv_log2e = 1.0 / M_LOG2E;
24 |     return log2(x) * inv_log2e;
25 | }
26 | 
27 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log, double);
28 | 
29 | #endif // cl_khr_fp64
30 | 


--------------------------------------------------------------------------------
/generic/lib/math/logb.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "math.h"
 3 | #include "../clcmacro.h"
 4 | // logb(float): exponent of x returned as a float, computed from the raw bit pattern; later selects override earlier ones.
 5 | _CLC_OVERLOAD _CLC_DEF float logb(float x) {
 6 |     int ax = as_int(x) & EXSIGNBIT_SP32; // presumably masks off the sign bit, leaving the bits of |x| -- confirm in math.h
 7 |     float s = -118 - clz(ax); // result for ax < 0x00800000: top set bit p gives clz = 31 - p, and p - 149 = -118 - clz
 8 |     float r = (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; // default path: shifted-down bits minus EXPBIAS_SP32
 9 |     r = ax >= PINFBITPATT_SP32 ? as_float(ax) : r; // at or above PINFBITPATT_SP32: hand back the masked input itself
10 |     r = ax < 0x00800000 ? s : r; // below 0x00800000: use the leading-zero based value
11 |     r = ax == 0 ? as_float(NINFBITPATT_SP32) : r; // zero input: the NINFBITPATT_SP32 bit pattern (presumably -inf)
12 |     return r;
13 | }
14 | 
15 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, logb, float);
16 | 
17 | #ifdef cl_khr_fp64
18 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
19 | // logb(double): same select chain as the float overload, on the 64-bit pattern; later selects override earlier ones.
20 | _CLC_OVERLOAD _CLC_DEF double logb(double x) {
21 |     long ax = as_long(x) & EXSIGNBIT_DP64; // presumably masks off the sign bit, leaving the bits of |x| -- confirm in math.h
22 |     double s = -1011L - clz(ax); // result for ax < 0x0010000000000000: top set bit p gives clz = 63 - p, and p - 1074 = -1011 - clz
23 |     double r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; // default path: shifted-down bits (narrowed to int) minus EXPBIAS_DP64
24 |     r = ax >= PINFBITPATT_DP64 ? as_double(ax) : r; // at or above PINFBITPATT_DP64: hand back the masked input itself
25 |     r = ax < 0x0010000000000000L ? s : r; // below 0x0010000000000000: use the leading-zero based value
26 |     r = ax == 0L ? as_double(NINFBITPATT_DP64) : r; // zero input: the NINFBITPATT_DP64 bit pattern (presumably -inf)
27 |     return r;
28 | }
29 | 
30 | _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double)
31 | #endif
32 | 


--------------------------------------------------------------------------------
/generic/lib/math/mad.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <mad.inc>
4 | #include <clc/math/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/math/mad.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) {
2 |   return a * b + c;  // one expression; NOTE(review): presumably left unsplit so the compiler may fuse it -- confirm
3 | }
4 | 


--------------------------------------------------------------------------------
/generic/lib/math/maxmag.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | #include <utils.h>
3 | 
4 | #define __CLC_BODY <maxmag.inc>
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/generic/lib/math/maxmag.inc:
--------------------------------------------------------------------------------
 1 | #ifdef __CLC_SCALAR
 2 | #define __CLC_VECSIZE
 3 | #endif
 4 | 
 5 | #if __CLC_FPSIZE == 64
 6 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_long, __CLC_VECSIZE)
 7 | #elif __CLC_FPSIZE == 32
 8 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_int, __CLC_VECSIZE)
 9 | #elif __CLC_FPSIZE == 16
10 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_short, __CLC_VECSIZE)
11 | #endif
12 | 
13 | // maxmag(x, y): the operand with the larger magnitude; NaN inputs and equal
14 | // magnitudes are resolved by fmax(x, y).
15 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE maxmag(__CLC_GENTYPE x, __CLC_GENTYPE y) {
16 |   const __CLC_GENTYPE mag_x = fabs(x);
17 |   const __CLC_GENTYPE mag_y = fabs(y);
18 |   // Take x where |x| > |y|, otherwise y.
19 |   const __CLC_GENTYPE by_magnitude = select(y, x, __CLC_CONVERT_NATN(isgreater(mag_x, mag_y)));
20 |   // Override with fmax(x, y) where either input is NaN or the magnitudes tie.
21 |   return select(by_magnitude, fmax(x, y), __CLC_CONVERT_NATN(isnan(x) | isnan(y) | isequal(mag_x, mag_y)));
22 | }
23 | 
24 | #undef __CLC_CONVERT_NATN
25 | 
26 | #ifdef __CLC_SCALAR
27 | #undef __CLC_VECSIZE
28 | #endif
29 | 


--------------------------------------------------------------------------------
/generic/lib/math/minmag.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | #include <utils.h>
3 | 
4 | #define __CLC_BODY <minmag.inc>
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/generic/lib/math/minmag.inc:
--------------------------------------------------------------------------------
 1 | #ifdef __CLC_SCALAR
 2 | #define __CLC_VECSIZE
 3 | #endif
 4 | 
 5 | #if __CLC_FPSIZE == 64
 6 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_long, __CLC_VECSIZE)
 7 | #elif __CLC_FPSIZE == 32
 8 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_int, __CLC_VECSIZE)
 9 | #elif __CLC_FPSIZE == 16
10 | #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_short, __CLC_VECSIZE)
11 | #endif
12 | 
13 | // minmag(x, y): the operand with the smaller magnitude; NaN inputs and equal
14 | // magnitudes are resolved by fmin(x, y).
15 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE minmag(__CLC_GENTYPE x, __CLC_GENTYPE y) {
16 |   const __CLC_GENTYPE mag_x = fabs(x);
17 |   const __CLC_GENTYPE mag_y = fabs(y);
18 |   // Take x where |x| < |y|, otherwise y.
19 |   const __CLC_GENTYPE by_magnitude = select(y, x, __CLC_CONVERT_NATN(isless(mag_x, mag_y)));
20 |   // Override with fmin(x, y) where either input is NaN or the magnitudes tie.
21 |   return select(by_magnitude, fmin(x, y), __CLC_CONVERT_NATN(isnan(x) | isnan(y) | isequal(mag_x, mag_y)));
22 | }
23 | 
24 | #undef __CLC_CONVERT_NATN
25 | 
26 | #ifdef __CLC_SCALAR
27 | #undef __CLC_VECSIZE
28 | #endif
29 | 


--------------------------------------------------------------------------------
/generic/lib/math/nan.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | #include "utils.h"
3 | 
4 | #define __CLC_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE)
5 | #define __CLC_BODY <nan.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/nan.inc:
--------------------------------------------------------------------------------
 1 | // nan(code): builds a quiet NaN of the current __CLC_GENTYPE by OR-ing the
 2 | // caller's nancode with the all-ones exponent and the top mantissa bit.
 3 | // The argument is the unsigned integer type of the same width and vector size.
 4 | #ifdef __CLC_SCALAR
 5 | #define __CLC_VECSIZE
 6 | #endif
 7 | 
 8 | #if __CLC_FPSIZE == 64
 9 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ulong, __CLC_VECSIZE) code)
10 | {
11 | 	// The top mantissa bit has to be set too: the bare exponent mask
12 | 	// 0x7ff0000000000000 encodes +infinity whenever code carries no mantissa
13 | 	// bits (e.g. nan(0)). This matches the 32- and 16-bit branches below.
14 | 	return __CLC_AS_GENTYPE(code | 0x7ff8000000000000ul);
15 | }
16 | #elif __CLC_FPSIZE == 32
17 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(uint, __CLC_VECSIZE) code)
18 | {
19 | 	// 0x7fc00000: all-ones exponent plus the top mantissa bit.
20 | 	return __CLC_AS_GENTYPE(code | 0x7fc00000);
21 | }
22 | #elif __CLC_FPSIZE == 16
23 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ushort, __CLC_VECSIZE) code)
24 | {
25 | 	// 0x7e00: all-ones exponent plus the top mantissa bit. The OR goes through
26 | 	// a ushort-typed temporary before the bits are reinterpreted.
27 | 	const ushort mask = 0x7e00;
28 | 	const __CLC_XCONCAT(ushort, __CLC_VECSIZE) res = code | mask;
29 | 	return __CLC_AS_GENTYPE(res);
30 | }
31 | #endif
32 | 
33 | 
34 | #ifdef __CLC_SCALAR
35 | #undef __CLC_VECSIZE
36 | #endif
37 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_cos.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_NATIVE_INTRINSIC cos
4 | 
5 | #define __CLC_BODY <native_unary_intrinsic.inc>
6 | #define __FLOAT_ONLY
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_divide.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <native_divide.inc>
4 | #define __FLOAT_ONLY
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_divide.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_divide(__CLC_GENTYPE x, __CLC_GENTYPE y) {
2 |   return x / y;  // plain division operator; no special-case handling is added here
3 | }
4 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_exp.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_NATIVE_INTRINSIC exp
4 | 
5 | #define __CLC_BODY <native_unary_intrinsic.inc>
6 | #define __FLOAT_ONLY
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_exp10.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <native_exp10.inc>
4 | #define __FLOAT_ONLY
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_exp10.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_exp10(__CLC_GENTYPE val) {
2 |   // Rescale the exponent by M_LOG210_F (presumably log2(10)) and use native_exp2.
3 |   const __CLC_GENTYPE base2_exponent = val * M_LOG210_F;
4 |   return native_exp2(base2_exponent);
5 | }
6 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_exp2.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_NATIVE_INTRINSIC exp2
4 | 
5 | #define __CLC_BODY <native_unary_intrinsic.inc>
6 | #define __FLOAT_ONLY
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_log10.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_NATIVE_INTRINSIC log10
4 | 
5 | #define __CLC_BODY <native_unary_intrinsic.inc>
6 | #define __FLOAT_ONLY
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_powr.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <native_powr.inc>
4 | #define __FLOAT_ONLY
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_powr.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_powr(__CLC_GENTYPE x, __CLC_GENTYPE y) {
2 |   // Identity used: x^y == 2^(y * log2(x)).
3 |   // For x < 0 the NaN produced by native_log2 is simply propagated.
4 |   const __CLC_GENTYPE base2_exponent = y * native_log2(x);
5 |   return native_exp2(base2_exponent);
6 | }
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_recip.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <native_recip.inc>
4 | #define __FLOAT_ONLY
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_recip.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_recip(__CLC_GENTYPE val) {
2 |   return 1.0f / val;  // plain division of the float constant 1.0f by the operand
3 | }
4 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_rsqrt.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <native_rsqrt.inc>
4 | #define __FLOAT_ONLY
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_rsqrt.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_rsqrt(__CLC_GENTYPE val) {
2 |   // Reciprocal of the native square root.
3 |   const __CLC_GENTYPE root = native_sqrt(val);
4 |   return 1.0f / root;
5 | }
6 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_sin.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_NATIVE_INTRINSIC sin
4 | 
5 | #define __CLC_BODY <native_unary_intrinsic.inc>
6 | #define __FLOAT_ONLY
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_sqrt.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_NATIVE_INTRINSIC sqrt
4 | 
5 | #define __CLC_BODY <native_unary_intrinsic.inc>
6 | #define __FLOAT_ONLY
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_tan.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <native_tan.inc>
4 | #define __FLOAT_ONLY
5 | #include <clc/math/gentype.inc>
6 | 


--------------------------------------------------------------------------------
/generic/lib/math/native_tan.inc:
--------------------------------------------------------------------------------
1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_tan(__CLC_GENTYPE val) {
2 |   // tan = sin / cos, both taken from the native_ variants.
3 |   const __CLC_GENTYPE sine = native_sin(val);
4 |   const __CLC_GENTYPE cosine = native_cos(val);
5 |   return sine / cosine;
6 | }
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/nextafter.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "../clcmacro.h"
 3 | 
 4 | _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __builtin_nextafterf, float, float)
 5 | 
 6 | #ifdef cl_khr_fp64
 7 | 
 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 9 | 
10 | _CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __builtin_nextafter, double, double)
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/generic/lib/math/pow.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #include <math/clc_pow.h>
4 | 
5 | #define __CLC_FUNC pow
6 | #define __CLC_BODY <clc_sw_binary.inc>
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/pown.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #include <math/clc_pown.h>
4 | 
5 | #define __CLC_BODY <pown.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/pown.inc:
--------------------------------------------------------------------------------
1 | // TODO: Enable half precision when the sw routine is implemented
2 | #if __CLC_FPSIZE > 16  // no pown is emitted for 16-bit gentypes
3 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pown(__CLC_GENTYPE x, __CLC_INTN y) {
4 |   return __clc_pown(x, y);  // thin forwarder; pown.cl includes <math/clc_pown.h> before this body
5 | }
6 | #endif
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/powr.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #include <math/clc_powr.h>
4 | 
5 | #define __CLC_FUNC powr
6 | #define __CLC_BODY <clc_sw_binary.inc>
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/remainder.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | #include <math/clc_remainder.h>
3 | 
4 | #define __CLC_FUNC remainder
5 | #define __CLC_BODY <clc_sw_binary.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/remquo.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include <math/clc_remquo.h>
 3 | 
 4 | #define __CLC_BODY <remquo.inc>
 5 | #define __CLC_ADDRESS_SPACE global
 6 | #include <clc/math/gentype.inc>
 7 | #undef __CLC_ADDRESS_SPACE
 8 | 
 9 | #define __CLC_BODY <remquo.inc>
10 | #define __CLC_ADDRESS_SPACE local
11 | #include <clc/math/gentype.inc>
12 | #undef __CLC_ADDRESS_SPACE
13 | 
14 | #define __CLC_BODY <remquo.inc>
15 | #define __CLC_ADDRESS_SPACE private
16 | #include <clc/math/gentype.inc>
17 | #undef __CLC_ADDRESS_SPACE
18 | 


--------------------------------------------------------------------------------
/generic/lib/math/remquo.inc:
--------------------------------------------------------------------------------
 1 | // TODO: Enable half precision when the sw routine is implemented
 2 | #if __CLC_FPSIZE > 16
 3 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) {
 4 |   __CLC_INTN quotient_bits;  // scratch in this function's scope; __clc_remquo gets its address
 5 |   const __CLC_GENTYPE rem_value = __clc_remquo(x, y, &quotient_bits);
 6 |   *q = quotient_bits;  // copy out through the caller's address-space-qualified pointer
 7 |   return rem_value;
 8 | }
 9 | #endif
10 | 


--------------------------------------------------------------------------------
/generic/lib/math/rint.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | // Map the llvm intrinsic to an OpenCL function.
 4 | #define __CLC_FUNCTION __clc_rint
 5 | #define __CLC_INTRINSIC "llvm.rint"
 6 | #include "math/unary_intrin.inc"
 7 | 
 8 | #undef __CLC_FUNCTION
 9 | #define __CLC_FUNCTION rint
10 | #include "unary_builtin.inc"
11 | 


--------------------------------------------------------------------------------
/generic/lib/math/rootn.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #include <math/clc_rootn.h>
4 | 
5 | #define __CLC_BODY <rootn.inc>
6 | #include <clc/math/gentype.inc>
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/rootn.inc:
--------------------------------------------------------------------------------
1 | // TODO: Enable half precision when the sw routine is implemented
2 | #if __CLC_FPSIZE > 16  // no rootn is emitted for 16-bit gentypes
3 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rootn(__CLC_GENTYPE x, __CLC_INTN y) {
4 |   return __clc_rootn(x, y);  // thin forwarder; rootn.cl includes <math/clc_rootn.h> before this body
5 | }
6 | #endif
7 | 


--------------------------------------------------------------------------------
/generic/lib/math/round.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | // Map the llvm intrinsic to an OpenCL function.
 4 | #define __CLC_FUNCTION __clc_round
 5 | #define __CLC_INTRINSIC "llvm.round"
 6 | #include "math/unary_intrin.inc"
 7 | 
 8 | #undef __CLC_FUNCTION
 9 | #define __CLC_FUNCTION round
10 | #include "unary_builtin.inc"
11 | 


--------------------------------------------------------------------------------
/generic/lib/math/sincos.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <sincos.inc>
4 | #include <clc/math/gentype.inc>
5 | 


--------------------------------------------------------------------------------
/generic/lib/math/sincos.inc:
--------------------------------------------------------------------------------
 1 | // TODO: Enable half precision when sin/cos is implemented
 2 | #if __CLC_FPSIZE > 16
 3 | // Emits sincos(x, AS T *cosval) for one pointer address space: the cosine is
 4 | // stored through cosval and the sine is the return value.
 5 | #define __CLC_DECLARE_SINCOS(AS, T) \
 6 |   _CLC_OVERLOAD _CLC_DEF T sincos (T x, AS T * cosval) { \
 7 |     const T sinval = sin(x); \
 8 |     *cosval = cos(x); \
 9 |     return sinval; \
10 |   }
11 | 
12 | __CLC_DECLARE_SINCOS(global, __CLC_GENTYPE)
13 | __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE)
14 | __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE)
15 | 
16 | #undef __CLC_DECLARE_SINCOS
17 | #endif
18 | 


--------------------------------------------------------------------------------
/generic/lib/math/tan.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #include <math/clc_tan.h>
4 | 
5 | #define __CLC_FUNC tan
6 | #define __CLC_BODY <clc_sw_unary.inc>
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/tanpi.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #include <math/clc_tanpi.h>
4 | 
5 | #define __CLC_FUNC tanpi
6 | #define __CLC_BODY <clc_sw_unary.inc>
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/math/trunc.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | // Map the llvm intrinsic to an OpenCL function.
 4 | #define __CLC_FUNCTION __clc_trunc
 5 | #define __CLC_INTRINSIC "llvm.trunc"
 6 | #include "math/unary_intrin.inc"
 7 | 
 8 | #undef __CLC_FUNCTION
 9 | #define __CLC_FUNCTION trunc
10 | #include "unary_builtin.inc"
11 | 


--------------------------------------------------------------------------------
/generic/lib/math/unary_builtin.inc:
--------------------------------------------------------------------------------
 1 | #include "../clcmacro.h"
 2 | #include "utils.h"
 3 | // Default the implementation name to __clc_<__CLC_FUNCTION> unless the includer already set __CLC_BUILTIN.
 4 | #ifndef __CLC_BUILTIN
 5 | #define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION)
 6 | #endif
 7 | // Instantiate __CLC_FUNCTION for float, then for double and half when the matching extension macro is defined.
 8 | _CLC_DEFINE_UNARY_BUILTIN(float, __CLC_FUNCTION, __CLC_BUILTIN, float)
 9 | 
10 | #ifdef cl_khr_fp64
11 | 
12 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
13 | 
14 | _CLC_DEFINE_UNARY_BUILTIN(double, __CLC_FUNCTION, __CLC_BUILTIN, double)
15 | 
16 | #endif
17 | 
18 | #ifdef cl_khr_fp16
19 | 
20 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
21 | 
22 | _CLC_DEFINE_UNARY_BUILTIN(half, __CLC_FUNCTION, __CLC_BUILTIN, half)
23 | 
24 | #endif


--------------------------------------------------------------------------------
/generic/lib/relational/isfinite.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | 
 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isfinite, __builtin_isfinite, float)
 5 | 
 6 | #ifdef cl_khr_fp64
 7 | 
 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 9 | 
10 | // Scalar isfinite(double) is written out by hand because it returns int, while
11 | // the double vector overloads instantiated right after it return long.
12 | _CLC_DEF _CLC_OVERLOAD int isfinite(double x) {
13 |   return __builtin_isfinite(x);  // the compiler builtin's result is passed through unchanged
14 | }
15 | 
16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isfinite, double)
17 | 
18 | #endif
19 | #ifdef cl_khr_fp16
20 | 
21 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
22 | 
23 | // Scalar isfinite(half) is written out by hand because it returns int, while
24 | // the half vector overloads instantiated right after it return short.
25 | _CLC_DEF _CLC_OVERLOAD int isfinite(half x) {
26 |   return __builtin_isfinite(x);  // the compiler builtin's result is passed through unchanged
27 | }
28 | 
29 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isfinite, half)
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/generic/lib/relational/isgreater.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | 
 4 | //Note: It would be nice to use __builtin_isgreater with vector inputs, but it seems to only take scalar values as
 5 | //      input, which will produce incorrect output for vector input types.
 6 | 
 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, isgreater, __builtin_isgreater, float, float)
 8 | 
 9 | #ifdef cl_khr_fp64
10 | 
11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
12 | 
13 | // Scalar isgreater(double, double) is written out by hand because it returns int,
14 | // while the double vector overloads instantiated right after it return long.
15 | 
16 | _CLC_DEF _CLC_OVERLOAD int isgreater(double x, double y){
17 | 	return __builtin_isgreater(x, y);  // the compiler builtin's result is passed through unchanged
18 | }
19 | 
20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreater, double, double)
21 | 
22 | #endif
23 | 
24 | #ifdef cl_khr_fp16
25 | 
26 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
27 | 
28 | // Scalar isgreater(half, half) is written out by hand because it returns int,
29 | // while the half vector overloads instantiated right after it return short.
30 | 
31 | _CLC_DEF _CLC_OVERLOAD int isgreater(half x, half y){
32 | 	return __builtin_isgreater(x, y);  // the compiler builtin's result is passed through unchanged
33 | }
34 | 
35 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreater, half, half)
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/generic/lib/relational/isinf.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | 
 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isinf, __builtin_isinf, float)
 5 | 
 6 | #ifdef cl_khr_fp64
 7 | 
 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 9 | 
10 | // Scalar isinf(double) is written out by hand because it returns int, while
11 | // the double vector overloads instantiated right after it return long.
12 | _CLC_DEF _CLC_OVERLOAD int isinf(double x) {
13 |   return __builtin_isinf(x);  // the compiler builtin's result is passed through unchanged
14 | }
15 | 
16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isinf, double)
17 | #endif
18 | 
19 | #ifdef cl_khr_fp16
20 | 
21 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
22 | 
23 | // Scalar isinf(half) is written out by hand because it returns int, while
24 | // the half vector overloads instantiated right after it return short.
25 | _CLC_DEF _CLC_OVERLOAD int isinf(half x) {
26 |   return __builtin_isinf(x);  // the compiler builtin's result is passed through unchanged
27 | }
28 | 
29 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isinf, half)
30 | #endif
31 | 


--------------------------------------------------------------------------------
/generic/lib/relational/isless.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | 
 4 | //Note: It would be nice to use __builtin_isless with vector inputs, but it seems to only take scalar values as
 5 | //      input, which will produce incorrect output for vector input types.
 6 | 
 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, isless, __builtin_isless, float, float)
 8 | 
 9 | #ifdef cl_khr_fp64
10 | 
11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
12 | 
13 | // The scalar version of isless(double, double) returns an int, but the vector versions
14 | // return long.
15 | 
16 | _CLC_DEF _CLC_OVERLOAD int isless(double x, double y){
17 | 	return __builtin_isless(x, y); // scalar double path: the builtin's result is returned as int
18 | }
19 | 
20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isless, double, double)
21 | 
22 | #endif
23 | #ifdef cl_khr_fp16
24 | 
25 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
26 | 
27 | // The scalar version of isless(half, half) returns an int, but the vector versions
28 | // return short.
29 | 
30 | _CLC_DEF _CLC_OVERLOAD int isless(half x, half y){
31 | 	return __builtin_isless(x, y); // scalar half path: the builtin's result is returned as int
32 | }
33 | 
34 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isless, half, half)
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/generic/lib/relational/islessequal.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | 
 4 | //Note: It would be nice to use __builtin_islessequal with vector inputs, but it seems to only take scalar values as
 5 | //      input, which will produce incorrect output for vector input types.
 6 | 
 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, islessequal, __builtin_islessequal, float, float)
 8 | 
 9 | #ifdef cl_khr_fp64
10 | 
11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
12 | 
13 | // The scalar version of islessequal(double, double) returns an int, but the vector versions
14 | // return long.
15 | 
16 | _CLC_DEF _CLC_OVERLOAD int islessequal(double x, double y){
17 | 	return __builtin_islessequal(x, y); // scalar double path: the builtin's result is returned as int
18 | }
19 | 
20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessequal, double, double)
21 | 
22 | #endif
23 | #ifdef cl_khr_fp16
24 | 
25 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
26 | 
27 | // The scalar version of islessequal(half, half) returns an int, but the vector versions
28 | // return short.
29 | 
30 | _CLC_DEF _CLC_OVERLOAD int islessequal(half x, half y){
31 | 	return __builtin_islessequal(x, y); // scalar half path: the builtin's result is returned as int
32 | }
33 | 
34 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessequal, half, half)
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/generic/lib/relational/islessgreater.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | 
 4 | //Note: It would be nice to use __builtin_islessgreater with vector inputs, but it seems to only take scalar values as
 5 | //      input, which will produce incorrect output for vector input types.
 6 | 
 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, islessgreater, __builtin_islessgreater, float, float)
 8 | 
 9 | #ifdef cl_khr_fp64
10 | 
11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
12 | 
13 | // The scalar version of islessgreater(double, double) returns an int, but the vector versions
14 | // return long.
15 | 
16 | _CLC_DEF _CLC_OVERLOAD int islessgreater(double x, double y){
17 | 	return __builtin_islessgreater(x, y); // scalar double path: the builtin's result is returned as int
18 | }
19 | 
20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessgreater, double, double)
21 | 
22 | #endif
23 | #ifdef cl_khr_fp16
24 | 
25 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
26 | 
27 | // The scalar version of islessgreater(half, half) returns an int, but the vector versions
28 | // return short.
29 | 
30 | _CLC_DEF _CLC_OVERLOAD int islessgreater(half x, half y){
31 | 	return __builtin_islessgreater(x, y); // scalar half path: the builtin's result is returned as int
32 | }
33 | 
34 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessgreater, half, half)
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/generic/lib/relational/isnan.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | 
 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isnan, __builtin_isnan, float)
 5 | 
 6 | #ifdef cl_khr_fp64
 7 | 
 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 9 | 
10 | // The scalar version of isnan(double) returns an int, but the vector versions
11 | // return long.
12 | _CLC_DEF _CLC_OVERLOAD int isnan(double x) {
13 |   return __builtin_isnan(x); // scalar double path: forwards x to the builtin and returns its int result
14 | }
15 | 
16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnan, double)
17 | 
18 | #endif
19 | 
20 | #ifdef cl_khr_fp16
21 | 
22 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
23 | 
24 | // The scalar version of isnan(half) returns an int, but the vector versions
25 | // return short.
26 | _CLC_DEF _CLC_OVERLOAD int isnan(half x) {
27 |   return __builtin_isnan(x); // scalar half path: forwards x to the builtin and returns its int result
28 | }
29 | 
30 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnan, half)
31 | 
32 | #endif
33 | 


--------------------------------------------------------------------------------
/generic/lib/relational/isnormal.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | 
 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, isnormal, __builtin_isnormal, float)
 5 | 
 6 | #ifdef cl_khr_fp64
 7 | 
 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 9 | 
10 | // The scalar version of isnormal(double) returns an int, but the vector versions
11 | // return long.
12 | _CLC_DEF _CLC_OVERLOAD int isnormal(double x) {
13 |   return __builtin_isnormal(x); // scalar double path: forwards x to the builtin and returns its int result
14 | }
15 | 
16 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnormal, double)
17 | 
18 | #endif
19 | #ifdef cl_khr_fp16
20 | 
21 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
22 | 
23 | // The scalar version of isnormal(half) returns an int, but the vector versions
24 | // return short.
25 | _CLC_DEF _CLC_OVERLOAD int isnormal(half x) {
26 |   return __builtin_isnormal(x); // scalar half path: forwards x to the builtin and returns its int result
27 | }
28 | 
29 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnormal, half)
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/generic/lib/relational/isnotequal.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | // Emits one scalar FUNCTION(ARG1_TYPE x, ARG2_TYPE y) overload whose result is (x != y), returned as RET_TYPE.
 4 | #define _CLC_DEFINE_ISNOTEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
 5 | _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
 6 |   return (x != y); \
 7 | } \
 8 | 
 9 | _CLC_DEFINE_ISNOTEQUAL(int, isnotequal, float, float)
10 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isnotequal, float, float)
11 | 
12 | #ifdef cl_khr_fp64
13 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
14 | 
15 | // The scalar version of isnotequal(double, double) returns an int, but the vector versions
16 | // return long.
17 | 
18 | _CLC_DEFINE_ISNOTEQUAL(int, isnotequal, double, double)
19 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isnotequal, double, double)
20 | 
21 | #endif
22 | #ifdef cl_khr_fp16
23 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
24 | 
25 | // The scalar version of isnotequal(half, half) returns an int, but the vector versions
26 | // return short.
27 | 
28 | _CLC_DEFINE_ISNOTEQUAL(int, isnotequal, half, half)
29 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isnotequal, half, half)
30 | 
31 | #endif
32 | 
33 | #undef _CLC_DEFINE_ISNOTEQUAL
34 | 


--------------------------------------------------------------------------------
/generic/lib/relational/isordered.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | // Emits one scalar FUNCTION(ARG1_TYPE x, ARG2_TYPE y) overload that is true only when x and y each compare equal to themselves via isequal().
 4 | #define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
 5 | _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
 6 |   return isequal(x, x) && isequal(y, y); \
 7 | } \
 8 | 
 9 | _CLC_DEFINE_ISORDERED(int, isordered, float, float)
10 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isordered, float, float)
11 | 
12 | #ifdef cl_khr_fp64
13 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
14 | 
15 | // The scalar version of isordered(double, double) returns an int, but the vector versions
16 | // return long.
17 | 
18 | _CLC_DEFINE_ISORDERED(int, isordered, double, double)
19 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isordered, double, double)
20 | 
21 | #endif
22 | #ifdef cl_khr_fp16
23 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
24 | 
25 | // The scalar version of isordered(half, half) returns an int, but the vector versions
26 | // return short.
27 | 
28 | _CLC_DEFINE_ISORDERED(int, isordered, half, half)
29 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isordered, half, half)
30 | 
31 | #endif
32 | 
33 | #undef _CLC_DEFINE_ISORDERED
34 | 


--------------------------------------------------------------------------------
/generic/lib/relational/isunordered.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | 
 4 | //Note: It would be nice to use __builtin_isunordered with vector inputs, but it seems to only take scalar values as
 5 | //      input, which will produce incorrect output for vector input types.
 6 | 
 7 | _CLC_DEFINE_RELATIONAL_BINARY(int, isunordered, __builtin_isunordered, float, float)
 8 | 
 9 | #ifdef cl_khr_fp64
10 | 
11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
12 | 
13 | // The scalar version of isunordered(double, double) returns an int, but the vector versions
14 | // return long.
15 | 
16 | _CLC_DEF _CLC_OVERLOAD int isunordered(double x, double y){
17 | 	return __builtin_isunordered(x, y); // scalar double path: the builtin's result is returned as int
18 | }
19 | 
20 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isunordered, double, double)
21 | 
22 | #endif
23 | #ifdef cl_khr_fp16
24 | 
25 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
26 | 
27 | // The scalar version of isunordered(half, half) returns an int, but the vector versions
28 | // return short.
29 | 
30 | _CLC_DEF _CLC_OVERLOAD int isunordered(half x, half y){
31 | 	return __builtin_isunordered(x, y); // scalar half path: the builtin's result is returned as int
32 | }
33 | 
34 | _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isunordered, half, half)
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/generic/lib/relational/select.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | #include <utils.h>
3 | 
4 | #define __CLC_BODY <select.inc>
5 | #include <clc/math/gentype.inc>
6 | #define __CLC_BODY <select.inc>
7 | #include <clc/integer/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/relational/select.inc:
--------------------------------------------------------------------------------
 1 | #ifdef __CLC_SCALAR
 2 | #define __CLC_VECSIZE
 3 | #endif
 4 | 
 5 | #if __CLC_FPSIZE == 64
 6 | #define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE)
 7 | #define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE)
 8 | #elif __CLC_FPSIZE == 32
 9 | #define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE)
10 | #define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE)
11 | #elif __CLC_FPSIZE == 16
12 | #define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE)
13 | #define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE)
14 | #endif
15 | // select() overload whose selector z has type __CLC_S_GENTYPE: evaluates z ? y : x on the generic type.
16 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_S_GENTYPE z)
17 | {
18 | 	return z ? y : x;
19 | }
20 | // select() overload whose selector z has type __CLC_U_GENTYPE: evaluates z ? y : x on the generic type.
21 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE select(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_U_GENTYPE z)
22 | {
23 | 	return z ? y : x;
24 | }
25 | 
26 | #ifdef __CLC_FPSIZE
27 | #undef __CLC_S_GENTYPE
28 | #undef __CLC_U_GENTYPE
29 | #endif
30 | 
31 | #ifdef __CLC_SCALAR
32 | #undef __CLC_VECSIZE
33 | #endif
34 | 


--------------------------------------------------------------------------------
/generic/lib/relational/signbit.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "relational.h"
 3 | 
 4 | _CLC_DEFINE_RELATIONAL_UNARY(int, signbit, __builtin_signbitf, float)
 5 | 
 6 | #ifdef cl_khr_fp64
 7 | 
 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 9 | 
10 | // The scalar version of signbit(double) returns an int, but the vector versions
11 | // return long.
12 | 
13 | _CLC_DEF _CLC_OVERLOAD int signbit(double x){
14 | 	return __builtin_signbit(x); // scalar double path: forwards x to the builtin and returns its int result
15 | }
16 | 
17 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, signbit, double)
18 | 
19 | #endif
20 | #ifdef cl_khr_fp16
21 | 
22 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
23 | 
24 | // The scalar version of signbit(half) returns an int, but the vector versions
25 | // return short.
26 | 
27 | _CLC_DEF _CLC_OVERLOAD int signbit(half x){
28 | 	return __builtin_signbit(x); // scalar half path: forwards x to the builtin and returns its int result
29 | }
30 | 
31 | _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, signbit, half)
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/generic/lib/shared/clamp.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <clamp.inc>
4 | #include <clc/integer/gentype.inc>
5 | 
6 | #define __CLC_BODY <clamp.inc>
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/shared/clamp.inc:
--------------------------------------------------------------------------------
 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) {
 2 |   const __CLC_GENTYPE raised = x < y ? y : x; // apply the lower bound y
 3 |   return x > z ? z : raised;                 // the upper bound z is tested against x itself and wins
 4 | }
 5 | #ifndef __CLC_SCALAR
 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z) {
 7 |   return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z : (x < (__CLC_GENTYPE)y ? (__CLC_GENTYPE)y : x)); // scalar bounds are cast to the vector type; the upper bound z is tested first
 8 | }
 9 | #endif
10 | 


--------------------------------------------------------------------------------
/generic/lib/shared/max.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <max.inc>
4 | #include <clc/integer/gentype.inc>
5 | 
6 | #define __CLC_BODY <max.inc>
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/shared/max.inc:
--------------------------------------------------------------------------------
 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b) {
 2 |   // Keep b unless a compares strictly greater.
 3 |   return b < a ? a : b;
 4 | }
 5 | #ifndef __CLC_SCALAR
 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) {
 7 |   return (a > (__CLC_GENTYPE)b ? a : (__CLC_GENTYPE)b); // scalar b is cast to the vector type for both the comparison and the result
 8 | }
 9 | #endif
10 | 


--------------------------------------------------------------------------------
/generic/lib/shared/min.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | #define __CLC_BODY <min.inc>
4 | #include <clc/integer/gentype.inc>
5 | 
6 | #define __CLC_BODY <min.inc>
7 | #include <clc/math/gentype.inc>
8 | 


--------------------------------------------------------------------------------
/generic/lib/shared/min.inc:
--------------------------------------------------------------------------------
 1 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b) {
 2 |   // Keep a unless b compares strictly smaller.
 3 |   return a > b ? b : a;
 4 | }
 5 | #ifndef __CLC_SCALAR
 6 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) {
 7 |   return ((__CLC_GENTYPE)b < a ? (__CLC_GENTYPE)b : a); // widen scalar b explicitly (the cast previously sat on a, where it was a no-op), mirroring max()
 8 | }
 9 | #endif
10 | 


--------------------------------------------------------------------------------
/generic/lib/shared/vload_half.inc:
--------------------------------------------------------------------------------
 1 | #if __CLC_FPSIZE == 32
 2 | 
 3 | #ifdef __CLC_VECSIZE
 4 | 
 5 | #if __CLC_VECSIZE == 3
 6 | #  define __CLC_OFFSET 4
 7 | #else
 8 | #  define __CLC_OFFSET __CLC_VECSIZE
 9 | #endif
10 | 
11 |   FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __private);
12 |   FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __local);
13 |   FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __global);
14 |   FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __constant);
15 | 
16 | #undef __CLC_OFFSET
17 | #else
18 |   FUNC(, 1, 1, __CLC_GENTYPE, __private);
19 |   FUNC(, 1, 1, __CLC_GENTYPE, __local);
20 |   FUNC(, 1, 1, __CLC_GENTYPE, __global);
21 |   FUNC(, 1, 1, __CLC_GENTYPE, __constant);
22 | #endif
23 | #endif
24 | 


--------------------------------------------------------------------------------
/generic/lib/shared/vstore_half.inc:
--------------------------------------------------------------------------------
 1 | // This does not exist for fp16
 2 | #if __CLC_FPSIZE > 16
 3 | #ifdef __CLC_VECSIZE
 4 | 
 5 | #if __CLC_VECSIZE == 3
 6 | #  define __CLC_OFFSET 4
 7 | #else
 8 | #  define __CLC_OFFSET __CLC_VECSIZE
 9 | #endif
10 | 
11 |   FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
12 |   FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
13 |   FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
14 | 
15 | #undef __CLC_OFFSET
16 | #else
17 |   FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
18 |   FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
19 |   FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
20 | #endif
21 | #endif
22 | 


--------------------------------------------------------------------------------
/generic/lib/subnormal_disable.ll:
--------------------------------------------------------------------------------
1 | @__CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 true
2 | 


--------------------------------------------------------------------------------
/generic/lib/subnormal_helper_func.ll:
--------------------------------------------------------------------------------
1 | @__CLC_SUBNORMAL_DISABLE = external global i1
2 | ; Loads and returns the i1 value of the externally defined @__CLC_SUBNORMAL_DISABLE global.
3 | define i1 @__clc_subnormals_disabled() #0 {
4 |   %disable = load i1, i1* @__CLC_SUBNORMAL_DISABLE
5 |   ret i1 %disable
6 | }
7 | 
8 | attributes #0 = { alwaysinline }
9 | 


--------------------------------------------------------------------------------
/generic/lib/subnormal_use_default.ll:
--------------------------------------------------------------------------------
1 | @__CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 false
2 | 


--------------------------------------------------------------------------------
/generic/lib/workitem/get_global_id.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | // Global id along `dim`: group id times local size, plus local id, plus the global offset.
3 | _CLC_DEF size_t get_global_id(uint dim) {
4 |   const size_t group_base = get_group_id(dim) * get_local_size(dim);
5 |   return group_base + get_local_id(dim) + get_global_offset(dim);
6 | }


--------------------------------------------------------------------------------
/generic/lib/workitem/get_global_size.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | // Global size along `dim`, computed as get_num_groups(dim) * get_local_size(dim).
3 | _CLC_DEF size_t get_global_size(uint dim) {
4 |   const size_t group_count = get_num_groups(dim);
5 |   return group_count * get_local_size(dim);
6 | }


--------------------------------------------------------------------------------
/libclc.pc.in:
--------------------------------------------------------------------------------
1 | includedir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@
2 | libexecdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_DATADIR@/clc
3 | 
4 | Name: libclc
5 | Description: Library requirements of the OpenCL C programming language
6 | Version: @PROJECT_VERSION@
7 | Cflags: -I${includedir}
8 | Libs: -L${libexecdir}
9 | 


--------------------------------------------------------------------------------
/ptx-nvidiacl/lib/SOURCES:
--------------------------------------------------------------------------------
1 | mem_fence/fence.cl
2 | synchronization/barrier.cl
3 | workitem/get_global_id.cl
4 | workitem/get_group_id.cl
5 | workitem/get_local_id.cl
6 | workitem/get_local_size.cl
7 | workitem/get_num_groups.cl
8 | 


--------------------------------------------------------------------------------
/ptx-nvidiacl/lib/mem_fence/fence.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | // Issues __nvvm_membar_cta() when the global or the local fence bit is set in flags; otherwise does nothing.
 3 | _CLC_DEF void mem_fence(cl_mem_fence_flags flags) {
 4 |   const cl_mem_fence_flags fence_bits = CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE;
 5 |   if ((flags & fence_bits) != 0) __nvvm_membar_cta();
 6 | }
 7 | 
 8 | // We do not have a separate mechanism for read and write fences.
 9 | _CLC_DEF void read_mem_fence(cl_mem_fence_flags flags) {
10 |   mem_fence(flags); // read fences reuse the combined fence; flags are forwarded unchanged
11 | }
12 | 
13 | _CLC_DEF void write_mem_fence(cl_mem_fence_flags flags) {
14 |   mem_fence(flags); // write fences reuse the combined fence; flags are forwarded unchanged
15 | }
16 | 


--------------------------------------------------------------------------------
/ptx-nvidiacl/lib/synchronization/barrier.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | // Calls __syncthreads(); the flags argument is not inspected by this implementation.
3 | _CLC_DEF void barrier(cl_mem_fence_flags flags) {
4 |   __syncthreads();
5 | }
6 | 
7 | 


--------------------------------------------------------------------------------
/ptx-nvidiacl/lib/workitem/get_global_id.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | // group id * local size + local id. NOTE(review): no global-offset term is added here - confirm offsets are always zero on this target.
3 | _CLC_DEF size_t get_global_id(uint dim) {
4 |   const size_t group_base = get_group_id(dim) * get_local_size(dim);
5 |   return group_base + get_local_id(dim);
6 | }


--------------------------------------------------------------------------------
/ptx-nvidiacl/lib/workitem/get_group_id.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | // Reads the ctaid special register for dimension 0 (x), 1 (y) or 2 (z).
 4 | _CLC_DEF size_t get_group_id(uint dim) {
 5 |   if (dim == 0) return __nvvm_read_ptx_sreg_ctaid_x();
 6 |   if (dim == 1) return __nvvm_read_ptx_sreg_ctaid_y();
 7 |   if (dim == 2) return __nvvm_read_ptx_sreg_ctaid_z();
 8 |   // Any other dimension index yields 0.
 9 |   return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/ptx-nvidiacl/lib/workitem/get_local_id.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | // Reads the tid special register for dimension 0 (x), 1 (y) or 2 (z).
 4 | _CLC_DEF size_t get_local_id(uint dim) {
 5 |   if (dim == 0) return __nvvm_read_ptx_sreg_tid_x();
 6 |   if (dim == 1) return __nvvm_read_ptx_sreg_tid_y();
 7 |   if (dim == 2) return __nvvm_read_ptx_sreg_tid_z();
 8 |   // Any other dimension index yields 0.
 9 |   return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/ptx-nvidiacl/lib/workitem/get_local_size.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | // Reads the ntid special register for dimension 0 (x), 1 (y) or 2 (z).
 4 | _CLC_DEF size_t get_local_size(uint dim) {
 5 |   if (dim == 0) return __nvvm_read_ptx_sreg_ntid_x();
 6 |   if (dim == 1) return __nvvm_read_ptx_sreg_ntid_y();
 7 |   if (dim == 2) return __nvvm_read_ptx_sreg_ntid_z();
 8 |   // Any other dimension index yields 0.
 9 |   return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/ptx-nvidiacl/lib/workitem/get_num_groups.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | // Reads the nctaid special register for dimension 0 (x), 1 (y) or 2 (z).
 4 | _CLC_DEF size_t get_num_groups(uint dim) {
 5 |   if (dim == 0) return __nvvm_read_ptx_sreg_nctaid_x();
 6 |   if (dim == 1) return __nvvm_read_ptx_sreg_nctaid_y();
 7 |   if (dim == 2) return __nvvm_read_ptx_sreg_nctaid_z();
 8 |   // Any other dimension index yields 0.
 9 |   return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/ptx/lib/SOURCES:
--------------------------------------------------------------------------------
1 | math/nextafter.cl
2 | 


--------------------------------------------------------------------------------
/ptx/lib/SOURCES_3.9:
--------------------------------------------------------------------------------
1 | shared/vload_half_helpers.ll
2 | shared/vstore_half_helpers.ll
3 | 


--------------------------------------------------------------------------------
/ptx/lib/SOURCES_4.0:
--------------------------------------------------------------------------------
1 | shared/vload_half_helpers.ll
2 | shared/vstore_half_helpers.ll
3 | 


--------------------------------------------------------------------------------
/ptx/lib/SOURCES_5.0:
--------------------------------------------------------------------------------
1 | shared/vload_half_helpers.ll
2 | shared/vstore_half_helpers.ll
3 | 


--------------------------------------------------------------------------------
/ptx/lib/math/nextafter.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | #include "../lib/clcmacro.h"
 3 | #include <math/clc_nextafter.h>
 4 | 
 5 | _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float)
 6 | 
 7 | #ifdef cl_khr_fp64
 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 9 | _CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __clc_nextafter, double, double)
10 | #endif
11 | 


--------------------------------------------------------------------------------
/ptx/lib/shared/vload_half_helpers.ll:
--------------------------------------------------------------------------------
 1 | define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
 2 |   %data = load half, half addrspace(0)* %ptr ; read one half through the addrspace(0) pointer
 3 |   %res = fpext half %data to float ; extend the loaded half to float
 4 |   ret float %res
 5 | }
 6 | 
 7 | define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 8 |   %data = load half, half addrspace(1)* %ptr ; read one half through the addrspace(1) pointer
 9 |   %res = fpext half %data to float ; extend the loaded half to float
10 |   ret float %res
11 | }
12 | 
13 | define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
14 |   %data = load half, half addrspace(3)* %ptr ; read one half through the addrspace(3) pointer
15 |   %res = fpext half %data to float ; extend the loaded half to float
16 |   ret float %res
17 | }
18 | 
19 | define float @__clc_vload_half_float_helper__constant(half addrspace(4)* nocapture %ptr) nounwind alwaysinline {
20 |   %data = load half, half addrspace(4)* %ptr ; read one half through the addrspace(4) pointer
21 |   %res = fpext half %data to float ; extend the loaded half to float
22 |   ret float %res
23 | }
24 | 


--------------------------------------------------------------------------------
/r600/lib/SOURCES:
--------------------------------------------------------------------------------
 1 | math/fmax.cl
 2 | math/fmin.cl
 3 | synchronization/barrier.cl
 4 | workitem/get_global_offset.cl
 5 | workitem/get_group_id.cl
 6 | workitem/get_global_size.cl
 7 | workitem/get_local_id.cl
 8 | workitem/get_local_size.cl
 9 | workitem/get_num_groups.cl
10 | workitem/get_work_dim.cl
11 | 


--------------------------------------------------------------------------------
/r600/lib/SOURCES_3.9:
--------------------------------------------------------------------------------
 1 | image/get_image_dim.cl
 2 | image/get_image_width.cl
 3 | image/get_image_height.cl
 4 | image/get_image_depth.cl
 5 | image/get_image_channel_data_type.cl
 6 | image/get_image_channel_order.cl
 7 | image/get_image_attributes_impl.ll
 8 | image/read_imagef.cl
 9 | image/read_imagei.cl
10 | image/read_imageui.cl
11 | image/read_image_impl.ll
12 | image/write_imagef.cl
13 | image/write_imagei.cl
14 | image/write_imageui.cl
15 | image/write_image_impl.ll
16 | 


--------------------------------------------------------------------------------
/r600/lib/image/get_image_channel_data_type.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | _CLC_DECL int __clc_get_image_channel_data_type_2d(image2d_t);
 4 | _CLC_DECL int __clc_get_image_channel_data_type_3d(image3d_t);
 5 | 
 6 | // 2D image overload: returns the value of __clc_get_image_channel_data_type_2d.
 7 | _CLC_OVERLOAD _CLC_DEF int get_image_channel_data_type(image2d_t image) {
 8 |   return __clc_get_image_channel_data_type_2d(image);
 9 | }
10 | // 3D image overload: returns the value of __clc_get_image_channel_data_type_3d.
11 | _CLC_OVERLOAD _CLC_DEF int get_image_channel_data_type(image3d_t image) {
12 |   return __clc_get_image_channel_data_type_3d(image);
13 | }
14 | 


--------------------------------------------------------------------------------
/r600/lib/image/get_image_channel_order.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | _CLC_DECL int __clc_get_image_channel_order_2d(image2d_t);
 4 | _CLC_DECL int __clc_get_image_channel_order_3d(image3d_t);
 5 | 
 6 | // 2D image overload: returns the value of __clc_get_image_channel_order_2d.
 7 | _CLC_OVERLOAD _CLC_DEF int get_image_channel_order(image2d_t image) {
 8 |   return __clc_get_image_channel_order_2d(image);
 9 | }
10 | // 3D image overload: returns the value of __clc_get_image_channel_order_3d.
11 | _CLC_OVERLOAD _CLC_DEF int get_image_channel_order(image3d_t image) {
12 |   return __clc_get_image_channel_order_3d(image);
13 | }
14 | 


--------------------------------------------------------------------------------
/r600/lib/image/get_image_depth.cl:
--------------------------------------------------------------------------------
1 | #include <clc/clc.h>
2 | 
3 | _CLC_DECL int __clc_get_image_depth_3d(image3d_t);
4 | 
5 | // 3D image overload: returns the value of __clc_get_image_depth_3d.
6 | _CLC_OVERLOAD _CLC_DEF int get_image_depth(image3d_t image) {
7 |   return __clc_get_image_depth_3d(image);
8 | }
9 | 


--------------------------------------------------------------------------------
/r600/lib/image/get_image_dim.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | // 2D images yield (width, height); 3D images yield (width, height, depth, 0).
 4 | _CLC_OVERLOAD _CLC_DEF int2 get_image_dim (image2d_t image) {
 5 |   return (int2)(get_image_width(image), get_image_height(image));
 6 | }
 7 | _CLC_OVERLOAD _CLC_DEF int4 get_image_dim (image3d_t image) {
 8 |   return (int4)(get_image_width(image), get_image_height(image), get_image_depth(image), 0);
 9 | }
10 | 


--------------------------------------------------------------------------------
/r600/lib/image/get_image_height.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | _CLC_DECL int __clc_get_image_height_2d(image2d_t);
 4 | _CLC_DECL int __clc_get_image_height_3d(image3d_t);
 5 | 
 6 | // 2D image overload: returns the value of __clc_get_image_height_2d.
 7 | _CLC_OVERLOAD _CLC_DEF int get_image_height(image2d_t image) {
 8 |   return __clc_get_image_height_2d(image);
 9 | }
10 | // 3D image overload: returns the value of __clc_get_image_height_3d.
11 | _CLC_OVERLOAD _CLC_DEF int get_image_height(image3d_t image) {
12 |   return __clc_get_image_height_3d(image);
13 | }
14 | 


--------------------------------------------------------------------------------
/r600/lib/image/get_image_width.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | _CLC_DECL int __clc_get_image_width_2d(image2d_t);
 4 | _CLC_DECL int __clc_get_image_width_3d(image3d_t);
 5 | 
 6 | // 2D image overload: returns the value of __clc_get_image_width_2d.
 7 | _CLC_OVERLOAD _CLC_DEF int get_image_width(image2d_t image) {
 8 |   return __clc_get_image_width_2d(image);
 9 | }
10 | // 3D image overload: returns the value of __clc_get_image_width_3d.
11 | _CLC_OVERLOAD _CLC_DEF int get_image_width(image3d_t image) {
12 |   return __clc_get_image_width_3d(image);
13 | }
14 | 


--------------------------------------------------------------------------------
/r600/lib/image/read_imagef.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | _CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
 4 | 
 5 | // Integer-coordinate overload: builds a float2 from coord.x/coord.y and reads through __clc_read_imagef_tex.
 6 | _CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler,
 7 |                                           int2 coord) {
 8 |   return __clc_read_imagef_tex(image, sampler, (float2)(coord.x, coord.y));
 9 | }
10 | 
11 | // Float-coordinate overload: passes coord straight to __clc_read_imagef_tex.
12 | _CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, float2 coord) {
13 |   return __clc_read_imagef_tex(image, sampler, coord);
14 | }
15 | 


--------------------------------------------------------------------------------
/r600/lib/image/read_imagei.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Sampling helper that takes float coordinates. Only declared here; the
 4 |  * definition lives outside this file. */
 5 | _CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
 6 | 
 7 | /* Reinterprets the bits of a float4 as an int4; no numeric conversion
 8 |  * is performed. */
 9 | int4 __clc_reinterpret_v4f_to_v4i(float4 v) {
10 |   return as_int4(v);
11 | }
12 | 
13 | /* Float-coordinate overload: reads through __clc_read_imagef_tex() and
14 |  * returns the resulting float4 reinterpreted bit-for-bit as int4. */
15 | _CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler,
16 |                                         float2 coord) {
17 |   const float4 texel = __clc_read_imagef_tex(image, sampler, coord);
18 |   return __clc_reinterpret_v4f_to_v4i(texel);
19 | }
20 | 
21 | /* Integer-coordinate overload: converts the coordinates to float first,
22 |  * then behaves like the float-coordinate overload. */
23 | _CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler,
24 |                                         int2 coord) {
25 |   float2 fcoord;
26 |   fcoord.x = coord.x;
27 |   fcoord.y = coord.y;
28 |   const float4 texel = __clc_read_imagef_tex(image, sampler, fcoord);
29 |   return __clc_reinterpret_v4f_to_v4i(texel);
30 | }
31 | 


--------------------------------------------------------------------------------
/r600/lib/image/read_imageui.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Sampling helper that takes float coordinates. Only declared here; the
 4 |  * definition lives outside this file. */
 5 | _CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
 6 | 
 7 | /* Reinterprets the bits of a float4 as a uint4; no numeric conversion
 8 |  * is performed. */
 9 | uint4 __clc_reinterpret_v4f_to_v4ui(float4 v) {
10 |   return as_uint4(v);
11 | }
12 | 
13 | /* Float-coordinate overload: reads through __clc_read_imagef_tex() and
14 |  * returns the resulting float4 reinterpreted bit-for-bit as uint4. */
15 | _CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler,
16 |                                           float2 coord) {
17 |   const float4 texel = __clc_read_imagef_tex(image, sampler, coord);
18 |   return __clc_reinterpret_v4f_to_v4ui(texel);
19 | }
20 | 
21 | /* Integer-coordinate overload: converts the coordinates to float first,
22 |  * then behaves like the float-coordinate overload. */
23 | _CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler,
24 |                                           int2 coord) {
25 |   float2 fcoord;
26 |   fcoord.x = coord.x;
27 |   fcoord.y = coord.y;
28 |   const float4 texel = __clc_read_imagef_tex(image, sampler, fcoord);
29 |   return __clc_reinterpret_v4f_to_v4ui(texel);
30 | }
31 | 


--------------------------------------------------------------------------------
/r600/lib/image/write_imagef.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Store helper for 2D images with float4 colors. Only declared here; the
 4 |  * definition lives outside this file. */
 5 | _CLC_DECL void __clc_write_imagef_2d(image2d_t image, int2 coord, float4 color);
 6 | 
 7 | /* Forwards image, coord and color unchanged to __clc_write_imagef_2d(). */
 8 | _CLC_OVERLOAD _CLC_DEF void write_imagef(image2d_t image, int2 coord,
 9 |                                          float4 color) {
10 |   __clc_write_imagef_2d(image, coord, color);
11 | }
12 | 


--------------------------------------------------------------------------------
/r600/lib/image/write_imagei.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Store helper for 2D images with int4 colors. Only declared here; the
 4 |  * definition lives outside this file. */
 5 | _CLC_DECL void __clc_write_imagei_2d(image2d_t image, int2 coord, int4 color);
 6 | 
 7 | /* Forwards image, coord and color unchanged to __clc_write_imagei_2d(). */
 8 | _CLC_OVERLOAD _CLC_DEF void write_imagei(image2d_t image, int2 coord,
 9 |                                          int4 color) {
10 |   __clc_write_imagei_2d(image, coord, color);
11 | }
12 | 


--------------------------------------------------------------------------------
/r600/lib/image/write_imageui.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Store helper for 2D images with uint4 colors. Only declared here; the
 4 |  * definition lives outside this file. */
 5 | _CLC_DECL void __clc_write_imageui_2d(image2d_t image, int2 coord, uint4 color);
 6 | 
 7 | /* Forwards image, coord and color unchanged to __clc_write_imageui_2d(). */
 8 | _CLC_OVERLOAD _CLC_DEF void write_imageui(image2d_t image, int2 coord,
 9 |                                           uint4 color) {
10 |   __clc_write_imageui_2d(image, coord, color);
11 | }
12 | 


--------------------------------------------------------------------------------
/r600/lib/math/fmax.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #include "../../../generic/lib/clcmacro.h"
 4 | #include "../../../generic/lib/math/math.h"
 5 | 
 6 | /* Single-precision fmax. Both operands are first passed through
 7 |  * __clc_flush_denormal_if_not_supported(): without that step the fmax
 8 |  * instruction flushes the values for comparison, but outputs the
 9 |  * original denormal. */
10 | _CLC_DEF _CLC_OVERLOAD float fmax(float x, float y)
11 | {
12 |    const float lhs = __clc_flush_denormal_if_not_supported(x);
13 |    const float rhs = __clc_flush_denormal_if_not_supported(y);
14 |    return __builtin_fmaxf(lhs, rhs);
15 | }
16 | /* Float vector overloads come from this clcmacro.h macro (presumably by
17 |  * applying the scalar fmax per component -- confirm in clcmacro.h). */
18 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, fmax, float, float)
19 | 
20 | #ifdef cl_khr_fp64
21 | 
22 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
23 | 
24 | /* Double-precision fmax, compiled only when cl_khr_fp64 is defined.
25 |  * Calls the builtin directly, with no denormal flushing step. */
26 | _CLC_DEF _CLC_OVERLOAD double fmax(double x, double y)
27 | {
28 |    const double larger = __builtin_fmax(x, y);
29 |    return larger;
30 | }
31 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmax, double, double)
32 | 
33 | #endif
34 | 
35 | /* Expand the generic fmax.inc body through gentype.inc. */
36 | #define __CLC_BODY <../../../generic/lib/math/fmax.inc>
37 | #include <clc/math/gentype.inc>
38 | 


--------------------------------------------------------------------------------
/r600/lib/math/fmin.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | #include "../../../generic/lib/clcmacro.h"
 4 | #include "../../../generic/lib/math/math.h"
 5 | 
 6 | /* Single-precision fmin. Both operands are first passed through
 7 |  * __clc_flush_denormal_if_not_supported(), which flushes denormals when
 8 |  * they are not enabled: without that step the fmin instruction flushes
 9 |  * the values for comparison, but outputs the original denormal. */
10 | _CLC_DEF _CLC_OVERLOAD float fmin(float x, float y)
11 | {
12 |    const float lhs = __clc_flush_denormal_if_not_supported(x);
13 |    const float rhs = __clc_flush_denormal_if_not_supported(y);
14 |    return __builtin_fminf(lhs, rhs);
15 | }
16 | /* Float vector overloads come from this clcmacro.h macro (presumably by
17 |  * applying the scalar fmin per component -- confirm in clcmacro.h). */
18 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, fmin, float, float)
19 | 
20 | #ifdef cl_khr_fp64
21 | 
22 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
23 | 
24 | /* Double-precision fmin, compiled only when cl_khr_fp64 is defined.
25 |  * Calls the builtin directly, with no denormal flushing step. */
26 | _CLC_DEF _CLC_OVERLOAD double fmin(double x, double y)
27 | {
28 |    const double smaller = __builtin_fmin(x, y);
29 |    return smaller;
30 | }
31 | _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmin, double, double)
32 | 
33 | #endif
34 | 
35 | /* Expand the generic fmin.inc body through gentype.inc. */
36 | #define __CLC_BODY <../../../generic/lib/math/fmin.inc>
37 | #include <clc/math/gentype.inc>
38 | 


--------------------------------------------------------------------------------
/r600/lib/synchronization/barrier.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Binds __clc_r600_barrier to the symbol "llvm.r600.group.barrier". */
 4 | _CLC_DEF void __clc_r600_barrier(void) __asm("llvm.r600.group.barrier");
 5 | 
 6 | /* Issues the r600 group barrier. `flags` is ignored for now: a mem_fence
 7 |  * call belongs here, but mem_fence is not implemented for r600 yet. */
 8 | _CLC_DEF void barrier(uint flags) {
 9 |   (void)flags;
10 |   __clc_r600_barrier();
11 | }
12 | 


--------------------------------------------------------------------------------
/r600/lib/workitem/get_global_offset.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Returns the global offset for dimension `dim`. For dim 0..2 the value
 4 |  * is read from slot dim + 1 of the buffer returned by
 5 |  * __builtin_r600_implicitarg_ptr(); any other `dim` yields 0. */
 6 | _CLC_DEF uint get_global_offset(uint dim)
 7 | {
 8 | 	if (dim >= 3)
 9 | 		return 0;
10 | 
11 | 	__attribute__((address_space(7))) uint * implicit_args =
12 | 		(__attribute__((address_space(7))) uint *)
13 | 		__builtin_r600_implicitarg_ptr();
14 | 	return implicit_args[1 + dim];
15 | }
16 | 


--------------------------------------------------------------------------------
/r600/lib/workitem/get_global_size.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Per-axis readers, each bound by asm label to the matching
 4 |  * llvm.r600.read.global.size.{x,y,z} symbol. */
 5 | uint __clc_r600_get_global_size_x(void) __asm("llvm.r600.read.global.size.x");
 6 | uint __clc_r600_get_global_size_y(void) __asm("llvm.r600.read.global.size.y");
 7 | uint __clc_r600_get_global_size_z(void) __asm("llvm.r600.read.global.size.z");
 8 | 
 9 | /* Returns the global size along dimension `dim` (0 = x, 1 = y, 2 = z).
10 |  * Any other `dim` yields 1. */
11 | _CLC_DEF size_t get_global_size(uint dim)
12 | {
13 | 	if (dim == 0)
14 | 		return __clc_r600_get_global_size_x();
15 | 	if (dim == 1)
16 | 		return __clc_r600_get_global_size_y();
17 | 	if (dim == 2)
18 | 		return __clc_r600_get_global_size_z();
19 | 	return 1;
20 | }
21 | 


--------------------------------------------------------------------------------
/r600/lib/workitem/get_group_id.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Returns the work-group ID along dimension `dim` (0 = x, 1 = y, 2 = z),
 4 |  * read through the matching __builtin_r600_read_tgid_* builtin.
 5 |  * For any other `dim` the result is 0, which is the value OpenCL
 6 |  * specifies for get_group_id() with an out-of-range dimension index. */
 7 | _CLC_DEF uint get_group_id(uint dim)
 8 | {
 9 | 	switch(dim) {
10 | 	case 0: return __builtin_r600_read_tgid_x();
11 | 	case 1: return __builtin_r600_read_tgid_y();
12 | 	case 2: return __builtin_r600_read_tgid_z();
13 | 	default: return 0;
14 | 	}
15 | }
16 | 


--------------------------------------------------------------------------------
/r600/lib/workitem/get_local_id.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Returns the local work-item ID along dimension `dim` (0 = x, 1 = y,
 4 |  * 2 = z), read through the matching __builtin_r600_read_tidig_* builtin.
 5 |  * For any other `dim` the result is 0, which is the value OpenCL
 6 |  * specifies for get_local_id() with an out-of-range dimension index. */
 7 | _CLC_DEF uint get_local_id(uint dim)
 8 | {
 9 | 	switch(dim) {
10 | 	case 0: return __builtin_r600_read_tidig_x();
11 | 	case 1: return __builtin_r600_read_tidig_y();
12 | 	case 2: return __builtin_r600_read_tidig_z();
13 | 	default: return 0;
14 | 	}
15 | }
16 | 


--------------------------------------------------------------------------------
/r600/lib/workitem/get_local_size.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Per-axis readers, each bound by asm label to the matching
 4 |  * llvm.r600.read.local.size.{x,y,z} symbol. */
 5 | uint __clc_r600_get_local_size_x(void) __asm("llvm.r600.read.local.size.x");
 6 | uint __clc_r600_get_local_size_y(void) __asm("llvm.r600.read.local.size.y");
 7 | uint __clc_r600_get_local_size_z(void) __asm("llvm.r600.read.local.size.z");
 8 | 
 9 | /* Returns the local size along dimension `dim` (0 = x, 1 = y, 2 = z).
10 |  * Any other `dim` yields 1. */
11 | _CLC_DEF size_t get_local_size(uint dim)
12 | {
13 | 	if (dim == 0)
14 | 		return __clc_r600_get_local_size_x();
15 | 	if (dim == 1)
16 | 		return __clc_r600_get_local_size_y();
17 | 	if (dim == 2)
18 | 		return __clc_r600_get_local_size_z();
19 | 	return 1;
20 | }
21 | 


--------------------------------------------------------------------------------
/r600/lib/workitem/get_num_groups.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Per-axis readers, each bound by asm label to the matching
 4 |  * llvm.r600.read.ngroups.{x,y,z} symbol. */
 5 | uint __clc_r600_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x");
 6 | uint __clc_r600_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y");
 7 | uint __clc_r600_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z");
 8 | 
 9 | /* Returns the number of groups along dimension `dim` (0 = x, 1 = y,
10 |  * 2 = z). Any other `dim` yields 1. */
11 | _CLC_DEF size_t get_num_groups(uint dim)
12 | {
13 | 	if (dim == 0)
14 | 		return __clc_r600_get_num_groups_x();
15 | 	if (dim == 1)
16 | 		return __clc_r600_get_num_groups_y();
17 | 	if (dim == 2)
18 | 		return __clc_r600_get_num_groups_z();
19 | 	return 1;
20 | }
21 | 


--------------------------------------------------------------------------------
/r600/lib/workitem/get_work_dim.cl:
--------------------------------------------------------------------------------
 1 | #include <clc/clc.h>
 2 | 
 3 | /* Returns the first uint (slot 0) of the buffer returned by
 4 |  * __builtin_r600_implicitarg_ptr(). */
 5 | _CLC_DEF uint get_work_dim(void)
 6 | {
 7 | 	__attribute__((address_space(7))) uint * implicit_args =
 8 | 		(__attribute__((address_space(7))) uint *)
 9 | 		__builtin_r600_implicitarg_ptr();
10 | 	return *implicit_args;
11 | }
12 | 


--------------------------------------------------------------------------------
/test/add_sat.cl:
--------------------------------------------------------------------------------
1 | /* Stores add_sat of the chars read from b and c into *a. */
2 | __kernel void foo(__global char *a, __global char *b, __global char *c) {
3 |   const char lhs = *b;
4 |   const char rhs = *c;
5 |   *a = add_sat(lhs, rhs);
6 | }
7 | 


--------------------------------------------------------------------------------
/test/as_type.cl:
--------------------------------------------------------------------------------
1 | /* Stores as_int4(*y) into *x. Kernel pointer arguments must name the
2 |  * __global, __local or __constant address space, so both are __global. */
3 | __kernel void foo(__global int4 *x, __global float4 *y) {
4 |   *x = as_int4(*y);
5 | }
6 | 


--------------------------------------------------------------------------------
/test/convert.cl:
--------------------------------------------------------------------------------
1 | /* Stores convert_int4(*y) into *x. Kernel pointer arguments must name the
2 |  * __global, __local or __constant address space, so both are __global. */
3 | __kernel void foo(__global int4 *x, __global float4 *y) {
4 |   *x = convert_int4(*y);
5 | }
6 | 


--------------------------------------------------------------------------------
/test/cos.cl:
--------------------------------------------------------------------------------
1 | /* Replaces *f with cos(*f). Kernel pointer arguments must name the
2 |  * __global, __local or __constant address space, so f is __global. */
3 | __kernel void foo(__global float4 *f) {
4 |   *f = cos(*f);
5 | }
6 | 


--------------------------------------------------------------------------------
/test/cross.cl:
--------------------------------------------------------------------------------
1 | /* Stores cross(f[0], f[1]) into f[0]. Kernel pointer arguments must name
2 |  * the __global, __local or __constant address space, so f is __global. */
3 | __kernel void foo(__global float4 *f) {
4 |   *f = cross(f[0], f[1]);
5 | }
6 | 


--------------------------------------------------------------------------------
/test/fabs.cl:
--------------------------------------------------------------------------------
1 | /* Replaces *f with fabs(*f). Kernel pointer arguments must name the
2 |  * __global, __local or __constant address space, so f is __global. */
3 | __kernel void foo(__global float *f) {
4 |   *f = fabs(*f);
5 | }
6 | 


--------------------------------------------------------------------------------
/test/get_group_id.cl:
--------------------------------------------------------------------------------
1 | /* Writes 1 to i[get_group_id(0)]. Kernel pointer arguments must name the
2 |  * __global, __local or __constant address space, so i is __global. */
3 | __kernel void foo(__global int *i) {
4 |   i[get_group_id(0)] = 1;
5 | }
6 | 


--------------------------------------------------------------------------------
/test/rsqrt.cl:
--------------------------------------------------------------------------------
1 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
2 | 
3 | /* rsqrt on float4 and double4; pointer args are __global as kernels need. */
4 | __kernel void foo(__global float4 *x, __global double4 *y) {
5 |   x[1] = rsqrt(x[0]);
6 |   y[1] = rsqrt(y[0]);
7 | }
8 | 


--------------------------------------------------------------------------------
/test/subsat.cl:
--------------------------------------------------------------------------------
 1 | /* Stores sub_sat(x, y) for char operands through the __global pointer a. */
 2 | __kernel void test_subsat_char(__global char *a, char x, char y) {
 3 |   *a = sub_sat(x, y);
 4 | }
 5 | 
 6 | /* Stores sub_sat(x, y) for uchar operands through the __global pointer a. */
 7 | __kernel void test_subsat_uchar(__global uchar *a, uchar x, uchar y) {
 8 |   *a = sub_sat(x, y);
 9 | }
10 | 
11 | /* Stores sub_sat(x, y) for long operands through the __global pointer a. */
12 | __kernel void test_subsat_long(__global long *a, long x, long y) {
13 |   *a = sub_sat(x, y);
14 | }
15 | 
16 | /* Stores sub_sat(x, y) for ulong operands through the __global pointer a. */
17 | __kernel void test_subsat_ulong(__global ulong *a, ulong x, ulong y) {
18 |   *a = sub_sat(x, y);
19 | }


--------------------------------------------------------------------------------