├── CMakeLists.txt ├── Contributing.md ├── LICENSE.txt ├── README.md ├── first_edition_errata.txt ├── images ├── cover_first_edition.jpg └── cover_second_edition.jpg ├── samples ├── CMakeLists.txt ├── Ch01_introduction │ ├── CMakeLists.txt │ ├── fig_1_1_hello.cpp │ ├── fig_1_2.hpp │ ├── fig_1_3_race.cpp │ ├── fig_1_4_lambda.cpp │ └── fig_1_6_functor.cpp ├── Ch02_where_code_runs │ ├── CMakeLists.txt │ ├── fig_2_10_gpu_selector.cpp │ ├── fig_2_12_multiple_selectors.cpp │ ├── fig_2_13_gpu_plus_fpga.cpp │ ├── fig_2_15_aspect_selector.cpp │ ├── fig_2_16_custom_selector.cpp │ ├── fig_2_18_simple_device_code.cpp │ ├── fig_2_19.hpp │ ├── fig_2_20.hpp │ ├── fig_2_22_simple_device_code_2.cpp │ ├── fig_2_23_host_task.cpp │ ├── fig_2_2_simple_program.cpp │ ├── fig_2_3.hpp │ ├── fig_2_4.hpp │ ├── fig_2_7_implicit_default_selector.cpp │ └── fig_2_9_cpu_selector.cpp ├── Ch03_data_management │ ├── CMakeLists.txt │ ├── fig_3_10_in_order.cpp │ ├── fig_3_11_depends_on.cpp │ ├── fig_3_13_read_after_write.cpp │ ├── fig_3_15_write_after_read_and_write_after_write.cpp │ ├── fig_3_17.hpp │ ├── fig_3_18.hpp │ ├── fig_3_4_usm_explicit_data_movement.cpp │ ├── fig_3_5_usm_implicit_data_movement.cpp │ └── fig_3_6_buffers_and_accessors.cpp ├── Ch04_expressing_parallelism │ ├── CMakeLists.txt │ ├── fig_4_1.hpp │ ├── fig_4_10.hpp │ ├── fig_4_11.hpp │ ├── fig_4_15_nd_range_matrix_multiply.cpp │ ├── fig_4_17.hpp │ ├── fig_4_18.hpp │ ├── fig_4_19.hpp │ ├── fig_4_2.hpp │ ├── fig_4_20.hpp │ ├── fig_4_21.hpp │ ├── fig_4_22.hpp │ ├── fig_4_5_vector_add.cpp │ ├── fig_4_6_matrix_add.cpp │ ├── fig_4_7_basic_matrix_multiply.cpp │ └── fig_4_9.hpp ├── Ch05_error_handling │ ├── CMakeLists.txt │ ├── fig_5_1_async_task_graph.cpp │ ├── fig_5_2_sync_error.cpp │ ├── fig_5_3_async_error.cpp │ ├── fig_5_4_unhandled_exception.cpp │ ├── fig_5_5_terminate.cpp │ ├── fig_5_6_catch_snip.cpp │ ├── fig_5_7_catch.cpp │ ├── fig_5_8_lambda_handler.cpp │ └── fig_5_9_default_handler_proxy.cpp ├── Ch06_unified_shared_memory │ ├── CMakeLists.txt │ ├── fig_6_2.hpp │ ├── fig_6_3.hpp │ ├── fig_6_4.hpp │ ├── fig_6_5_allocation_styles.cpp │ ├── fig_6_6_usm_explicit_data_movement.cpp │ ├── fig_6_7_usm_implicit_data_movement.cpp │ ├── fig_6_8_prefetch_memadvise.cpp │ └── fig_6_9_queries.cpp ├── Ch07_buffers │ ├── CMakeLists.txt │ ├── fig_7_1.hpp │ ├── fig_7_10_accessors.cpp │ ├── fig_7_2_3_4_creating_buffers.cpp │ ├── fig_7_5_buffer_properties.cpp │ └── fig_7_8_accessors_simple.cpp ├── Ch08_scheduling_kernels_and_data_movement │ ├── CMakeLists.txt │ ├── fig_8_3_linear_dependence_in_order.cpp │ ├── fig_8_4_linear_dependence_events.cpp │ ├── fig_8_5_linear_dependence_buffers.cpp │ ├── fig_8_6_y_in_order.cpp │ ├── fig_8_7_y_events.cpp │ └── fig_8_8_y_buffers.cpp ├── Ch09_communication_and_sychronization │ ├── CMakeLists.txt │ ├── fig_9_11_matmul_broadcast.cpp │ ├── fig_9_12_ndrange_sub_group_matmul.cpp │ ├── fig_9_4_naive_matmul.cpp │ ├── fig_9_7_local_accessors.cpp │ ├── fig_9_8_ndrange_tiled_matmul.cpp │ ├── fig_9_9_sub_group_barrier.cpp │ └── matmul_harness.cpp ├── Ch10_defining_kernels │ ├── CMakeLists.txt │ ├── fig_10_10_kernel_query.cpp │ ├── fig_10_2_kernel_lambda.cpp │ ├── fig_10_3_optional_kernel_lambda_elements.cpp │ ├── fig_10_4_named_kernel_lambda.cpp │ ├── fig_10_5_unnamed_kernel_lambda.cpp │ ├── fig_10_6_kernel_functor.cpp │ ├── fig_10_7_optional_kernel_functor_elements.cpp │ ├── fig_10_8_use_kernel_bundle.cpp │ └── fig_10_9_use_specific_kernel_bundle.cpp ├── Ch11_vectors_and_math_arrays │ ├── CMakeLists.txt │ ├── fig_11_10.hpp │ ├── 
fig_11_2_marray.cpp │ ├── fig_11_3.hpp │ ├── fig_11_4_load_store.cpp │ ├── fig_11_5.hpp │ ├── fig_11_6_swizzle_vec.cpp │ └── fig_11_7_vector_exec.cpp ├── Ch12_device_information_and_kernel_specialization │ ├── CMakeLists.txt │ ├── fig_12_10_specialize.cpp │ ├── fig_12_1_assigned_device.cpp │ ├── fig_12_2_try_catch.cpp │ ├── fig_12_4_device_selector.cpp │ ├── fig_12_5_curious.cpp │ ├── fig_12_7_very_curious.cpp │ ├── fig_12_8_invocation_parameters.cpp │ └── tst_12_4_device_selector.cpp ├── Ch13_practical_tips │ ├── CMakeLists.txt │ ├── fig_13_10_common_pattern_bug.cpp │ ├── fig_13_11_host_accessor.cpp │ ├── fig_13_12_host_accessor_for_init.cpp │ ├── fig_13_13_host_accessor_deadlock.cpp │ ├── fig_13_4_stream.cpp │ ├── fig_13_6_queue_profiling_timing.cpp │ └── fig_13_9_common_buffer_pattern.cpp ├── Ch14_common_parallel_patterns │ ├── CMakeLists.txt │ ├── fig_14_10.hpp │ ├── fig_14_11_array_reduction.cpp │ ├── fig_14_12_user_defined_reduction.cpp │ ├── fig_14_13_algorithm_comparison.cpp │ ├── fig_14_15_map.cpp │ ├── fig_14_16_stencil.cpp │ ├── fig_14_17_local_stencil.cpp │ ├── fig_14_18_basic_reduction.cpp │ ├── fig_14_19_nd_range_reduction.cpp │ ├── fig_14_20-22_inclusive_scan.cpp │ ├── fig_14_23.hpp │ ├── fig_14_24_local_pack.cpp │ ├── fig_14_25.hpp │ ├── fig_14_26_local_unpack.cpp │ ├── fig_14_8_one_reduction.cpp │ └── fig_14_9.hpp ├── Ch15_programming_for_gpus │ ├── CMakeLists.txt │ ├── fig_15_10_divergent_control_flow.cpp │ ├── fig_15_12_small_work_group_matrix_multiplication.cpp │ ├── fig_15_18_columns_matrix_multiplication.cpp │ ├── fig_15_3_single_task_matrix_multiplication.cpp │ ├── fig_15_5_somewhat_parallel_matrix_multiplication.cpp │ ├── fig_15_7_more_parallel_matrix_multiplication.cpp │ └── matrix_multiplication_harness.cpp ├── Ch16_programming_for_cpus │ ├── CMakeLists.txt │ ├── fig_16_10.hpp │ ├── fig_16_12_forward_dep.cpp │ ├── fig_16_15.hpp │ ├── fig_16_16.hpp │ ├── fig_16_17pre.hpp │ ├── fig_16_18_vector_swizzle.cpp │ ├── fig_16_2.hpp │ ├── fig_16_4.hpp │ ├── fig_16_5.hpp │ └── fig_16_6_stream_triad.cpp ├── Ch17_programming_for_fpgas │ ├── CMakeLists.txt │ ├── fig_17_11_fpga_emulator_selector.cpp │ ├── fig_17_17_ndrange_func.cpp │ ├── fig_17_18_loop_func.cpp │ ├── fig_17_20_loop_carried_deps.cpp │ ├── fig_17_22_loop_carried_state.cpp │ ├── fig_17_31_inter_kernel_pipe.cpp │ ├── fig_17_32.hpp │ ├── fig_17_33.hpp │ └── fig_17_9_fpga_selector.cpp ├── Ch18_libraries │ ├── CMakeLists.txt │ ├── fig_18_10_pstl_usm.cpp │ ├── fig_18_1_builtin.cpp │ ├── fig_18_2_swap.cpp │ ├── fig_18_5.hpp │ ├── fig_18_6_std_fill.cpp │ ├── fig_18_7_std_fill_default_policy.cpp │ ├── fig_18_8_binary_search.cpp │ └── fig_18_9_pstl_usm_device.cpp ├── Ch19_memory_model_and_atomics │ ├── CMakeLists.txt │ ├── fig_19_11.hpp │ ├── fig_19_12.hpp │ ├── fig_19_13.hpp │ ├── fig_19_14.hpp │ ├── fig_19_15_buffer_and_atomic_ref.cpp │ ├── fig_19_16_usm_and_atomic_ref.cpp │ ├── fig_19_17_histogram.cpp │ ├── fig_19_18-19_device_latch.cpp │ ├── fig_19_3_data_race.cpp │ ├── fig_19_6_avoid_data_race_with_barrier.cpp │ └── fig_19_7_avoid_data_race_with_atomics.cpp ├── Ch20_backend_interoperability │ ├── CMakeLists.txt │ ├── fig_20_10_level_zero_kernel_interop.cpp │ ├── fig_20_11_filter_selector.cpp │ ├── fig_20_2_querying_backends.cpp │ ├── fig_20_3_opencl_to_sycl.cpp │ ├── fig_20_4_level_zero_to_sycl.cpp │ ├── fig_20_5_sycl_to_opencl.cpp │ ├── fig_20_6_sycl_to_level_zero.cpp │ ├── fig_20_7_interop_handle_opencl.cpp │ ├── fig_20_8_interop_handle_level_zero.cpp │ └── fig_20_9_opencl_kernel_interop.cpp ├── 
Ch21_migrating_cuda_code │ ├── CMakeLists.txt │ ├── fig_21_10_reverse.cu │ ├── fig_21_13-14_reverse_migrated.cpp │ ├── fig_21_1_basicsycl.cpp │ ├── fig_21_2_basiccuda.cu │ ├── fig_21_4-6_walkorder.cu │ ├── fig_21_5_walkorder.cpp │ ├── fig_21_7_possible_deadlock.cpp │ ├── fig_21_8_barriers.cpp │ └── fig_21_9_atomics.cpp └── Epilogue_future_direction_of_sycl │ ├── fig_ep_1_mdspan.cpp │ ├── fig_ep_2.hpp │ └── fig_ep_3_device_constexpr.cpp └── second_edition_errata.txt /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | cmake_minimum_required(VERSION 3.10 FATAL_ERROR) 6 | 7 | # Try to detect the right SYCL compiler if one is not explicitly specified: 8 | if (NOT CMAKE_CXX_COMPILER) 9 | if (WIN32) 10 | set(CMAKE_CXX_COMPILER icx) 11 | else() 12 | find_program(HAS_ICPX "icpx" NO_CACHE) 13 | if (HAS_ICPX) 14 | set(CMAKE_CXX_COMPILER icpx) 15 | else() 16 | set(CMAKE_CXX_COMPILER clang++) 17 | endif() 18 | endif() 19 | endif() 20 | 21 | set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) 22 | set(CMAKE_CXX_STANDARD 17) 23 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 24 | 25 | if (NOT CMAKE_BUILD_TYPE) 26 | message(STATUS "No build type selected, default to Release") 27 | set(CMAKE_BUILD_TYPE "Release" CACHE PATH "Build Type" FORCE) 28 | endif() 29 | 30 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 31 | 32 | project(DPCPPSamples) 33 | option(NODPL "Disable samples that require the oneAPI DPC++ Library (oneDPL).") 34 | option(NODPCT "Disable samples that require the DPC++ Compatibility Tool (dpct).") 35 | option(NOL0 "Disable samples that require the oneAPI Level Zero Headers and Loader." ON) 36 | option(WITHCUDA "Enable CUDA device support for the samples.") 37 | option(WITHROCM "Enable ROCm device support for the samples.") 38 | 39 | if (WITHCUDA AND WITHROCM) 40 | message(FATAL_ERROR "WITHCUDA and WITHROCM cannot be enabled at the same time.\n" 41 | "Clean up the directory and try again with only one of them enabled.") 42 | endif() 43 | 44 | set(CUDA_GPU_ARCH "sm_60" CACHE STRING "CUDA GPUs to compile for.") 45 | if (WITHCUDA) 46 | mark_as_advanced(CLEAR FORCE CUDA_GPU_ARCH) 47 | else() 48 | mark_as_advanced(FORCE CUDA_GPU_ARCH) 49 | endif() 50 | 51 | set(ROCM_GPU_ARCH "gfx1100" CACHE STRING "ROCm GPUs to compile for.") 52 | if (WITHROCM) 53 | mark_as_advanced(CLEAR FORCE ROCM_GPU_ARCH) 54 | else() 55 | mark_as_advanced(FORCE ROCM_GPU_ARCH) 56 | endif() 57 | 58 | enable_testing() 59 | 60 | add_subdirectory(samples) 61 | 62 | if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) 63 | set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}/install" CACHE PATH "Install Path" FORCE) 64 | endif() 65 | -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing to Apress Source Code 2 | 3 | Copyright for Apress source code belongs to the author(s). However, under fair use you are encouraged to fork and contribute minor corrections and updates for the benefit of the author(s) and other readers. 4 | 5 | ## How to Contribute 6 | 7 | 1. Make sure you have a GitHub account. 8 | 2. Fork the repository for the relevant book. 9 | 3. Create a new branch on which to make your change, e.g. 10 | `git checkout -b my_code_contribution` 11 | 4. Keep formating: clang-format -i --style="{BasedOnStyle: Google, ColumnLimit: 60}" .cpp 12 | [we used find . 
-name "*cpp" -exec clang-format -i --style="{BasedOnStyle: Google, ColumnLimit: 60}" {} \; originally] 13 | 5. Commit your change. Include a commit message describing the correction. Please note that if your commit message is not clear, the correction will not be accepted. 14 | 6. Submit a pull request. 15 | 16 | Thank you for your contribution! -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (C) 2020 Intel Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), 7 | to deal in the Software without restriction, including without limitation 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | and/or sell copies of the Software, and to permit persons to whom 10 | the Software is furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 13 | in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 19 | OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 21 | OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | SPDX-License-Identifier: MIT 24 | -------------------------------------------------------------------------------- /images/cover_first_edition.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-parallel-CPP/6d3dce0df50446645f332df217219d143d48ed03/images/cover_first_edition.jpg -------------------------------------------------------------------------------- /images/cover_second_edition.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-parallel-CPP/6d3dce0df50446645f332df217219d143d48ed03/images/cover_second_edition.jpg -------------------------------------------------------------------------------- /samples/Ch01_introduction/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_1_1_hello 8 | SOURCES fig_1_1_hello.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_1_3_race 13 | SOURCES fig_1_3_race.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_1_4_lambda 18 | SOURCES fig_1_4_lambda.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_1_6_functor 23 | SOURCES fig_1_6_functor.cpp) 24 | -------------------------------------------------------------------------------- /samples/Ch01_introduction/fig_1_1_hello.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | const std::string secret{ 10 | "Ifmmp-!xpsme\"\012J(n!tpssz-!Ebwf/!" 
11 | "J(n!bgsbje!J!dbo(u!ep!uibu/!.!IBM\01"}; 12 | 13 | const auto sz = secret.size(); 14 | 15 | int main() { 16 | queue q; 17 | 18 | char* result = malloc_shared(sz, q); 19 | std::memcpy(result, secret.data(), sz); 20 | 21 | q.parallel_for(sz, [=](auto& i) { 22 | result[i] -= 1; 23 | }).wait(); 24 | 25 | std::cout << result << "\n"; 26 | free(result, q); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /samples/Ch01_introduction/fig_1_2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are snippets 6 | // that are not set up to be compiled as is. 7 | 8 | ! Fortran loop 9 | do i = 1, n 10 | z(i) = alpha * x(i) + y(i) 11 | end do 12 | 13 | // C++ loop 14 | for (int i=0;i i) { 20 | z[i] = alpha * x[i] + y[i]; 21 | }).wait(); 22 | -------------------------------------------------------------------------------- /samples/Ch01_introduction/fig_1_3_race.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | const std::string secret{ 10 | "Ifmmp-!xpsme\"\012J(n!tpssz-!Ebwf/!" 11 | "J(n!bgsbje!J!dbo(u!ep!uibu/!.!IBM\01"}; 12 | 13 | const auto sz = secret.size(); 14 | 15 | int main() { 16 | queue q; 17 | 18 | // BEGIN CODE SNIP 19 | // ...we are changing one line from Figure 1-1 20 | char* result = malloc_shared(sz, q); 21 | 22 | // Introduce potential data race! We don't define a 23 | // dependence to ensure correct ordering with later 24 | // operations. 25 | q.memcpy(result, secret.data(), sz); 26 | 27 | q.parallel_for(sz, [=](auto& i) { 28 | result[i] -= 1; 29 | }).wait(); 30 | 31 | // ... 
32 | // END CODE SNIP 33 | std::cout << result << "\n"; 34 | free(result, q); 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch01_introduction/fig_1_4_lambda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | void print_values(const int& i, const int& j, const int& k, 8 | const int& l) { 9 | std::cout << "i == " << i << "\n"; 10 | std::cout << "j == " << j << "\n"; 11 | std::cout << "k == " << k << "\n"; 12 | std::cout << "l == " << l << "\n"; 13 | } 14 | 15 | int main() { 16 | // BEGIN CODE SNIP 17 | int i = 1, j = 10, k = 100, l = 1000; 18 | 19 | auto lambda = [i, &j](int k0, int& l0) -> int { 20 | j = 2 * j; 21 | k0 = 2 * k0; 22 | l0 = 2 * l0; 23 | return i + j + k0 + l0; 24 | }; 25 | 26 | print_values(i, j, k, l); 27 | std::cout << "First call returned " << lambda(k, l) 28 | << "\n"; 29 | print_values(i, j, k, l); 30 | std::cout << "Second call returned " << lambda(k, l) 31 | << "\n"; 32 | print_values(i, j, k, l); 33 | // END CODE SNIP 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch01_introduction/fig_1_6_functor.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | // BEGIN CODE SNIP 8 | class Functor { 9 | public: 10 | Functor(int i, int &j) : my_i{i}, my_jRef{j} {} 11 | 12 | int operator()(int k0, int &l0) { 13 | my_jRef = 2 * my_jRef; 14 | k0 = 2 * k0; 15 | l0 = 2 * l0; 16 | return my_i + my_jRef + k0 + l0; 17 | } 18 | 19 | private: 20 | int my_i; 21 | int &my_jRef; 22 | }; 23 | // END CODE SNIP 24 | 25 | int main() { 26 | int i = 1, j = 10, k = 100, l = 1000; 27 | 28 | Functor F{i, j}; 29 | 30 | std::cout << "First call returned " << F(k, l) << "\n"; 31 | std::cout << "Second call returned " << F(k, l) << "\n"; 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_2_2_simple_program 8 | SOURCES fig_2_2_simple_program.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_2_7_implicit_default_selector 13 | SOURCES fig_2_7_implicit_default_selector.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_2_9_cpu_selector 18 | SOURCES fig_2_9_cpu_selector.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_2_10_gpu_selector 23 | SOURCES fig_2_10_gpu_selector.cpp) 24 | 25 | add_book_sample( 26 | TARGET fig_2_12_multiple_selectors 27 | SOURCES fig_2_12_multiple_selectors.cpp) 28 | 29 | add_book_sample( 30 | TARGET fig_2_13_gpu_plus_fpga 31 | SOURCES fig_2_13_gpu_plus_fpga.cpp) 32 | 33 | add_book_sample( 34 | TARGET fig_2_15_aspect_selector 35 | SOURCES fig_2_15_aspect_selector.cpp) 36 | 37 | add_book_sample( 38 | TARGET fig_2_16_custom_selector 39 | SOURCES fig_2_16_custom_selector.cpp) 40 | 41 | add_book_sample( 42 | TEST 43 | TARGET fig_2_18_simple_device_code 44 | SOURCES fig_2_18_simple_device_code.cpp) 45 | 46 | add_book_sample( 47 | TEST 48 | TARGET fig_2_22_simple_device_code_2 49 | SOURCES fig_2_22_simple_device_code_2.cpp) 50 | 51 | 
add_book_sample( 52 | TEST 53 | TARGET fig_2_23_host_task 54 | SOURCES fig_2_23_host_task.cpp) 55 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_10_gpu_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <iostream> 6 | #include <sycl/sycl.hpp> 7 | using namespace sycl; 8 | 9 | int main() { 10 | // Create queue bound to an available GPU device 11 | queue q{gpu_selector_v}; 12 | 13 | std::cout << "Selected device: " 14 | << q.get_device().get_info<info::device::name>() 15 | << "\n"; 16 | std::cout 17 | << " -> Device vendor: " 18 | << q.get_device().get_info<info::device::vendor>() 19 | << "\n"; 20 | 21 | return 0; 22 | } 23 | 24 | // Example Output: 25 | // Selected device: AMD Radeon RX 5700 XT 26 | // -> Device vendor: AMD Corporation 27 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_13_gpu_plus_fpga.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <iostream> 6 | #include <sycl/ext/intel/fpga_extensions.hpp> // For fpga_selector_v 7 | #include <sycl/sycl.hpp> 8 | using namespace sycl; 9 | 10 | int main() { 11 | queue my_gpu_queue(gpu_selector_v); 12 | queue my_fpga_queue(ext::intel::fpga_selector_v); 13 | 14 | std::cout << "Selected device 1: " 15 | << my_gpu_queue.get_device() 16 | .get_info<info::device::name>() 17 | << "\n"; 18 | 19 | std::cout << "Selected device 2: " 20 | << my_fpga_queue.get_device() 21 | .get_info<info::device::name>() 22 | << "\n"; 23 | 24 | return 0; 25 | } 26 | 27 | // Example Output: 28 | // Selected device 1: Intel(R) UHD Graphics [0x9a60] 29 | // Selected device 2: pac_a10 : Intel PAC Platform (pac_ee00000) 30 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_15_aspect_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <iostream> 6 | #include <sycl/sycl.hpp> 7 | using namespace sycl; 8 | 9 | int main() { 10 | // In the aspect_selector form taking a comma separated 11 | // group of aspects, all aspects must be present for a 12 | // device to be selected. 13 | queue q1{aspect_selector(aspect::fp16, aspect::gpu)}; 14 | 15 | // In the aspect_selector form that takes two vectors, the 16 | // first vector contains aspects that a device must 17 | // exhibit, and the second contains aspects that must NOT 18 | // be exhibited.
19 | queue q2{aspect_selector( 20 | std::vector{aspect::fp64, aspect::fp16}, 21 | std::vector{aspect::gpu, aspect::accelerator})}; 22 | 23 | std::cout 24 | << "First selected device is: " 25 | << q1.get_device().get_info() 26 | << "\n"; 27 | 28 | std::cout 29 | << "Second selected device is: " 30 | << q2.get_device().get_info() 31 | << "\n"; 32 | 33 | return 0; 34 | } 35 | 36 | // Example Output: 37 | // First selected device is: Intel(R) UHD Graphics [0x9a60] 38 | // Second selected device is: 11th Gen Intel(R) Core(TM) i9-11900KB @ 3.30GHz 39 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_16_custom_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | // BEGIN CODE SNIP 10 | int my_selector(const device &dev) { 11 | if (dev.get_info().find("pac_a10") != 12 | std::string::npos && 13 | dev.get_info().find("Intel") != 14 | std::string::npos) { 15 | return 1; 16 | } 17 | return -1; 18 | } 19 | // END CODE SNIP 20 | 21 | int main() { 22 | queue q(my_selector); 23 | 24 | std::cout << "Selected device is: " 25 | << q.get_device().get_info() 26 | << "\n"; 27 | 28 | return 0; 29 | } 30 | 31 | // Example Output: 32 | // Selected device is: pac_a10 : Intel PAC Platform (pac_ee00000) 33 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_18_simple_device_code.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr int size = 16; 12 | std::array data; 13 | buffer B{data}; 14 | 15 | queue q{}; // Select any device for this queue 16 | 17 | std::cout << "Selected device is: " 18 | << q.get_device().get_info() 19 | << "\n"; 20 | 21 | // BEGIN CODE SNIP 22 | 23 | q.submit([&](handler& h) { 24 | accessor acc{B, h}; 25 | 26 | h.parallel_for(size, 27 | [=](auto& idx) { acc[idx] = idx; }); 28 | }); 29 | 30 | // END CODE SNIP 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_20.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | class queue { 9 | public: 10 | // Submit a memset operation writing to the specified 11 | // pointer. Return an event representing this operation. 12 | event memset(void* ptr, int value, size_t count); 13 | 14 | // Submit a memcpy operation copying from src to dest. 15 | // Return an event representing this operation. 16 | event memcpy(void* dest, const void* src, size_t count); 17 | 18 | // Submit different forms of kernel for execution. 19 | // Return an event representing the kernel operation. 20 | template 21 | event single_task(KernelType kernel); 22 | 23 | template 25 | event parallel_for(range num_work_items, 26 | KernelType kernel); 27 | 28 | template 30 | event parallel_for(nd_range execution_range, 31 | KernelType kernel); 32 | 33 | // Submit different forms of kernel for execution. 
34 | // Wait for the specified event(s) to complete 35 | // before executing the kernel. 36 | // Return an event representing the kernel operation. 37 | template 38 | event single_task(const std::vector& events, 39 | KernelType kernel); 40 | 41 | template 43 | event parallel_for(range num_work_items, 44 | const std::vector& events, 45 | KernelType kernel); 46 | 47 | template 49 | event parallel_for(nd_range execution_range, 50 | const std::vector& events, 51 | KernelType kernel); 52 | }; 53 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_22_simple_device_code_2.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr int size = 16; 12 | std::array data; 13 | buffer B{data}; 14 | 15 | queue q{}; // Select any device for this queue 16 | 17 | std::cout << "Selected device is: " 18 | << q.get_device().get_info() 19 | << "\n"; 20 | 21 | q.submit([&](handler& h) { 22 | accessor acc{B, h}; 23 | h.parallel_for(size, 24 | [=](auto& idx) { acc[idx] = idx; }); 25 | }); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_23_host_task.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | constexpr int N = 4; 10 | 11 | int main() { 12 | queue q; 13 | int* A = malloc_shared(N, q); 14 | 15 | std::cout << "Selected device: " 16 | << q.get_device().get_info() 17 | << "\n"; 18 | 19 | // Initialize values in the shared allocation 20 | auto eA = q.submit([&](handler& h) { 21 | h.parallel_for(N, [=](auto& idx) { A[idx] = idx; }); 22 | }); 23 | 24 | // Use a host task to output values on the host as part of 25 | // task graph. depends_on is used to define a dependence 26 | // on previous device code having completed. Here the host 27 | // task is defined as a lambda expression. 28 | q.submit([&](handler& h) { 29 | h.depends_on(eA); 30 | h.host_task([=]() { 31 | for (int i = 0; i < N; i++) 32 | std::cout << "host_task @ " << i << " = " << A[i] 33 | << "\n"; 34 | }); 35 | }); 36 | 37 | // Wait for work to be completed in the queue before 38 | // accessing the shared data in the host program. 
39 | q.wait(); 40 | 41 | for (int i = 0; i < N; i++) 42 | std::cout << "main @ " << i << " = " << A[i] << "\n"; 43 | 44 | free(A, q); 45 | 46 | return 0; 47 | } 48 | 49 | // Example Output: 50 | // Selected device: NVIDIA GeForce RTX 3060 51 | // host_task @ 0 = 0 52 | // host_task @ 1 = 1 53 | // host_task @ 2 = 2 54 | // host_task @ 3 = 3 55 | // main @ 0 = 0 56 | // main @ 1 = 1 57 | // main @ 2 = 2 58 | // main @ 3 = 3 59 | 60 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_2_simple_program.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // BEGIN CODE SNIP 6 | #include 7 | #include 8 | #include 9 | using namespace sycl; 10 | 11 | int main() { 12 | constexpr int size = 16; 13 | std::array data; 14 | 15 | // Create queue on implementation-chosen default device 16 | queue q; 17 | 18 | // Create buffer using host allocated "data" array 19 | buffer B{data}; 20 | 21 | q.submit([&](handler& h) { 22 | accessor A{B, h}; 23 | h.parallel_for(size, [=](auto& idx) { A[idx] = idx; }); 24 | }); 25 | 26 | // Obtain access to buffer on the host 27 | // Will wait for device kernel to execute to generate data 28 | host_accessor A{B}; 29 | for (int i = 0; i < size; i++) 30 | std::cout << "data[" << i << "] = " << A[i] << "\n"; 31 | 32 | return 0; 33 | } 34 | // END CODE SNIP 35 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_3.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | // BEGIN CODE SNIP 9 | class queue { 10 | public: 11 | // Create a queue associated with a default 12 | // (implementation chosen) device. 13 | queue(const property_list & = {}); 14 | 15 | queue(const async_handler &, const property_list & = {}); 16 | 17 | // Create a queue using a DeviceSelector. 18 | // A DeviceSelector is a callable that ranks 19 | // devices numerically. There are a few SYCL-defined 20 | // device selectors available such as 21 | // cpu_selector_v and gpu_selector_v. 22 | template 23 | explicit queue(const DeviceSelector &deviceSelector, 24 | const property_list &propList = {}); 25 | 26 | // Create a queue associated with an explicit device to 27 | // which the program already holds a reference. 28 | queue(const device &, const property_list & = {}); 29 | 30 | // Create a queue associated with a device in a specific 31 | // SYCL context. A device selector may be used in place 32 | // of a device. 33 | queue(const context &, const device &, 34 | const property_list & = {}); 35 | }; 36 | // END CODE SNIP 37 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_4.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | // BEGIN CODE SNIP 9 | class queue { 10 | public: 11 | // Submit a command group to this queue. 12 | // The command group may be a lambda expression or 13 | // function object. 
Returns an event reflecting the status 14 | // of the action performed in the command group. 15 | template 16 | event submit(T); 17 | 18 | // Wait for all previously submitted actions to finish 19 | // executing. 20 | void wait(); 21 | 22 | // Wait for all previously submitted actions to finish 23 | // executing. Pass asynchronous exceptions to an 24 | // async_handler function. 25 | void wait_and_throw(); 26 | }; 27 | // END CODE SNIP 28 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_7_implicit_default_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // Create queue on whatever default device that the 11 | // implementation chooses. Implicit use of 12 | // default_selector_v 13 | queue q; 14 | 15 | std::cout << "Selected device: " 16 | << q.get_device().get_info() 17 | << "\n"; 18 | 19 | return 0; 20 | } 21 | 22 | // Sample Outputs (one line per run depending on system): 23 | // Selected device: NVIDIA GeForce RTX 3060 24 | // Selected device: AMD Radeon RX 5700 XT 25 | // Selected device: Intel(R) Data Center GPU Max 1100 26 | // Selected device: Intel(R) FPGA Emulation Device 27 | // Selected device: AMD Ryzen 5 3600 6-Core Processor 28 | // Selected device: Intel(R) UHD Graphics 770 29 | // Selected device: Intel(R) Xeon(R) Gold 6128 CPU @ 3.40GHz 30 | // Selected device: 11th Gen Intel(R) Core(TM) i9-11900KB @ 3.30GHz 31 | // many more possible… these are only examples 32 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_9_cpu_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // Create queue to use the CPU device explicitly 11 | queue q{cpu_selector_v}; 12 | 13 | std::cout << "Selected device: " 14 | << q.get_device().get_info() 15 | << "\n"; 16 | std::cout 17 | << " -> Device vendor: " 18 | << q.get_device().get_info() 19 | << "\n"; 20 | 21 | return 0; 22 | } 23 | 24 | // Example Output: 25 | // Selected device: Intel(R) Xeon(R) Gold 6128 CPU @ 3.40GHz 26 | // -> Device vendor: Intel(R) Corporation 27 | 28 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_3_4_usm_explicit_data_movement 8 | SOURCES fig_3_4_usm_explicit_data_movement.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_3_5_usm_implicit_data_movement 13 | SOURCES fig_3_5_usm_implicit_data_movement.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_3_6_buffers_and_accessors 18 | SOURCES fig_3_6_buffers_and_accessors.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_3_10_in_order 23 | SOURCES fig_3_10_in_order.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_3_11_depends_on 28 | SOURCES fig_3_11_depends_on.cpp) 29 | 30 | add_book_sample( 31 | TEST 32 | TARGET fig_3_13_read_after_write 33 | SOURCES fig_3_13_read_after_write.cpp 34 | 
ADDITIONAL_COMPILE_OPTIONS -Wno-unused-variable) 35 | 36 | add_book_sample( 37 | TEST 38 | TARGET fig_3_15_write_after_read_and_write_after_write 39 | SOURCES fig_3_15_write_after_read_and_write_after_write.cpp) 40 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_10_in_order.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 4; 8 | 9 | int main() { 10 | queue q{property::queue::in_order()}; 11 | 12 | q.submit([&](handler& h) { 13 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task A 14 | }); 15 | q.submit([&](handler& h) { 16 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task B 17 | }); 18 | q.submit([&](handler& h) { 19 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task C 20 | }); 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_11_depends_on.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 4; 8 | 9 | int main() { 10 | queue q; 11 | 12 | auto eA = q.submit([&](handler &h) { 13 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task A 14 | }); 15 | eA.wait(); 16 | auto eB = q.submit([&](handler &h) { 17 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task B 18 | }); 19 | auto eC = q.submit([&](handler &h) { 20 | h.depends_on(eB); 21 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task C 22 | }); 23 | auto eD = q.submit([&](handler &h) { 24 | h.depends_on({eB, eC}); 25 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task D 26 | }); 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_13_read_after_write.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | constexpr int N = 42; 9 | 10 | int main() { 11 | std::array a, b, c; 12 | for (int i = 0; i < N; i++) { 13 | a[i] = b[i] = c[i] = 0; 14 | } 15 | 16 | queue q; 17 | 18 | // We will learn how to simplify this example later 19 | buffer a_buf{a}; 20 | buffer b_buf{b}; 21 | buffer c_buf{c}; 22 | 23 | q.submit([&](handler &h) { 24 | accessor a(a_buf, h, read_only); 25 | accessor b(b_buf, h, write_only); 26 | h.parallel_for( // computeB 27 | N, [=](id<1> i) { b[i] = a[i] + 1; }); 28 | }); 29 | 30 | q.submit([&](handler &h) { 31 | accessor a(a_buf, h, read_only); 32 | h.parallel_for( // readA 33 | N, [=](id<1> i) { 34 | // Useful only as an example 35 | int data = a[i]; 36 | }); 37 | }); 38 | 39 | q.submit([&](handler &h) { 40 | // RAW of buffer B 41 | accessor b(b_buf, h, read_only); 42 | accessor c(c_buf, h, write_only); 43 | h.parallel_for( // computeC 44 | N, [=](id<1> i) { c[i] = b[i] + 2; }); 45 | }); 46 | 47 | // read C on host 48 | host_accessor host_acc_c(c_buf, read_only); 49 | for (int i = 0; i < N; i++) { 50 | std::cout << host_acc_c[i] << " "; 51 | } 52 | std::cout << "\n"; 53 | return 0; 54 | } 55 | -------------------------------------------------------------------------------- 
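A hedged companion sketch, not one of the book's figures: the same read-after-write chain as fig_3_13, expressed with USM allocations and explicit event dependences instead of buffer accessors. The kernel names computeB/computeC mirror the comments in the figure; everything else here is illustrative only.

#include <sycl/sycl.hpp>
using namespace sycl;
constexpr int N = 42;

int main() {
  queue q;
  int *a = malloc_shared<int>(N, q);
  int *b = malloc_shared<int>(N, q);
  int *c = malloc_shared<int>(N, q);
  for (int i = 0; i < N; i++) a[i] = b[i] = c[i] = 0;

  // computeB writes b, reading a
  auto computeB =
      q.parallel_for(N, [=](id<1> i) { b[i] = a[i] + 1; });

  // computeC reads b (RAW), so it must name computeB as a
  // dependence; with USM there are no accessors to imply it
  auto computeC = q.parallel_for(
      N, computeB, [=](id<1> i) { c[i] = b[i] + 2; });
  computeC.wait();

  free(a, q);
  free(b, q);
  free(c, q);
  return 0;
}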
/samples/Ch03_data_management/fig_3_15_write_after_read_and_write_after_write.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | constexpr int N = 42; 9 | 10 | int main() { 11 | std::array<int, N> a, b; 12 | for (int i = 0; i < N; i++) { 13 | a[i] = b[i] = 0; 14 | } 15 | 16 | queue q; 17 | buffer a_buf{a}; 18 | buffer b_buf{b}; 19 | 20 | q.submit([&](handler &h) { 21 | accessor a(a_buf, h, read_only); 22 | accessor b(b_buf, h, write_only); 23 | h.parallel_for( // computeB 24 | N, [=](id<1> i) { b[i] = a[i] + 1; }); 25 | }); 26 | 27 | q.submit([&](handler &h) { 28 | // WAR of buffer A 29 | accessor a(a_buf, h, write_only); 30 | h.parallel_for( // rewriteA 31 | N, [=](id<1> i) { a[i] = 21 + 21; }); 32 | }); 33 | 34 | q.submit([&](handler &h) { 35 | // WAW of buffer B 36 | accessor b(b_buf, h, write_only); 37 | h.parallel_for( // rewriteB 38 | N, [=](id<1> i) { b[i] = 30 + 12; }); 39 | }); 40 | 41 | host_accessor host_acc_a(a_buf, read_only); 42 | host_accessor host_acc_b(b_buf, read_only); 43 | for (int i = 0; i < N; i++) { 44 | std::cout << host_acc_a[i] << " " << host_acc_b[i] 45 | << " "; 46 | } 47 | std::cout << "\n"; 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_17.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | class handler { 9 | ... 10 | // Specifies event(s) that must be complete before the 11 | // action defined in this command group executes. 12 | void depends_on({event / std::vector<event> & }); 13 | 14 | // Enqueues a memcpy from Src to Dest. 15 | // Count bytes are copied. 16 | void memcpy(void* Dest, const void* Src, size_t Count); 17 | 18 | // Enqueues a memcpy from Src to Dest. 19 | // Count elements are copied. 20 | template <typename T> 21 | void copy(const T* Src, T* Dest, size_t Count); 22 | 23 | // Enqueues a memset operation on the specified pointer. 24 | // Writes the first byte of Value into Count bytes. 25 | void memset(void* Ptr, int Value, size_t Count); 26 | 27 | // Enqueues a fill operation on the specified pointer. 28 | // Fills Pattern into Ptr Count times. 29 | template <typename T> 30 | void fill(void* Ptr, const T& Pattern, size_t Count); 31 | 32 | // Submits a kernel of one work-item for execution. 33 | template <typename KernelType> 34 | void single_task(KernelType KernelFunc); 35 | 36 | // Submits a kernel with NumWorkItems work-items for 37 | // execution. 38 | template <typename KernelType, 39 | int Dims> 40 | void parallel_for(range<Dims> NumWorkItems, 41 | KernelType KernelFunc); 42 | 43 | // Submits a kernel for execution over the supplied 44 | // nd_range. 45 | template <typename KernelType, 46 | int Dims> 47 | void parallel_for(nd_range<Dims> ExecutionRange, 48 | KernelType KernelFunc); 49 | ...
50 | }; 51 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_4_usm_explicit_data_movement.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | constexpr int N = 42; 9 | 10 | int main() { 11 | queue q; 12 | 13 | std::array host_array; 14 | int *device_array = malloc_device(N, q); 15 | 16 | for (int i = 0; i < N; i++) host_array[i] = N; 17 | 18 | // We will learn how to simplify this example later 19 | q.submit([&](handler &h) { 20 | // copy host_array to device_array 21 | h.memcpy(device_array, &host_array[0], N * sizeof(int)); 22 | }); 23 | q.wait(); 24 | 25 | q.submit([&](handler &h) { 26 | h.parallel_for(N, [=](id<1> i) { device_array[i]++; }); 27 | }); 28 | q.wait(); 29 | 30 | q.submit([&](handler &h) { 31 | // copy device_array back to host_array 32 | h.memcpy(&host_array[0], device_array, N * sizeof(int)); 33 | }); 34 | q.wait(); 35 | 36 | free(device_array, q); 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_5_usm_implicit_data_movement.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | int *host_array = malloc_host(N, q); 12 | int *shared_array = malloc_shared(N, q); 13 | 14 | for (int i = 0; i < N; i++) { 15 | // Initialize host_array on host 16 | host_array[i] = i; 17 | } 18 | 19 | // We will learn how to simplify this example later 20 | q.submit([&](handler &h) { 21 | h.parallel_for(N, [=](id<1> i) { 22 | // access shared_array and host_array on device 23 | shared_array[i] = host_array[i] + 1; 24 | }); 25 | }); 26 | q.wait(); 27 | 28 | for (int i = 0; i < N; i++) { 29 | // access shared_array on host 30 | host_array[i] = shared_array[i]; 31 | } 32 | 33 | free(shared_array, q); 34 | free(host_array, q); 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_6_buffers_and_accessors.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | constexpr int N = 42; 9 | 10 | int main() { 11 | std::array my_data; 12 | for (int i = 0; i < N; i++) my_data[i] = 0; 13 | 14 | { 15 | queue q; 16 | buffer my_buffer(my_data); 17 | 18 | q.submit([&](handler &h) { 19 | // create an accessor to update 20 | // the buffer on the device 21 | accessor my_accessor(my_buffer, h); 22 | 23 | h.parallel_for(N, [=](id<1> i) { my_accessor[i]++; }); 24 | }); 25 | 26 | // create host accessor 27 | host_accessor host_accessor(my_buffer); 28 | 29 | for (int i = 0; i < N; i++) { 30 | // access my_buffer on host 31 | std::cout << host_accessor[i] << " "; 32 | } 33 | std::cout << "\n"; 34 | } 35 | 36 | // my_data is updated when my_buffer is 37 | // destroyed upon exiting scope 38 | for (int i = 0; i < N; i++) { 39 | std::cout << my_data[i] << " "; 40 | } 41 | std::cout << "\n"; 42 | } 43 | -------------------------------------------------------------------------------- 
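A hedged aside on the accessor pattern above, not one of the book's figures: when a kernel only produces a buffer's contents, adding the no_init property to a write_only accessor tells the runtime that the buffer's previous contents are not needed, so no host-to-device transfer is required before the kernel runs. A minimal sketch:

#include <array>
#include <sycl/sycl.hpp>
using namespace sycl;
constexpr int N = 42;

int main() {
  std::array<int, N> out{};
  queue q;
  {
    buffer out_buf{out};
    q.submit([&](handler &h) {
      // write_only + no_init: previous contents are ignored
      accessor o(out_buf, h, write_only, no_init);
      h.parallel_for(N, [=](id<1> i) { o[i] = 42; });
    });
  }  // out_buf destroyed here; results written back to out
  return 0;
}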
/samples/Ch04_expressing_parallelism/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_4_5_vector_add 8 | SOURCES fig_4_5_vector_add.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_4_6_matrix_add 13 | SOURCES fig_4_6_matrix_add.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_4_7_basic_matrix_multiply 18 | SOURCES fig_4_7_basic_matrix_multiply.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_4_15_nd_range_matrix_multiply 23 | SOURCES fig_4_15_nd_range_matrix_multiply.cpp) 24 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_1.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | for (int i = 0; i < N; ++i) { 9 | c[i] = a[i] + b[i]; 10 | } 11 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_10.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | class id { 10 | public: 11 | // Construct an id with one, two or three dimensions 12 | id(size_t dim0); 13 | id(size_t dim0, size_t dim1); 14 | id(size_t dim0, size_t dim1, size_t dim2); 15 | 16 | // Return the component of the id in a specific dimension 17 | size_t get(int dimension) const; 18 | size_t &operator[](int dimension); 19 | size_t operator[](int dimension) const; 20 | 21 | // Arithmetic operations on ids are also supported 22 | }; 23 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_11.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | class item { 10 | public: 11 | // Return the index of this item in the kernel's execution 12 | // range 13 | id get_id() const; 14 | size_t get_id(int dimension) const; 15 | size_t operator[](int dimension) const; 16 | 17 | // Return the execution range of the kernel executed by 18 | // this item 19 | range get_range() const; 20 | size_t get_range(int dimension) const; 21 | 22 | // Return the offset of this item (if WithOffset == true) 23 | id get_offset() const; 24 | 25 | // Return the linear index of this item 26 | // e.g. 
id(0) * range(1) * range(2) + id(1) * range(2) + 27 | // id(2) 28 | size_t get_linear_id() const; 29 | }; 30 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_15_nd_range_matrix_multiply.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace sycl; 10 | 11 | int main() { 12 | // Set up queue on any available device 13 | queue q; 14 | 15 | // Initialize input and output memory on the host 16 | constexpr size_t N = 256; 17 | constexpr size_t B = 4; 18 | std::vector a(N * N), b(N * N), c(N * N); 19 | std::default_random_engine gen(42); 20 | std::uniform_real_distribution dist(0.0, 1.0); 21 | auto rng = [&]() { return dist(gen); }; 22 | std::generate(a.begin(), a.end(), rng); 23 | std::generate(b.begin(), b.end(), rng); 24 | std::fill(c.begin(), c.end(), 0); 25 | 26 | { 27 | // Create buffers associated with inputs and output 28 | buffer a_buf(a.data(), range<2>(N, N)), 29 | b_buf(b.data(), range<2>(N, N)), 30 | c_buf(c.data(), range<2>(N, N)); 31 | 32 | // Submit the kernel to the queue 33 | q.submit([&](handler& h) { 34 | accessor a{a_buf, h}; 35 | accessor b{b_buf, h}; 36 | accessor c{c_buf, h}; 37 | 38 | // BEGIN CODE SNIP 39 | range global{N, N}; 40 | range local{B, B}; 41 | h.parallel_for(nd_range{global, local}, 42 | [=](nd_item<2> it) { 43 | int j = it.get_global_id(0); 44 | int i = it.get_global_id(1); 45 | 46 | for (int k = 0; k < N; ++k) { 47 | c[j][i] += a[j][k] * b[k][i]; 48 | } 49 | }); 50 | // END CODE SNIP 51 | }); 52 | } 53 | 54 | // Check that all outputs match serial execution. 55 | bool passed = true; 56 | for (int j = 0; j < N; ++j) { 57 | for (int i = 0; i < N; ++i) { 58 | float gold = 0; 59 | for (int k = 0; k < N; ++k) { 60 | gold += a[j * N + k] * b[k * N + i]; 61 | } 62 | if (std::abs(gold - c[j * N + i]) / gold > 1.0E-05) { 63 | passed = false; 64 | } 65 | } 66 | } 67 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") 68 | << std::endl; 69 | return (passed) ? 0 : 1; 70 | } 71 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_17.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | class nd_range { 10 | public: 11 | // Construct an nd_range from global and work-group local 12 | // ranges 13 | nd_range(range global, 14 | range local); 15 | 16 | // Return the global and work-group local ranges 17 | range get_global_range() const; 18 | range get_local_range() const; 19 | 20 | // Return the number of work-groups in the global range 21 | range get_group_range() const; 22 | }; 23 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_18.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
7 | 8 | template 9 | class nd_item { 10 | public: 11 | // Return the index of this item in the kernel's execution 12 | // range 13 | id get_global_id() const; 14 | size_t get_global_id(int dimension) const; 15 | size_t get_global_linear_id() const; 16 | 17 | // Return the execution range of the kernel executed by 18 | // this item 19 | range get_global_range() const; 20 | size_t get_global_range(int dimension) const; 21 | 22 | // Return the index of this item within its parent 23 | // work-group 24 | id get_local_id() const; 25 | size_t get_local_id(int dimension) const; 26 | size_t get_local_linear_id() const; 27 | 28 | // Return the execution range of this item's parent 29 | // work-group 30 | range get_local_range() const; 31 | size_t get_local_range(int dimension) const; 32 | 33 | // Return a handle to the work-group 34 | // or sub-group containing this item 35 | group get_group() const; 36 | sub_group get_sub_group() const; 37 | }; 38 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_19.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | class group { 10 | public: 11 | // Return the index of this group in the kernel's 12 | // execution range 13 | id get_group_id() const; 14 | size_t get_group_id(int dimension) const; 15 | size_t get_group_linear_id() const; 16 | 17 | // Return the number of groups in the kernel's execution 18 | // range 19 | range get_group_range() const; 20 | size_t get_group_range(int dimension) const; 21 | 22 | // Return the number of work-items in this group 23 | range get_local_range() const; 24 | size_t get_local_range(int dimension) const; 25 | }; 26 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | launch N kernel instances { 9 | int id = 10 | get_instance_id(); // unique identifier in [0, N) 11 | c[id] = a[id] + b[id]; 12 | } 13 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_20.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | void body(group& g); 9 | 10 | h.parallel_for(nd_range{global, local}, [=](nd_item<1> it) { 11 | group<1> g = it.get_group(); 12 | range<1> r = g.get_local_range(); 13 | ... 14 | body(g); 15 | }); 16 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_21.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
7 | 8 | class sub_group { 9 | public: 10 | // Return the index of the sub-group 11 | id<1> get_group_id() const; 12 | 13 | // Return the number of sub-groups in this item's parent 14 | // work-group 15 | range<1> get_group_range() const; 16 | 17 | // Return the index of the work-item in this sub-group 18 | id<1> get_local_id() const; 19 | 20 | // Return the number of work-items in this sub-group 21 | range<1> get_local_range() const; 22 | 23 | // Return the maximum number of work-items in any 24 | // sub-group in this item's parent work-group 25 | range<1> get_max_local_range() const; 26 | }; 27 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_22.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | size_t N = ...; // amount of work 9 | size_t W = ...; // number of workers 10 | h.parallel_for(range{W}, [=](item<1> it) { 11 | for (int i = it.get_id()[0]; i < N; 12 | i += it.get_range()[0]) { 13 | output[i] = function(input[i]); 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_5_vector_add.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // Set up queue on any available device 12 | queue q; 13 | 14 | // Initialize input and output memory on the host 15 | constexpr size_t N = 256; 16 | std::vector a(N), b(N), c(N); 17 | std::fill(a.begin(), a.end(), 1); 18 | std::fill(b.begin(), b.end(), 2); 19 | std::fill(c.begin(), c.end(), 0); 20 | 21 | { 22 | // Create buffers associated with inputs and output 23 | buffer a_buf{a}, b_buf{b}, c_buf{c}; 24 | 25 | // Submit the kernel to the queue 26 | q.submit([&](handler& h) { 27 | accessor a{a_buf, h}; 28 | accessor b{b_buf, h}; 29 | accessor c{c_buf, h}; 30 | 31 | // BEGIN CODE SNIP 32 | h.parallel_for(range{N}, [=](id<1> idx) { 33 | c[idx] = a[idx] + b[idx]; 34 | }); 35 | // END CODE SNIP 36 | }); 37 | } 38 | 39 | // Check that all outputs match expected value 40 | bool passed = std::all_of(c.begin(), c.end(), 41 | [](int i) { return (i == 3); }); 42 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") 43 | << std::endl; 44 | return (passed) ? 
0 : 1; 45 | } 46 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_6_matrix_add.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // Set up queue on any available device 12 | queue q; 13 | 14 | // Initialize input and output memory on the host 15 | constexpr size_t N = 256; 16 | constexpr size_t M = 256; 17 | std::vector a(N * M), b(N * M), c(N * M); 18 | std::fill(a.begin(), a.end(), 1); 19 | std::fill(b.begin(), b.end(), 2); 20 | std::fill(c.begin(), c.end(), 0); 21 | 22 | { 23 | // Create buffers associated with inputs and output 24 | buffer a_buf(a.data(), range<2>(N, M)), 25 | b_buf(b.data(), range<2>(N, M)), 26 | c_buf(c.data(), range<2>(N, M)); 27 | 28 | // Submit the kernel to the queue 29 | q.submit([&](handler& h) { 30 | accessor a{a_buf, h}; 31 | accessor b{b_buf, h}; 32 | accessor c{c_buf, h}; 33 | 34 | // BEGIN CODE SNIP 35 | h.parallel_for(range{N, M}, [=](id<2> idx) { 36 | c[idx] = a[idx] + b[idx]; 37 | }); 38 | // END CODE SNIP 39 | }); 40 | } 41 | 42 | // Check that all outputs match expected value 43 | bool passed = std::all_of(c.begin(), c.end(), 44 | [](int i) { return (i == 3); }); 45 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") 46 | << std::endl; 47 | return (passed) ? 0 : 1; 48 | } 49 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_7_basic_matrix_multiply.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace sycl; 10 | 11 | int main() { 12 | // Set up queue on any available device 13 | queue q; 14 | 15 | // Initialize input and output memory on the host 16 | constexpr size_t N = 256; 17 | std::vector a(N * N), b(N * N), c(N * N); 18 | std::default_random_engine gen(42); 19 | std::uniform_real_distribution dist(0.0, 1.0); 20 | auto rng = [&]() { return dist(gen); }; 21 | std::generate(a.begin(), a.end(), rng); 22 | std::generate(b.begin(), b.end(), rng); 23 | std::fill(c.begin(), c.end(), 0); 24 | 25 | { 26 | // Create buffers associated with inputs and output 27 | buffer a_buf(a.data(), range<2>(N, N)), 28 | b_buf(b.data(), range<2>(N, N)), 29 | c_buf(c.data(), range<2>(N, N)); 30 | 31 | // Submit the kernel to the queue 32 | q.submit([&](handler& h) { 33 | accessor a{a_buf, h}; 34 | accessor b{b_buf, h}; 35 | accessor c{c_buf, h}; 36 | 37 | // BEGIN CODE SNIP 38 | h.parallel_for(range{N, N}, [=](id<2> idx) { 39 | int j = idx[0]; 40 | int i = idx[1]; 41 | for (int k = 0; k < N; ++k) { 42 | c[j][i] += 43 | a[j][k] * b[k][i]; // or c[idx] += a[id(j,k)] 44 | // * b[id(k,i)]; 45 | } 46 | }); 47 | // END CODE SNIP 48 | }); 49 | } 50 | 51 | // Check that all outputs match serial execution 52 | bool passed = true; 53 | for (int j = 0; j < N; ++j) { 54 | for (int i = 0; i < N; ++i) { 55 | float gold = 0; 56 | for (int k = 0; k < N; ++k) { 57 | gold += a[j * N + k] * b[k * N + i]; 58 | } 59 | if (std::abs(gold - c[j * N + i]) / gold > 1.0E-05) { 60 | passed = false; 61 | } 62 | } 63 | } 64 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") 65 | << std::endl; 66 | return (passed) ? 
0 : 1; 67 | } 68 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_9.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | class range { 10 | public: 11 | // Construct a range with one, two or three dimensions 12 | range(size_t dim0); 13 | range(size_t dim0, size_t dim1); 14 | range(size_t dim0, size_t dim1, size_t dim2); 15 | 16 | // Return the size of the range in a specific dimension 17 | size_t get(int dimension) const; 18 | size_t &operator[](int dimension); 19 | size_t operator[](int dimension) const; 20 | 21 | // Return the product of the size of each dimension 22 | size_t size() const; 23 | 24 | // Arithmetic operations on ranges are also supported 25 | }; 26 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_5_1_async_task_graph 8 | SOURCES fig_5_1_async_task_graph.cpp) 9 | 10 | add_book_sample( 11 | TARGET fig_5_2_sync_error 12 | SOURCES fig_5_2_sync_error.cpp) 13 | 14 | add_book_sample( 15 | TARGET fig_5_3_async_error 16 | SOURCES fig_5_3_async_error.cpp) 17 | 18 | add_book_sample( 19 | TARGET fig_5_4_unhandled_exception 20 | SOURCES fig_5_4_unhandled_exception.cpp) 21 | 22 | add_book_sample( 23 | TARGET fig_5_5_terminate 24 | SOURCES fig_5_5_terminate.cpp) 25 | 26 | add_book_sample( 27 | TEST 28 | TARGET fig_5_6_catch_snip 29 | SOURCES fig_5_6_catch_snip.cpp) 30 | 31 | add_book_sample( 32 | TARGET fig_5_7_catch 33 | SOURCES fig_5_7_catch.cpp) 34 | 35 | add_book_sample( 36 | TARGET fig_5_8_lambda_handler 37 | SOURCES fig_5_8_lambda_handler.cpp) 38 | 39 | add_book_sample( 40 | TARGET fig_5_9_default_handler_proxy 41 | SOURCES fig_5_9_default_handler_proxy.cpp) 42 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_1_async_task_graph.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | constexpr int size = 16; 11 | buffer b{range{size}}; 12 | 13 | // Create queue on any available device 14 | queue q; 15 | 16 | q.submit([&](handler& h) { 17 | accessor a{b, h}; 18 | 19 | h.parallel_for(size, [=](auto& idx) { a[idx] = idx; }); 20 | }); 21 | 22 | // Obtain access to buffer on the host 23 | // Will wait for device kernel to execute to generate data 24 | host_accessor a{b}; 25 | for (int i = 0; i < size; i++) 26 | std::cout << "data[" << i << "] = " << a[i] << "\n"; 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_2_sync_error.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | int main() { 9 | buffer b{range{16}}; 10 | 11 | // ERROR: Create sub-buffer 
larger than size of parent 12 | // buffer. An exception is thrown from within the buffer 13 | // constructor. 14 | buffer b2(b, id{8}, range{16}); 15 | 16 | return 0; 17 | } 18 | 19 | // Example Output: 20 | // terminate called after throwing an instance of 'sycl::_V1::invalid_object_error' 21 | // what(): Requested sub-buffer size exceeds the size of the parent buffer -30 (PI_ERROR_INVALID_VALUE) 22 | // Aborted 23 | 24 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_3_async_error.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | // Our example asynchronous handler function 9 | auto handle_async_error = [](exception_list elist) { 10 | for (auto &e : elist) { 11 | try { 12 | std::rethrow_exception(e); 13 | } catch (...) { 14 | std::cout << "Caught SYCL ASYNC exception!!\n"; 15 | } 16 | } 17 | }; 18 | 19 | void say_device(const queue &Q) { 20 | std::cout << "Device : " 21 | << Q.get_device().get_info() 22 | << "\n"; 23 | } 24 | 25 | class something_went_wrong {}; // Example exception type 26 | 27 | int main() { 28 | queue q{cpu_selector_v, handle_async_error}; 29 | say_device(q); 30 | 31 | q.submit([&](handler &h) { 32 | h.host_task([]() { throw(something_went_wrong{}); }); 33 | }).wait(); 34 | 35 | return 0; 36 | } 37 | 38 | // Example output: 39 | // Device : Intel(R) Xeon(R) Gold 6128 CPU @ 3.40GHz 40 | // Caught SYCL ASYNC exception!! 41 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_4_unhandled_exception.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | class something_went_wrong {}; 8 | 9 | int main() { 10 | std::cout << "Hello\n"; 11 | 12 | throw(something_went_wrong{}); 13 | } 14 | 15 | // Example output: 16 | // Hello 17 | // terminate called after throwing an instance of 'something_went_wrong' 18 | // Aborted 19 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_5_terminate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | int main() { 8 | std::cout << "Hello\n"; 9 | 10 | std::terminate(); 11 | } 12 | 13 | // Example output: 14 | // Hello 15 | // terminate called without an active exception 16 | // Aborted 17 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_6_catch_snip.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | int main() { 9 | // BEGIN CODE SNIP 10 | 11 | try { 12 | // Do some SYCL work 13 | } catch (sycl::exception &e) { 14 | // Do something to output or handle the exception 15 | std::cout << "Caught sync SYCL exception: " << e.what() 16 | << "\n"; 17 | return 1; 18 | } 19 | 20 | // END CODE SNIP 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_7_catch.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | int main() { 9 | try { 10 | buffer b{range{16}}; 11 | 12 | // ERROR: Create sub-buffer larger than size of parent 13 | // buffer. An exception is thrown from within the buffer 14 | // constructor. 15 | buffer b2(b, id{8}, range{16}); 16 | 17 | } catch (sycl::exception &e) { 18 | // Do something to output or handle the exception 19 | std::cout << "Caught synchronous SYCL exception: " 20 | << e.what() << "\n"; 21 | return 1; 22 | } catch (std::exception &e) { 23 | std::cout << "Caught std exception: " << e.what() 24 | << "\n"; 25 | return 2; 26 | } catch (...) { 27 | std::cout << "Caught unknown exception\n"; 28 | return 3; 29 | } 30 | 31 | return 0; 32 | } 33 | 34 | // Example output: 35 | // Caught synchronous SYCL exception: Requested sub-buffer size exceeds the size of the parent buffer -30 (PI_ERROR_INVALID_VALUE) 36 | 37 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_8_lambda_handler.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | // BEGIN CODE SNIP 9 | 10 | // Our simple asynchronous handler function 11 | auto handle_async_error = [](exception_list elist) { 12 | for (auto& e : elist) { 13 | try { 14 | std::rethrow_exception(e); 15 | } catch (sycl::exception& e) { 16 | std::cout << "ASYNC EXCEPTION!!\n"; 17 | std::cout << e.what() << "\n"; 18 | } 19 | } 20 | }; 21 | 22 | // END CODE SNIP 23 | 24 | void say_device(const queue& q) { 25 | std::cout << "Device : " 26 | << q.get_device().get_info() 27 | << "\n"; 28 | } 29 | 30 | int main() { 31 | queue q1{gpu_selector_v, handle_async_error}; 32 | queue q2{cpu_selector_v, handle_async_error}; 33 | say_device(q1); 34 | say_device(q2); 35 | 36 | try { 37 | q1.submit( 38 | [&](handler& h) { 39 | // Empty command group is illegal and generates an 40 | // error 41 | }, 42 | q2); // Secondary/backup queue! 43 | } catch (...) { 44 | } // Discard regular C++ exceptions for this example 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_9_default_handler_proxy.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | // BEGIN CODE SNIP 9 | 10 | // Our simple asynchronous handler function 11 | auto handle_async_error = [](exception_list elist) { 12 | for (auto& e : elist) { 13 | try { 14 | std::rethrow_exception(e); 15 | } catch (sycl::exception& e) { 16 | // Print information about the asynchronous exception 17 | } catch (...) 
{ 18 | // Print information about non-sycl::exception 19 | } 20 | } 21 | 22 | // Terminate abnormally to make clear to user that 23 | // something unhandled happened 24 | std::terminate(); 25 | }; 26 | 27 | // END CODE SNIP 28 | 29 | void say_device(const queue& q) { 30 | std::cout << "Device : " 31 | << q.get_device().get_info() 32 | << "\n"; 33 | } 34 | 35 | class something_went_wrong {}; 36 | 37 | int main() { 38 | queue q{cpu_selector_v, handle_async_error}; 39 | say_device(q); 40 | 41 | q.submit([&](handler& h) { 42 | h.host_task([]() { throw(something_went_wrong{}); }); 43 | }).wait(); 44 | 45 | return 0; 46 | } 47 | 48 | // Example output: 49 | // Device : Intel(R) Xeon(R) Gold 6128 CPU @ 3.40GHz 50 | // terminate called without an active exception 51 | // Aborted 52 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_6_5_allocation_styles 8 | SOURCES fig_6_5_allocation_styles.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_6_6_usm_explicit_data_movement 13 | SOURCES fig_6_6_usm_explicit_data_movement.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_6_7_usm_implicit_data_movement 18 | SOURCES fig_6_7_usm_implicit_data_movement.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_6_8_prefetch_memadvise 23 | SOURCES fig_6_8_prefetch_memadvise.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_6_9_queries 28 | SOURCES fig_6_9_queries.cpp) 29 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
7 | 8 | // Named Functions 9 | void *malloc_device(size_t size, const device &dev, 10 | const context &ctxt); 11 | void *malloc_device(size_t size, const queue &q); 12 | void *aligned_alloc_device(size_t alignment, size_t size, 13 | const device &dev, 14 | const context &ctxt); 15 | void *aligned_alloc_device(size_t alignment, size_t size, 16 | const queue &q); 17 | 18 | void *malloc_host(size_t size, const context &ctxt); 19 | void *malloc_host(size_t size, const queue &q); 20 | void *aligned_alloc_host(size_t alignment, size_t size, 21 | const context &ctxt); 22 | void *aligned_alloc_host(size_t alignment, size_t size, 23 | const queue &q); 24 | 25 | void *malloc_shared(size_t size, const device &dev, 26 | const context &ctxt); 27 | void *malloc_shared(size_t size, const queue &q); 28 | void *aligned_alloc_shared(size_t alignment, size_t size, 29 | const device &dev, 30 | const context &ctxt); 31 | void *aligned_alloc_shared(size_t alignment, size_t size, 32 | const queue &q); 33 | 34 | // Single Function 35 | void *malloc(size_t size, const device &dev, 36 | const context &ctxt, usm::alloc kind); 37 | void *malloc(size_t size, const queue &q, usm::alloc kind); 38 | void *aligned_alloc(size_t alignment, size_t size, 39 | const device &dev, const context &ctxt, 40 | usm::alloc kind); 41 | void *aligned_alloc(size_t alignment, size_t size, 42 | const queue &q, usm::alloc kind); 43 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_3.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
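
A minimal usage sketch for the allocation routines declared above in fig_6_2.hpp and for their templated counterparts in fig_6_3.hpp just below. This is not one of the book's figures; the kernel, the sizes, and the variable names are illustrative, and the untyped malloc call is shown only to contrast the two styles.

#include <sycl/sycl.hpp>
#include <iostream>
using namespace sycl;

int main() {
  queue q;
  constexpr size_t N = 16;

  // Device allocation: usable inside kernels, not dereferenceable
  // on the host (allocated here only to illustrate the call)
  int *device_mem = malloc_device<int>(N, q);

  // Host allocation: host memory that kernels can also access
  int *host_mem = malloc_host<int>(N, q);

  // Shared allocation: migrates between host and device
  int *shared_mem = malloc_shared<int>(N, q);

  // The untyped, kind-parameterized form from fig_6_2.hpp
  void *raw = malloc(N * sizeof(int), q, usm::alloc::device);

  for (size_t i = 0; i < N; i++) host_mem[i] = static_cast<int>(i);

  q.parallel_for(N, [=](id<1> i) {
     shared_mem[i] = host_mem[i] + 1;  // read host, write shared
   }).wait();

  bool passed = (shared_mem[N - 1] == static_cast<int>(N));
  std::cout << (passed ? "SUCCESS" : "FAILURE") << std::endl;

  // Every USM allocation is released with sycl::free
  free(device_mem, q);
  free(host_mem, q);
  free(shared_mem, q);
  free(raw, q);
  return passed ? 0 : 1;
}
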
7 | 8 | // Named Functions 9 | template <typename T> 10 | T *malloc_device(size_t Count, const device &Dev, 11 | const context &Ctxt); 12 | template <typename T> 13 | T *malloc_device(size_t Count, const queue &Q); 14 | template <typename T> 15 | T *aligned_alloc_device(size_t Alignment, size_t Count, 16 | const device &Dev, 17 | const context &Ctxt); 18 | template <typename T> 19 | T *aligned_alloc_device(size_t Alignment, size_t Count, 20 | const queue &Q); 21 | 22 | template <typename T> 23 | T *malloc_host(size_t Count, const context &Ctxt); 24 | template <typename T> 25 | T *malloc_host(size_t Count, const queue &Q); 26 | template <typename T> 27 | T *aligned_alloc_host(size_t Alignment, size_t Count, 28 | const context &Ctxt); 29 | template <typename T> 30 | T *aligned_alloc_host(size_t Alignment, size_t Count, 31 | const queue &Q); 32 | 33 | template <typename T> 34 | T *malloc_shared(size_t Count, const device &Dev, 35 | const context &Ctxt); 36 | template <typename T> 37 | T *malloc_shared(size_t Count, const queue &Q); 38 | template <typename T> 39 | T *aligned_alloc_shared(size_t Alignment, size_t Count, 40 | const device &Dev, 41 | const context &Ctxt); 42 | template <typename T> 43 | T *aligned_alloc_shared(size_t Alignment, size_t Count, 44 | const queue &Q); 45 | 46 | // Single Function 47 | template <typename T> 48 | T *malloc(size_t Count, const device &Dev, 49 | const context &Ctxt, usm::alloc Kind); 50 | template <typename T> 51 | T *malloc(size_t Count, const queue &Q, usm::alloc Kind); 52 | template <typename T> 53 | T *aligned_alloc(size_t Alignment, size_t Count, 54 | const device &Dev, const context &Ctxt, 55 | usm::alloc Kind); 56 | template <typename T> 57 | T *aligned_alloc(size_t Alignment, size_t Count, 58 | const queue &Q, usm::alloc Kind); 59 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_5_allocation_styles.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <sycl/sycl.hpp> 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | // Allocate N floats 13 | 14 | // C-style 15 | float *f1 = static_cast<float *>(malloc_shared( 16 | N * sizeof(float), q.get_device(), q.get_context())); 17 | 18 | // C++-style 19 | float *f2 = malloc_shared<float>(N, q); 20 | 21 | // C++-allocator-style 22 | usm_allocator<float, usm::alloc::shared> alloc(q); 23 | float *f3 = alloc.allocate(N); 24 | 25 | // Free our allocations 26 | free(f1, q.get_context()); 27 | free(f2, q); 28 | alloc.deallocate(f3, N); 29 | 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_6_usm_explicit_data_movement.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <sycl/sycl.hpp> 6 | #include <array> 7 | using namespace sycl; 8 | constexpr int N = 42; 9 | 10 | int main() { 11 | queue q; 12 | 13 | std::array<int, N> host_array; 14 | int* device_array = malloc_device<int>(N, q); 15 | for (int i = 0; i < N; i++) host_array[i] = N; 16 | 17 | q.submit([&](handler& h) { 18 | // copy host_array to device_array 19 | h.memcpy(device_array, &host_array[0], N * sizeof(int)); 20 | }); 21 | q.wait(); // needed for now (we learn a better way later) 22 | 23 | q.submit([&](handler& h) { 24 | h.parallel_for(N, [=](id<1> i) { device_array[i]++; }); 25 | }); 26 | q.wait(); // needed for now (we learn a better way later) 27 | 28 | q.submit([&](handler& h) { 29 | // copy device_array back to host_array 30 | h.memcpy(&host_array[0], device_array, N
* sizeof(int)); 31 | }); 32 | q.wait(); // needed for now (we learn a better way later) 33 | 34 | free(device_array, q); 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_7_usm_implicit_data_movement.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | int* host_array = malloc_host(N, q); 13 | int* shared_array = malloc_shared(N, q); 14 | for (int i = 0; i < N; i++) host_array[i] = i; 15 | 16 | q.submit([&](handler& h) { 17 | h.parallel_for(N, [=](id<1> i) { 18 | // access shared_array and host_array on device 19 | shared_array[i] = host_array[i] + 1; 20 | }); 21 | }); 22 | q.wait(); 23 | 24 | free(shared_array, q); 25 | free(host_array, q); 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_8_prefetch_memadvise.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | // Appropriate values depend on your HW 9 | constexpr int BLOCK_SIZE = 42; 10 | constexpr int NUM_BLOCKS = 2500; 11 | constexpr int N = NUM_BLOCKS * BLOCK_SIZE; 12 | 13 | int main() { 14 | queue q; 15 | int *data = malloc_shared(N, q); 16 | int *read_only_data = malloc_shared(BLOCK_SIZE, q); 17 | 18 | for (int i = 0; i < N; i++) { 19 | data[i] = -i; 20 | } 21 | 22 | // Never updated after initialization 23 | for (int i = 0; i < BLOCK_SIZE; i++) { 24 | read_only_data[i] = i; 25 | } 26 | 27 | // Mark this data as "read only" so the runtime can copy 28 | // it to the device instead of migrating it from the host. 29 | // Real values will be documented by your backend. 
30 | int HW_SPECIFIC_ADVICE_RO = 0; 31 | q.mem_advise(read_only_data, BLOCK_SIZE, 32 | HW_SPECIFIC_ADVICE_RO); 33 | event e = q.prefetch(data, BLOCK_SIZE * sizeof(int)); 34 | 35 | for (int b = 0; b < NUM_BLOCKS; b++) { 36 | q.parallel_for(range{BLOCK_SIZE}, e, [=](id<1> i) { 37 | data[b * BLOCK_SIZE + i] += read_only_data[i]; 38 | }); 39 | if ((b + 1) < NUM_BLOCKS) { 40 | // Prefetch next block 41 | e = q.prefetch(data + (b + 1) * BLOCK_SIZE, 42 | BLOCK_SIZE * sizeof(int)); 43 | } 44 | } 45 | q.wait(); 46 | 47 | free(data, q); 48 | free(read_only_data, q); 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_9_queries.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | namespace dinfo = info::device; 8 | constexpr int N = 42; 9 | 10 | template 11 | void foo(T data, id<1> i) { 12 | data[i] = N; 13 | } 14 | 15 | int main() { 16 | queue q; 17 | auto dev = q.get_device(); 18 | auto ctxt = q.get_context(); 19 | bool usm_shared = dev.has(aspect::usm_shared_allocations); 20 | bool usm_device = dev.has(aspect::usm_device_allocations); 21 | bool use_USM = usm_shared || usm_device; 22 | 23 | if (use_USM) { 24 | int *data; 25 | if (usm_shared) { 26 | data = malloc_shared(N, q); 27 | } else /* use device allocations */ { 28 | data = malloc_device(N, q); 29 | } 30 | std::cout << "Using USM with " 31 | << ((get_pointer_type(data, ctxt) == 32 | usm::alloc::shared) 33 | ? "shared" 34 | : "device") 35 | << " allocations on " 36 | << get_pointer_device(data, ctxt) 37 | .get_info() 38 | << "\n"; 39 | q.parallel_for(N, [=](id<1> i) { foo(data, i); }); 40 | q.wait(); 41 | free(data, q); 42 | } else /* use buffers */ { 43 | buffer data{range{N}}; 44 | q.submit([&](handler &h) { 45 | accessor a(data, h); 46 | h.parallel_for(N, [=](id<1> i) { foo(a, i); }); 47 | }); 48 | q.wait(); 49 | } 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_7_2_3_4_creating_buffers 8 | SOURCES fig_7_2_3_4_creating_buffers.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_7_5_buffer_properties 13 | SOURCES fig_7_5_buffer_properties.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_7_8_accessors_simple 18 | SOURCES fig_7_8_accessors_simple.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_7_10_accessors 23 | SOURCES fig_7_10_accessors.cpp) 24 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/fig_7_1.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
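
Before the buffer class template declared next in fig_7_1.hpp, here is a small sketch of something the later figures create but do not exercise: kernels running on non-overlapping sub-buffers of one parent buffer. This is not a book figure; the shapes mirror the b10/b11/b12 pattern in fig_7_2_3_4_creating_buffers.cpp, and everything else is illustrative.

#include <sycl/sycl.hpp>
#include <cassert>
using namespace sycl;

int main() {
  queue q;

  // A 2x5 parent buffer and two non-overlapping 1x5 sub-buffers,
  // one per row
  buffer<int, 2> parent{range{2, 5}};
  buffer<int, 2> row0{parent, id{0, 0}, range{1, 5}};
  buffer<int, 2> row1{parent, id{1, 0}, range{1, 5}};

  // Kernels on disjoint sub-buffers have no data dependence on
  // each other, so the runtime may schedule them independently
  q.submit([&](handler &h) {
    accessor a{row0, h, write_only, no_init};
    h.parallel_for(range{1, 5}, [=](id<2> i) { a[i] = 1; });
  });
  q.submit([&](handler &h) {
    accessor a{row1, h, write_only, no_init};
    h.parallel_for(range{1, 5}, [=](id<2> i) { a[i] = 2; });
  });

  // Reading the parent waits for both sub-buffer writers
  host_accessor result{parent, read_only};
  assert(result[0][4] == 1 && result[1][4] == 2);
  return 0;
}
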
7 | 8 | template 9 | class buffer; 10 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/fig_7_10_accessors.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // BEGIN CODE SNIP 6 | #include 7 | #include 8 | using namespace sycl; 9 | constexpr int N = 42; 10 | 11 | int main() { 12 | queue q; 13 | 14 | // Create 3 buffers of 42 ints 15 | buffer buf_a{range{N}}; 16 | buffer buf_b{range{N}}; 17 | buffer buf_c{range{N}}; 18 | 19 | accessor pc{buf_c}; 20 | 21 | q.submit([&](handler &h) { 22 | accessor a{buf_a, h, write_only, no_init}; 23 | accessor b{buf_b, h, write_only, no_init}; 24 | accessor c{buf_c, h, write_only, no_init}; 25 | h.parallel_for(N, [=](id<1> i) { 26 | a[i] = 1; 27 | b[i] = 40; 28 | c[i] = 0; 29 | }); 30 | }); 31 | q.submit([&](handler &h) { 32 | accessor a{buf_a, h, read_only}; 33 | accessor b{buf_b, h, read_only}; 34 | accessor c{buf_c, h, read_write}; 35 | h.parallel_for(N, 36 | [=](id<1> i) { c[i] += a[i] + b[i]; }); 37 | }); 38 | q.submit([&](handler &h) { 39 | h.require(pc); 40 | h.parallel_for(N, [=](id<1> i) { pc[i]++; }); 41 | }); 42 | 43 | host_accessor result{buf_c, read_only}; 44 | 45 | for (int i = 0; i < N; i++) { 46 | assert(result[i] == N); 47 | } 48 | return 0; 49 | } 50 | // END CODE SNIP 51 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/fig_7_2_3_4_creating_buffers.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | int main() { 9 | // BEGIN CODE SNIP 10 | // Create a buffer of 2x5 ints using the default allocator 11 | buffer> b1{range<2>{2, 5}}; 12 | 13 | // Create a buffer of 2x5 ints using the default allocator 14 | // and CTAD for range 15 | buffer b2{range{2, 5}}; 16 | 17 | // Create a buffer of 20 floats using a 18 | // default-constructed std::allocator 19 | buffer> b3{range{20}}; 20 | 21 | // Create a buffer of 20 floats using a passed-in 22 | // allocator 23 | std::allocator myFloatAlloc; 24 | buffer> b4{range(20), 25 | myFloatAlloc}; 26 | // END CODE SNIP 27 | 28 | // BEGIN CODE SNIP 29 | // Create a buffer of 4 doubles and initialize it from a 30 | // host pointer 31 | double myDoubles[4] = {1.1, 2.2, 3.3, 4.4}; 32 | buffer b5{myDoubles, range{4}}; 33 | 34 | // Create a buffer of 5 doubles and initialize it from a 35 | // host pointer to const double 36 | const double myConstDbls[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; 37 | buffer b6{myConstDbls, range{5}}; 38 | 39 | // Create a buffer from a shared pointer to int 40 | auto sharedPtr = std::make_shared(42); 41 | buffer b7{sharedPtr, range{1}}; 42 | // END CODE SNIP 43 | 44 | // BEGIN CODE SNIP 45 | // Create a buffer of ints from an input iterator 46 | std::vector myVec; 47 | buffer b8{myVec.begin(), myVec.end()}; 48 | buffer b9{myVec}; 49 | 50 | // Create a buffer of 2x5 ints and 2 non-overlapping 51 | // sub-buffers of 5 ints. 
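// (A sub-buffer such as b11 or b12 must describe a region that
//  is contiguous in memory and that lies entirely within the
//  parent buffer; fig_5_2_sync_error.cpp shows the exception
//  thrown when the requested region does not fit.)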
52 | buffer b10{range{2, 5}}; 53 | buffer b11{b10, id{0, 0}, range{1, 5}}; 54 | buffer b12{b10, id{1, 0}, range{1, 5}}; 55 | // END CODE SNIP 56 | 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/fig_7_5_buffer_properties.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // BEGIN CODE SNIP 11 | queue q; 12 | int my_ints[42]; 13 | 14 | // Create a buffer of 42 ints 15 | buffer b{range(42)}; 16 | 17 | // Create a buffer of 42 ints, initialize with a host 18 | // pointer, and add the use_host_pointer property 19 | buffer b1{my_ints, 20 | range(42), 21 | {property::buffer::use_host_ptr{}}}; 22 | 23 | // Create a buffer of 42 ints, initialize with a host 24 | // pointer, and add the use_mutex property 25 | std::mutex myMutex; 26 | buffer b2{my_ints, 27 | range(42), 28 | {property::buffer::use_mutex{myMutex}}}; 29 | // Retrieve a pointer to the mutex used by this buffer 30 | auto mutexPtr = 31 | b2.get_property() 32 | .get_mutex_ptr(); 33 | // Lock the mutex until we exit scope 34 | std::lock_guard guard{*mutexPtr}; 35 | 36 | // Create a context-bound buffer of 42 ints, initialized 37 | // from a host pointer 38 | buffer b3{ 39 | my_ints, 40 | range(42), 41 | {property::buffer::context_bound{q.get_context()}}}; 42 | // END CODE SNIP 43 | 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/fig_7_8_accessors_simple.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // BEGIN CODE SNIP 6 | #include 7 | #include 8 | using namespace sycl; 9 | constexpr int N = 42; 10 | 11 | int main() { 12 | queue q; 13 | // Create 3 buffers of 42 ints 14 | buffer a_buf{range{N}}; 15 | buffer b_buf{range{N}}; 16 | buffer c_buf{range{N}}; 17 | accessor pc{c_buf}; 18 | 19 | q.submit([&](handler &h) { 20 | accessor a{a_buf, h}; 21 | accessor b{b_buf, h}; 22 | accessor c{c_buf, h}; 23 | h.parallel_for(N, [=](id<1> i) { 24 | a[i] = 1; 25 | b[i] = 40; 26 | c[i] = 0; 27 | }); 28 | }); 29 | q.submit([&](handler &h) { 30 | accessor a{a_buf, h}; 31 | accessor b{b_buf, h}; 32 | accessor c{c_buf, h}; 33 | h.parallel_for(N, 34 | [=](id<1> i) { c[i] += a[i] + b[i]; }); 35 | }); 36 | q.submit([&](handler &h) { 37 | h.require(pc); 38 | h.parallel_for(N, [=](id<1> i) { pc[i]++; }); 39 | }); 40 | 41 | host_accessor result{c_buf}; 42 | for (int i = 0; i < N; i++) { 43 | assert(result[i] == N); 44 | } 45 | return 0; 46 | } 47 | // END CODE SNIP 48 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | if(NOT WITHCUDA) 6 | # TEMPORARILY DISABLE: doesn't work with CUDA backend. 7 | add_book_sample( 8 | TEST 9 | TARGET fig_8_3_linear_dependence_in_order 10 | SOURCES fig_8_3_linear_dependence_in_order.cpp) 11 | endif() 12 | 13 | if(NOT WITHCUDA) 14 | # TEMPORARILY DISABLE: doesn't work with CUDA backend. 
15 | add_book_sample( 16 | TEST 17 | TARGET fig_8_4_linear_dependence_events 18 | SOURCES fig_8_4_linear_dependence_events.cpp) 19 | endif() 20 | 21 | add_book_sample( 22 | TEST 23 | TARGET fig_8_5_linear_dependence_buffers 24 | SOURCES fig_8_5_linear_dependence_buffers.cpp) 25 | 26 | if(NOT WITHCUDA) 27 | # TEMPORARILY DISABLE: doesn't work with CUDA backend. 28 | add_book_sample( 29 | TEST 30 | TARGET fig_8_6_y_in_order 31 | SOURCES fig_8_6_y_in_order.cpp) 32 | endif() 33 | 34 | if(NOT WITHCUDA) 35 | # TEMPORARILY DISABLE: doesn't work with CUDA backend. 36 | add_book_sample( 37 | TEST 38 | TARGET fig_8_7_y_events 39 | SOURCES fig_8_7_y_events.cpp) 40 | endif() 41 | 42 | add_book_sample( 43 | TEST 44 | TARGET fig_8_8_y_buffers 45 | SOURCES fig_8_8_y_buffers.cpp) 46 | 47 | 48 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_3_linear_dependence_in_order.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q{property::queue::in_order()}; 11 | 12 | int *data = malloc_shared(N, q); 13 | 14 | q.parallel_for(N, [=](id<1> i) { data[i] = 1; }); 15 | 16 | q.single_task([=]() { 17 | for (int i = 1; i < N; i++) data[0] += data[i]; 18 | }); 19 | q.wait(); 20 | 21 | assert(data[0] == N); 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_4_linear_dependence_events.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | int *data = malloc_shared(N, q); 13 | 14 | auto e = q.parallel_for(N, [=](id<1> i) { data[i] = 1; }); 15 | 16 | q.submit([&](handler &h) { 17 | h.depends_on(e); 18 | h.single_task([=]() { 19 | for (int i = 1; i < N; i++) data[0] += data[i]; 20 | }); 21 | }); 22 | q.wait(); 23 | 24 | assert(data[0] == N); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_5_linear_dependence_buffers.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | buffer data{range{N}}; 13 | 14 | q.submit([&](handler &h) { 15 | accessor a{data, h}; 16 | h.parallel_for(N, [=](id<1> i) { a[i] = 1; }); 17 | }); 18 | 19 | q.submit([&](handler &h) { 20 | accessor a{data, h}; 21 | h.single_task([=]() { 22 | for (int i = 1; i < N; i++) a[0] += a[i]; 23 | }); 24 | }); 25 | 26 | host_accessor h_a{data}; 27 | assert(h_a[0] == N); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_6_y_in_order.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int 
main() { 10 | queue q{property::queue::in_order()}; 11 | 12 | int *data1 = malloc_shared(N, q); 13 | int *data2 = malloc_shared(N, q); 14 | 15 | q.parallel_for(N, [=](id<1> i) { data1[i] = 1; }); 16 | 17 | q.parallel_for(N, [=](id<1> i) { data2[i] = 2; }); 18 | 19 | q.parallel_for(N, [=](id<1> i) { data1[i] += data2[i]; }); 20 | 21 | q.single_task([=]() { 22 | for (int i = 1; i < N; i++) data1[0] += data1[i]; 23 | 24 | data1[0] /= 3; 25 | }); 26 | q.wait(); 27 | 28 | assert(data1[0] == N); 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_7_y_events.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | int *data1 = malloc_shared(N, q); 13 | int *data2 = malloc_shared(N, q); 14 | 15 | auto e1 = 16 | q.parallel_for(N, [=](id<1> i) { data1[i] = 1; }); 17 | 18 | auto e2 = 19 | q.parallel_for(N, [=](id<1> i) { data2[i] = 2; }); 20 | 21 | auto e3 = q.parallel_for( 22 | range{N}, {e1, e2}, 23 | [=](id<1> i) { data1[i] += data2[i]; }); 24 | 25 | q.single_task(e3, [=]() { 26 | for (int i = 1; i < N; i++) data1[0] += data1[i]; 27 | 28 | data1[0] /= 3; 29 | }); 30 | q.wait(); 31 | 32 | assert(data1[0] == N); 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_8_y_buffers.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | buffer data1{range{N}}; 13 | buffer data2{range{N}}; 14 | 15 | q.submit([&](handler &h) { 16 | accessor a{data1, h}; 17 | h.parallel_for(N, [=](id<1> i) { a[i] = 1; }); 18 | }); 19 | 20 | q.submit([&](handler &h) { 21 | accessor b{data2, h}; 22 | h.parallel_for(N, [=](id<1> i) { b[i] = 2; }); 23 | }); 24 | 25 | q.submit([&](handler &h) { 26 | accessor a{data1, h}; 27 | accessor b{data2, h, read_only}; 28 | h.parallel_for(N, [=](id<1> i) { a[i] += b[i]; }); 29 | }); 30 | 31 | q.submit([&](handler &h) { 32 | accessor a{data1, h}; 33 | h.single_task([=]() { 34 | for (int i = 1; i < N; i++) a[0] += a[i]; 35 | 36 | a[0] /= 3; 37 | }); 38 | }); 39 | 40 | host_accessor h_a{data1}; 41 | assert(h_a[0] == N); 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /samples/Ch09_communication_and_sychronization/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_9_4_naive_matmul 8 | SOURCES matmul_harness.cpp fig_9_4_naive_matmul.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_9_7_local_accessors 13 | SOURCES fig_9_7_local_accessors.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_9_8_ndrange_tiled_matmul 18 | SOURCES matmul_harness.cpp fig_9_8_ndrange_tiled_matmul.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_9_9_sub_group_barrier 23 | SOURCES fig_9_9_sub_group_barrier.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_9_11_matmul_broadcast 28 | SOURCES 
matmul_harness.cpp fig_9_11_matmul_broadcast.cpp) 29 | 30 | add_book_sample( 31 | TARGET fig_9_12_ndrange_sub_group_matmul 32 | SOURCES matmul_harness.cpp fig_9_12_ndrange_sub_group_matmul.cpp) 33 | -------------------------------------------------------------------------------- /samples/Ch09_communication_and_sychronization/fig_9_4_naive_matmul.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | extern const int matrixSize = 128; 10 | static const int iterations = 16; 11 | 12 | template 13 | double run_sycl(const std::vector& vecA, 14 | const std::vector& vecB, 15 | std::vector& vecC) { 16 | using ns = std::chrono::nanoseconds; 17 | ns::rep best_time = std::numeric_limits::max(); 18 | 19 | const int M = matrixSize; 20 | const int N = matrixSize; 21 | const int K = matrixSize; 22 | 23 | std::fill(vecC.begin(), vecC.end(), (T)0); 24 | 25 | buffer bufA{vecA.data(), range<2>{M, K}}; 26 | buffer bufB{vecB.data(), range<2>{K, N}}; 27 | buffer bufC{vecC.data(), range<2>{M, N}}; 28 | 29 | queue q; 30 | std::cout << "Running on device: " 31 | << q.get_device().get_info() 32 | << "\n"; 33 | 34 | for (int i = 0; i < iterations; ++i) { 35 | auto start = std::chrono::steady_clock::now(); 36 | 37 | q.submit([&](handler& h) { 38 | accessor matrixA{bufA, h}; 39 | accessor matrixB{bufB, h}; 40 | accessor matrixC{bufC, h}; 41 | 42 | // BEGIN CODE SNIP 43 | h.parallel_for(range{M, N}, [=](id<2> id) { 44 | int m = id[0]; 45 | int n = id[1]; 46 | 47 | // Template type T is the type of data stored 48 | // in the matrix 49 | T sum = 0; 50 | for (int k = 0; k < K; k++) { 51 | sum += matrixA[m][k] * matrixB[k][n]; 52 | } 53 | 54 | matrixC[m][n] = sum; 55 | }); 56 | // END CODE SNIP 57 | }); 58 | q.wait(); 59 | 60 | auto duration = 61 | std::chrono::steady_clock::now() - start; 62 | auto time = 63 | std::chrono::duration_cast(duration).count(); 64 | 65 | best_time = std::min(time, best_time); 66 | } 67 | 68 | double best_seconds = (double)best_time / 1e9; 69 | 70 | return best_seconds; 71 | } 72 | 73 | template double run_sycl( 74 | const std::vector& vecA, 75 | const std::vector& vecB, 76 | std::vector& vecC); 77 | -------------------------------------------------------------------------------- /samples/Ch09_communication_and_sychronization/fig_9_7_local_accessors.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) data[i] = i; 15 | 16 | { 17 | buffer dataBuf{data}; 18 | 19 | queue q{default_selector_v}; 20 | std::cout 21 | << "Running on device: " 22 | << q.get_device().get_info() 23 | << "\n"; 24 | 25 | q.submit([&](handler& h) { 26 | // BEGIN CODE SNIP 27 | // This is a typical global accessor. 
28 | accessor dataAcc{dataBuf, h}; 29 | 30 | // This is a 1D local accessor consisting of 16 ints: 31 | auto localIntAcc = local_accessor(16, h); 32 | 33 | // This is a 2D local accessor consisting of 4 x 4 34 | // floats: 35 | auto localFloatAcc = 36 | local_accessor({4, 4}, h); 37 | 38 | h.parallel_for( 39 | nd_range<1>{{size}, {16}}, [=](nd_item<1> item) { 40 | auto index = item.get_global_id(); 41 | auto local_index = item.get_local_id(); 42 | 43 | // Within a kernel, a local accessor may be read 44 | // from and written to like any other accessor. 45 | localIntAcc[local_index] = dataAcc[index] + 1; 46 | dataAcc[index] = localIntAcc[local_index]; 47 | }); 48 | // END CODE SNIP 49 | }); 50 | } 51 | 52 | for (int i = 0; i < size; i++) { 53 | if (data[i] != i + 1) { 54 | std::cout << "Results did not validate at index " << i 55 | << "!\n"; 56 | return -1; 57 | } 58 | } 59 | 60 | std::cout << "Success!\n"; 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /samples/Ch09_communication_and_sychronization/fig_9_9_sub_group_barrier.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) data[i] = i; 15 | 16 | { 17 | buffer data_buf{data}; 18 | 19 | queue q; 20 | std::cout 21 | << "Running on device: " 22 | << q.get_device().get_info() 23 | << "\n"; 24 | 25 | q.submit([&](handler& h) { 26 | accessor data_acc{data_buf, h}; 27 | 28 | // BEGIN CODE SNIP 29 | h.parallel_for( 30 | nd_range{{size}, {16}}, [=](nd_item<1> item) { 31 | auto sg = item.get_sub_group(); 32 | group_barrier(sg); 33 | // ... 
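// (group_barrier(sg) returns only once every work-item in this
//  sub-group has reached it, and writes made before the barrier
//  are visible to the whole sub-group afterward.)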
34 | auto index = item.get_global_id(); 35 | data_acc[index] = data_acc[index] + 1; 36 | }); 37 | // END CODE SNIP 38 | }); 39 | } 40 | 41 | for (int i = 0; i < size; i++) { 42 | if (data[i] != i + 1) { 43 | std::cout << "Results did not validate at index " << i 44 | << "!\n"; 45 | return -1; 46 | } 47 | } 48 | 49 | std::cout << "Success!\n"; 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_10_2_kernel_lambda 8 | SOURCES fig_10_2_kernel_lambda.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_10_3_optional_kernel_lambda_elements 13 | SOURCES fig_10_3_optional_kernel_lambda_elements.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_10_4_named_kernel_lambda 18 | SOURCES fig_10_4_named_kernel_lambda.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_10_5_unnamed_kernel_lambda 23 | SOURCES fig_10_5_unnamed_kernel_lambda.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_10_6_kernel_functor 28 | SOURCES fig_10_6_kernel_functor.cpp) 29 | 30 | add_book_sample( 31 | TEST 32 | TARGET fig_10_7_optional_kernel_functor_elements 33 | SOURCES fig_10_7_optional_kernel_functor_elements.cpp) 34 | 35 | add_book_sample( 36 | TEST 37 | TARGET fig_10_8_use_kernel_bundle 38 | SOURCES fig_10_8_use_kernel_bundle.cpp) 39 | 40 | add_book_sample( 41 | TEST 42 | TARGET fig_10_9_use_specific_kernel_bundle 43 | SOURCES fig_10_9_use_specific_kernel_bundle.cpp) 44 | 45 | add_book_sample( 46 | TEST 47 | TARGET fig_10_10_kernel_query 48 | SOURCES fig_10_10_kernel_query.cpp) 49 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_2_kernel_lambda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) { 15 | data[i] = i; 16 | } 17 | 18 | { 19 | buffer data_buf{data}; 20 | 21 | queue q; 22 | std::cout 23 | << "Running on device: " 24 | << q.get_device().get_info() 25 | << "\n"; 26 | 27 | q.submit([&](handler& h) { 28 | accessor data_acc{data_buf, h}; 29 | // BEGIN CODE SNIP 30 | h.parallel_for( 31 | size, 32 | // This is the start of a kernel lambda 33 | // expression: 34 | [=](id<1> i) { data_acc[i] = data_acc[i] + 1; } 35 | // This is the end of the kernel lambda 36 | // expression. 
37 | ); 38 | // END CODE SNIP 39 | }); 40 | } 41 | 42 | for (int i = 0; i < size; i++) { 43 | if (data[i] != i + 1) { 44 | std::cout << "Results did not validate at index " << i 45 | << "!\n"; 46 | return -1; 47 | } 48 | } 49 | 50 | std::cout << "Success!\n"; 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_3_optional_kernel_lambda_elements.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) { 15 | data[i] = i; 16 | } 17 | 18 | { 19 | buffer data_buf{data}; 20 | 21 | queue q; 22 | std::cout 23 | << "Running on device: " 24 | << q.get_device().get_info() 25 | << "\n"; 26 | 27 | q.submit([&](handler& h) { 28 | // BEGIN CODE SNIP 29 | accessor data_acc{data_buf, h}; 30 | 31 | h.parallel_for( 32 | nd_range{{size}, {8}}, 33 | [=](nd_item<1> item) noexcept 34 | [[sycl::reqd_work_group_size(8)]] -> void { 35 | auto i = item.get_global_id(); 36 | data_acc[i] = data_acc[i] + 1; 37 | }); 38 | }); 39 | // END CODE SNIP 40 | } 41 | 42 | for (int i = 0; i < size; i++) { 43 | if (data[i] != i + 1) { 44 | std::cout << "Results did not validate at index " << i 45 | << "!\n"; 46 | return -1; 47 | } 48 | } 49 | 50 | std::cout << "Success!\n"; 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_4_named_kernel_lambda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | class Add; 11 | 12 | int main() { 13 | constexpr size_t size = 16; 14 | std::array data; 15 | 16 | for (int i = 0; i < size; i++) { 17 | data[i] = i; 18 | } 19 | 20 | { 21 | buffer data_buf{data}; 22 | 23 | queue q; 24 | std::cout 25 | << "Running on device: " 26 | << q.get_device().get_info() 27 | << "\n"; 28 | 29 | q.submit([&](handler& h) { 30 | accessor data_acc{data_buf, h}; 31 | 32 | // BEGIN CODE SNIP 33 | // In this example, "class Add" names the kernel 34 | // lambda expression. 
35 | h.parallel_for(size, [=](id<1> i) { 36 | data_acc[i] = data_acc[i] + 1; 37 | }); 38 | // END CODE SNIP 39 | }); 40 | } 41 | 42 | for (int i = 0; i < size; i++) { 43 | if (data[i] != i + 1) { 44 | std::cout << "Results did not validate at index " << i 45 | << "!\n"; 46 | return -1; 47 | } 48 | } 49 | 50 | std::cout << "Success!\n"; 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_5_unnamed_kernel_lambda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) { 15 | data[i] = i; 16 | } 17 | 18 | { 19 | buffer data_buf{data}; 20 | 21 | queue q; 22 | std::cout 23 | << "Running on device: " 24 | << q.get_device().get_info() 25 | << "\n"; 26 | 27 | q.submit([&](handler& h) { 28 | accessor data_acc{data_buf, h}; 29 | 30 | // In many cases the explicit kernel name template 31 | // parameter is not required. 32 | // BEGIN CODE SNIP 33 | h.parallel_for(size, [=](id<1> i) { 34 | data_acc[i] = data_acc[i] + 1; 35 | }); 36 | // END CODE SNIP 37 | }); 38 | } 39 | 40 | for (int i = 0; i < size; i++) { 41 | if (data[i] != i + 1) { 42 | std::cout << "Results did not validate at index " << i 43 | << "!\n"; 44 | return -1; 45 | } 46 | } 47 | 48 | std::cout << "Success!\n"; 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_6_kernel_functor.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | // BEGIN CODE SNIP 11 | class Add { 12 | public: 13 | Add(accessor acc) : data_acc(acc) {} 14 | void operator()(id<1> i) const { 15 | data_acc[i] = data_acc[i] + 1; 16 | } 17 | 18 | private: 19 | accessor data_acc; 20 | }; 21 | 22 | int main() { 23 | constexpr size_t size = 16; 24 | std::array data; 25 | 26 | for (int i = 0; i < size; i++) { 27 | data[i] = i; 28 | } 29 | 30 | { 31 | buffer data_buf{data}; 32 | 33 | queue q; 34 | std::cout 35 | << "Running on device: " 36 | << q.get_device().get_info() 37 | << "\n"; 38 | 39 | q.submit([&](handler& h) { 40 | accessor data_acc{data_buf, h}; 41 | h.parallel_for(size, Add(data_acc)); 42 | }); 43 | } 44 | // END CODE SNIP 45 | 46 | for (int i = 0; i < size; i++) { 47 | if (data[i] != i + 1) { 48 | std::cout << "Results did not validate at index " << i 49 | << "!\n"; 50 | return -1; 51 | } 52 | } 53 | 54 | std::cout << "Success!\n"; 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_7_optional_kernel_functor_elements.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | // BEGIN CODE SNIP 11 | class AddWithAttribute { 12 | public: 13 | AddWithAttribute(accessor acc) : data_acc(acc) {} 14 | [[sycl::reqd_work_group_size(8)]] void operator()( 15 | nd_item<1> item) const { 16 | auto i = item.get_global_id(); 17 | 
data_acc[i] = data_acc[i] + 1; 18 | } 19 | 20 | private: 21 | accessor data_acc; 22 | }; 23 | 24 | class MulWithAttribute { 25 | public: 26 | MulWithAttribute(accessor acc) : data_acc(acc) {} 27 | void operator() [[sycl::reqd_work_group_size(8)]] ( 28 | nd_item<1> item) const { 29 | auto i = item.get_global_id(); 30 | data_acc[i] = data_acc[i] * 2; 31 | } 32 | 33 | private: 34 | accessor data_acc; 35 | }; 36 | // END CODE SNIP 37 | 38 | int main() { 39 | constexpr size_t size = 16; 40 | std::array data; 41 | 42 | for (int i = 0; i < size; i++) { 43 | data[i] = i; 44 | } 45 | 46 | { 47 | buffer data_buf{data}; 48 | 49 | queue q; 50 | std::cout 51 | << "Running on device: " 52 | << q.get_device().get_info() 53 | << "\n"; 54 | 55 | q.submit([&](handler& h) { 56 | accessor data_acc{data_buf, h}; 57 | h.parallel_for(nd_range{{size}, {8}}, 58 | AddWithAttribute(data_acc)); 59 | }); 60 | 61 | q.submit([&](handler& h) { 62 | accessor data_acc{data_buf, h}; 63 | h.parallel_for(nd_range{{size}, {8}}, 64 | MulWithAttribute(data_acc)); 65 | }); 66 | } 67 | 68 | for (int i = 0; i < size; i++) { 69 | if (data[i] != (i + 1) * 2) { 70 | std::cout << "Results did not validate at index " << i 71 | << "!\n"; 72 | return -1; 73 | } 74 | } 75 | 76 | std::cout << "Success!\n"; 77 | return 0; 78 | } 79 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_8_use_kernel_bundle.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) { 15 | data[i] = i; 16 | } 17 | 18 | { 19 | buffer data_buf{data}; 20 | 21 | queue q; 22 | std::cout 23 | << "Running on device: " 24 | << q.get_device().get_info() 25 | << "\n"; 26 | 27 | // BEGIN CODE SNIP 28 | auto kb = get_kernel_bundle( 29 | q.get_context()); 30 | 31 | std::cout 32 | << "All kernel compilation should be done now.\n"; 33 | 34 | q.submit([&](handler& h) { 35 | // Use the pre-compiled kernel from the kernel bundle. 
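// (use_kernel_bundle must appear before the kernel is invoked in
//  this command group, and the executable bundle must contain
//  every kernel that the command group invokes.)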
36 | h.use_kernel_bundle(kb); 37 | 38 | accessor data_acc{data_buf, h}; 39 | h.parallel_for(range{size}, [=](id<1> i) { 40 | data_acc[i] = data_acc[i] + 1; 41 | }); 42 | }); 43 | // END CODE SNIP 44 | } 45 | 46 | for (int i = 0; i < size; i++) { 47 | if (data[i] != i + 1) { 48 | std::cout << "Results did not validate at index " << i 49 | << "!\n"; 50 | return -1; 51 | } 52 | } 53 | 54 | std::cout << "Success!\n"; 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_9_use_specific_kernel_bundle.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | class Add; 11 | 12 | int main() { 13 | constexpr size_t size = 16; 14 | std::array data; 15 | 16 | for (int i = 0; i < size; i++) { 17 | data[i] = i; 18 | } 19 | 20 | { 21 | buffer data_buf{data}; 22 | 23 | queue q; 24 | std::cout 25 | << "Running on device: " 26 | << q.get_device().get_info() 27 | << "\n"; 28 | 29 | // BEGIN CODE SNIP 30 | auto kid = get_kernel_id(); 31 | auto kb = get_kernel_bundle( 32 | q.get_context(), {q.get_device()}, {kid}); 33 | 34 | std::cout << "Kernel compilation should be done now.\n"; 35 | 36 | q.submit([&](handler& h) { 37 | // Use the pre-compiled kernel from the kernel bundle. 38 | h.use_kernel_bundle(kb); 39 | 40 | accessor data_acc{data_buf, h}; 41 | h.parallel_for(range{size}, [=](id<1> i) { 42 | data_acc[i] = data_acc[i] + 1; 43 | }); 44 | }); 45 | // END CODE SNIP 46 | } 47 | 48 | for (int i = 0; i < size; i++) { 49 | if (data[i] != i + 1) { 50 | std::cout << "Results did not validate at index " << i 51 | << "!\n"; 52 | return -1; 53 | } 54 | } 55 | 56 | std::cout << "Success!\n"; 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_11_2_marray 8 | SOURCES fig_11_2_marray.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_11_4_load_store 13 | SOURCES fig_11_4_load_store.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_11_6_swizzle_vec 18 | SOURCES fig_11_6_swizzle_vec.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_11_7_vector_exec 23 | SOURCES fig_11_7_vector_exec.cpp) 24 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_10.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | q.submit([&](sycl::handler &h) { // assume sub group size is 8 9 | // ... 10 | h.parallel_for(range<1>(8), [=](id<1> i) { 11 | // ... 12 | float4 y4 = b[i]; // i=0, 1, 2, ... 13 | // ... 
14 | float x = dowork(&y4); // the “dowork” expects y4, 15 | // i.e., vec_y[8][4] layout 16 | }); 17 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_2_marray.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #define M 32 8 | 9 | using namespace sycl; 10 | 11 | template 12 | bool checkEqual(marray A, marray B) { 13 | for (int i = 0; i < N; i++) { 14 | if (A[i] != B[i]) { 15 | return false; 16 | } 17 | } 18 | return true; 19 | } 20 | 21 | int main() { 22 | // BEGIN CODE SNIP 23 | queue q; 24 | marray input{1.0004f, 1e-4f, 1.4f, 14.0f}; 25 | marray res[M]; 26 | for (int i = 0; i < M; i++) 27 | res[i] = {-(i + 1), -(i + 1), -(i + 1), -(i + 1)}; 28 | { 29 | buffer in_buf(&input, range{1}); 30 | buffer re_buf(res, range{M}); 31 | 32 | q.submit([&](handler &cgh) { 33 | accessor re_acc{re_buf, cgh, read_write}; 34 | accessor in_acc{in_buf, cgh, read_only}; 35 | 36 | cgh.parallel_for(range<1>(M), [=](id<1> idx) { 37 | int i = idx[0]; 38 | re_acc[i] = cos(in_acc[0]); 39 | }); 40 | }); 41 | } 42 | // END CODE SNIP 43 | 44 | if (checkEqual(res[0], res[M / 2])) 45 | std::cout << "passed\n"; 46 | else 47 | std::cout << "failed\n"; 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_3.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | void load(size_t offset, multi_ptr ptr); 10 | 11 | template 12 | void store(size_t offset, multi_ptr ptr) const; 13 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_4_load_store.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | constexpr int workers = 64; 11 | constexpr int size = workers * 16; 12 | 13 | // BEGIN CODE SNIP 14 | std::array fpData; 15 | for (int i = 0; i < size; i++) { 16 | fpData[i] = 8.0f; 17 | } 18 | 19 | buffer fpBuf(fpData); 20 | 21 | queue q; 22 | q.submit([&](handler& h) { 23 | accessor acc{fpBuf, h}; 24 | 25 | h.parallel_for(workers, [=](id<1> idx) { 26 | float16 inpf16; 27 | inpf16.load(idx, acc.get_multi_ptr()); 28 | float16 result = inpf16 * 2.0f; 29 | result.store(idx, acc.get_multi_ptr()); 30 | }); 31 | }); 32 | // END CODE SNIP 33 | 34 | host_accessor hostAcc(fpBuf); 35 | if (fpData[0] != 16.0f) { 36 | std::cout << "Failed\n"; 37 | return -1; 38 | } 39 | 40 | std::cout << "Passed\n"; 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_5.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
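// As a concrete illustration of the interface declared below (an added
// sketch, assuming a 4-element vec), swizzles select and reorder
// components by name or by element index:
//
//   float4 v{1.0f, 2.0f, 3.0f, 4.0f};
//   auto bgr  = v.zyx();               // {3, 2, 1} via a named swizzle
//   auto same = v.swizzle<2, 1, 0>();  // equivalent element-index form
//
// (The named forms such as zyx() are only available when
//  SYCL_SIMPLE_SWIZZLES is defined before including <sycl/sycl.hpp>.)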
7 | 8 | template 9 | __swizzled_vec__ swizzle() const; 10 | __swizzled_vec__ XYZW_ACCESS() const; 11 | __swizzled_vec__ RGBA_ACCESS() const; 12 | __swizzled_vec__ INDEX_ACCESS() const; 13 | 14 | #ifdef SYCL_SIMPLE_SWIZZLES 15 | // Available only when numElements <= 4 16 | // XYZW_SWIZZLE is all permutations with repetition of: 17 | // x, y, z, w, subject to numElements 18 | __swizzled_vec__ XYZW_SWIZZLE() const; 19 | 20 | // Available only when numElements == 4 21 | // RGBA_SWIZZLE is all permutations with repetition of: r, 22 | // g, b, a. 23 | __swizzled_vec__ RGBA_SWIZZLE() const; 24 | #endif 25 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_6_swizzle_vec.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #define SYCL_SIMPLE_SWIZZLES 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // BEGIN CODE SNIP 12 | constexpr int size = 16; 13 | 14 | std::array input; 15 | for (int i = 0; i < size; i++) { 16 | input[i] = float4(8.0f, 6.0f, 2.0f, i); 17 | } 18 | 19 | buffer b(input); 20 | 21 | queue q; 22 | q.submit([&](handler& h) { 23 | accessor a{b, h}; 24 | 25 | // We can access the individual elements of a vector by 26 | // using the functions x(), y(), z(), w() and so on. 27 | // 28 | // "Swizzles" can be used by calling a vector member 29 | // equivalent to the swizzle order that we need, for 30 | // example zyx() or any combination of the elements. 31 | // The swizzle need not be the same size as the 32 | // original vector. 33 | h.parallel_for(size, [=](id<1> idx) { 34 | auto e = a[idx]; 35 | float w = e.w(); 36 | float4 sw = e.xyzw(); 37 | sw = e.xyzw() * sw.wzyx(); 38 | sw = sw + w; 39 | a[idx] = sw.xyzw(); 40 | }); 41 | }); 42 | // END CODE SNIP 43 | 44 | host_accessor hostAcc(b); 45 | 46 | for (int i = 0; i < size; i++) { 47 | if (hostAcc[i].y() != 12.0f + i) { 48 | std::cout << "Failed\n"; 49 | return -1; 50 | } 51 | } 52 | 53 | std::cout << "Passed\n"; 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_7_vector_exec.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr int size = 8; 12 | 13 | std::array fpData; 14 | std::array fp4Data; 15 | for (int i = 0; i < size; i++) { 16 | fpData[i] = i; 17 | float b = i * 4.0f; 18 | fp4Data[i] = float4(b, b + 1, b + 2, b + 3); 19 | } 20 | 21 | buffer fpBuf(fpData); 22 | buffer fp4Buf(fp4Data); 23 | 24 | queue q; 25 | q.submit([&](handler& h) { 26 | accessor a{fpBuf, h}; 27 | accessor b{fp4Buf, h}; 28 | 29 | // BEGIN CODE SNIP 30 | h.parallel_for(8, [=](id<1> i) { 31 | float x = a[i]; 32 | float4 y4 = b[i]; 33 | a[i] = x + sycl::length(y4); 34 | }); 35 | // END CODE SNIP 36 | }); 37 | 38 | host_accessor a(fpBuf); 39 | for (int i = 0; i < size; i++) { 40 | float b = 4 * i; 41 | if (1 < a[i] - (i + std::sqrt(std::pow(b, 2) + 42 | std::pow(b + 1, 2) + 43 | std::pow(b + 2, 2) + 44 | std::pow(b + 3, 2)))) { 45 | std::cout << "Failed\n"; 46 | return -1; 47 | } 48 | } 49 | 50 | std::cout << "Passed\n"; 51 | return 0; 52 | } 53 | 
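// Supplementary sketch (not one of the book's numbered figures): the host
// check in fig_11_7 above compares against sqrt(b^2 + (b+1)^2 + ...), which
// is exactly what sycl::length computes for a float4. The small,
// self-contained program below shows that sycl::length(v) and
// sycl::sqrt(sycl::dot(v, v)) agree; the names used are standard SYCL 2020,
// but the program itself is an illustrative addition.
#include <sycl/sycl.hpp>
#include <iostream>

int main() {
  sycl::queue q;
  float* out = sycl::malloc_shared<float>(2, q);

  q.single_task([=]() {
     sycl::float4 v(3.0f, 4.0f, 0.0f, 0.0f);
     out[0] = sycl::length(v);              // Euclidean norm: 5.0
     out[1] = sycl::sqrt(sycl::dot(v, v));  // same value by definition
   }).wait();

  std::cout << "length = " << out[0] << ", sqrt(dot) = " << out[1] << "\n";
  sycl::free(out, q);
  return 0;
}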
-------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_12_1_assigned_device 8 | SOURCES fig_12_1_assigned_device.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_12_2_try_catch 13 | SOURCES fig_12_2_try_catch.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_12_4_device_selector 18 | SOURCES fig_12_4_device_selector.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_12_5_curious 23 | SOURCES fig_12_5_curious.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_12_7_very_curious 28 | SOURCES fig_12_7_very_curious.cpp) 29 | 30 | add_book_sample( 31 | TEST 32 | TARGET fig_12_8_invocation_parameters 33 | SOURCES fig_12_8_invocation_parameters.cpp) 34 | 35 | add_book_sample( 36 | TEST 37 | TARGET fig_12_10_specialize 38 | SOURCES fig_12_10_specialize.cpp) 39 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_10_specialize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | using namespace sycl; 4 | 5 | int main() { 6 | queue q; 7 | 8 | constexpr int size = 16; 9 | std::array data; 10 | 11 | // Using "sycl::device_has()" as an attribute does not 12 | // affect the device we select. Therefore, our host code 13 | // should check the device's aspects before submitting a 14 | // kernel which does require that attribute. 15 | if (q.get_device().has(aspect::fp64)) { 16 | buffer B{data}; 17 | q.submit([&](handler& h) { 18 | accessor A{B, h}; 19 | // the attributes here say that the kernel is allowed 20 | // to require fp64 support any attribute(s) from 21 | // Figure 12-3 could be specified note that namespace 22 | // stmt above (for C++) does not affect attributes (a 23 | // C++ quirk) so sycl:: is needed here 24 | h.parallel_for( 25 | size, [=](auto& idx) 26 | [[sycl::device_has(aspect::fp64)]] { 27 | A[idx] = idx * 2.0; 28 | }); 29 | }); 30 | std::cout << "doubles were used\n"; 31 | } else { 32 | // here we use an alternate method (not needing double 33 | // math support on the device) to help our code be 34 | // flexible and hence more portable 35 | std::array fdata; 36 | { 37 | buffer B{fdata}; 38 | q.submit([&](handler& h) { 39 | accessor A{B, h}; 40 | h.parallel_for( 41 | size, [=](auto& idx) { A[idx] = idx * 2.0f; }); 42 | }); 43 | } 44 | 45 | for (int i = 0; i < size; i++) data[i] = fdata[i]; 46 | 47 | std::cout << "no doubles used\n"; 48 | } 49 | for (int i = 0; i < size; i++) 50 | std::cout << "data[" << i << "] = " << data[i] << "\n"; 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_1_assigned_device.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // BEGIN CODE SNIP 11 | queue q; 12 | 13 | std::cout << "By default, we are running on " 14 | << q.get_device().get_info() 15 | << "\n"; 16 | // END CODE SNIP 17 | 18 | return 0; 19 | } 20 | 21 | // Example Outputs (one line per 
run – depends on system): 22 | // By default, we are running on NVIDIA GeForce RTX 3060 23 | // By default, we are running on AMD Radeon RX 5700 XT 24 | // By default, we are running on Intel(R) UHD Graphics 770 25 | // By default, we are running on Intel(R) Xeon(R) Gold 6336Y CPU @ 2.40GHz 26 | // By default, we are running on Intel(R) Data Center GPU Max 1100 27 | 28 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_2_try_catch.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // BEGIN CODE SNIP 11 | auto GPU_is_available = false; 12 | 13 | try { 14 | device testForGPU(gpu_selector_v); 15 | GPU_is_available = true; 16 | } catch (exception const& ex) { 17 | std::cout << "Caught this SYCL exception: " << ex.what() 18 | << std::endl; 19 | } 20 | 21 | auto q = GPU_is_available ? queue(gpu_selector_v) 22 | : queue(default_selector_v); 23 | 24 | std::cout 25 | << "After checking for a GPU, we are running on:\n " 26 | << q.get_device().get_info() 27 | << "\n"; 28 | 29 | // END CODE SNIP 30 | return 0; 31 | } 32 | 33 | // After checking for a GPU, we are running on: 34 | // AMD Radeon RX 5700 XT 35 | // After checking for a GPU, we are running on: 36 | // Intel(R) Data Center GPU Max 1100 37 | // After checking for a GPU, we are running on: 38 | // NVIDIA GeForce RTX 3060 39 | // After checking for a GPU, we are running on: 40 | // Intel(R) UHD Graphics 770 41 | // 42 | // Example Output (using a system without GPU): 43 | // Caught this SYCL exception: No device of 44 | // requested type 'info::device_type::gpu' available. 45 | // ...(PI_ERROR_DEVICE_NOT_FOUND) 46 | // After checking for a GPU, we are running on: 47 | // AMD Ryzen 5 3600 6-Core Processor 48 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_4_device_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int my_selector(const device& dev) { 10 | int score = -1; 11 | 12 | // We prefer non-Martian GPUs, especially ACME GPUs 13 | if (dev.is_gpu()) { 14 | if (dev.get_info().find("ACME") != 15 | std::string::npos) 16 | score += 25; 17 | 18 | if (dev.get_info().find( 19 | "Martian") == std::string::npos) 20 | score += 800; 21 | } 22 | 23 | // If there is no GPU on the system all devices will be 24 | // given a negative score and the selector will not select 25 | // a device. This will cause an exception. 26 | return score; 27 | } 28 | 29 | int main() { 30 | try { 31 | auto q = queue{my_selector}; 32 | std::cout 33 | << "After checking for a GPU, we are running on:\n " 34 | << q.get_device().get_info() 35 | << "\n"; 36 | } catch (exception const& ex) { 37 | std::cout << "Custom device selector did not select a " 38 | "device.\n"; 39 | std::cout << "Caught this SYCL exception: " << ex.what() 40 | << std::endl; 41 | } 42 | 43 | return 0; 44 | } 45 | 46 | // Four Example Outputs (using four different 47 | // systems, each with a GPU): 48 | // After checking for a GPU, we are running on: 49 | // Intel(R) Gen9 HD Graphics NEO. 
50 | // After checking for a GPU, we are running on: 51 | // NVIDIA GeForce RTX 3060 52 | // After checking for a GPU, we are running on: 53 | // Intel(R) Data Center GPU Max 1100 54 | // After checking for a GPU, we are running on: 55 | // AMD Radeon RX 5700 XT 56 | // 57 | // Example Output (using a system without GPU): 58 | // After checking for a GPU, we are running on: 59 | // Custom device selector did not select a device. 60 | // Caught this SYCL exception: No device of requested 61 | // type available. ...(PI_ERROR_DEVICE_NOT_FOUND) 62 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_5_curious.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // BEGIN CODE SNIP 11 | // Loop through available platforms 12 | for (auto const& this_platform : 13 | platform::get_platforms()) { 14 | std::cout 15 | << "Found platform: " 16 | << this_platform.get_info() 17 | << "\n"; 18 | 19 | // Loop through available devices in this platform 20 | for (auto const& this_device : 21 | this_platform.get_devices()) { 22 | std::cout 23 | << " Device: " 24 | << this_device.get_info() 25 | << "\n"; 26 | } 27 | std::cout << "\n"; 28 | } 29 | // END CODE SNIP 30 | 31 | return 0; 32 | } 33 | 34 | 35 | // % clang++ -fsycl fig_12_5_curious.cpp -o curious 36 | // 37 | // % ./curious 38 | // Found platform: NVIDIA CUDA BACKEND 39 | // Device: NVIDIA GeForce RTX 3060 40 | // 41 | // Found platform: AMD HIP BACKEND 42 | // Device: AMD Radeon RX 5700 XT 43 | // 44 | // Found platform: Intel(R) OpenCL 45 | // Device: Intel(R) Xeon(R) E-2176G CPU @ 3.70GHz 46 | // 47 | // Found platform: Intel(R) OpenCL HD Graphics 48 | // Device: Intel(R) UHD Graphics P630 [0x3e96] 49 | // 50 | // Found platform: Intel(R) Level-Zero 51 | // Device: Intel(R) UHD Graphics P630 [0x3e96] 52 | // 53 | // Found platform: Intel(R) FPGA Emulation Platform for OpenCL(TM) 54 | // Device: Intel(R) FPGA Emulation Device 55 | 56 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_8_invocation_parameters.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // BEGIN CODE SNIP 11 | queue q; 12 | device dev = q.get_device(); 13 | 14 | std::cout << "We are running on:\n" 15 | << dev.get_info() << "\n"; 16 | 17 | // Query results like the following can be used to 18 | // calculate how large your kernel invocations can be. 
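  // For instance, an nd_range launch could clamp its requested
  // work-group size to the maximum queried below, along the lines of
  //   size_t wg = std::min<size_t>(desired_wg, maxWG);
  // where desired_wg stands in for an application tuning choice.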
19 | auto maxWG = 20 | dev.get_info(); 21 | auto maxGmem = 22 | dev.get_info(); 23 | auto maxLmem = 24 | dev.get_info(); 25 | 26 | std::cout << "Max WG size is " << maxWG 27 | << "\nGlobal memory size is " << maxGmem 28 | << "\nLocal memory size is " << maxLmem << "\n"; 29 | 30 | // END CODE SNIP 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/tst_12_4_device_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace std; 8 | 9 | int my_selector(int isgpu, string foo) { 10 | int score = -1; 11 | 12 | // We prefer non-Martian GPUs, especially ACME GPUs 13 | if (isgpu) { 14 | if (foo.find("ACME") != std::string::npos) score += 25; 15 | 16 | if (foo.find("Martian") == std::string::npos) 17 | score += 800; 18 | } 19 | 20 | // If there is no GPU on the system all devices will be 21 | // given a negative score and the selector will not select 22 | // a device. This will cause an exception. 23 | return score; 24 | } 25 | 26 | int main() { 27 | string foo; 28 | foo = "Intel GPU"; 29 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 30 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 31 | foo = "Intel ACME GPU"; 32 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 33 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 34 | foo = "Intel GPU Martian"; 35 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 36 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 37 | foo = "Intel Martian ACME GPU"; 38 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 39 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 40 | foo = "ACME"; 41 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 42 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 43 | foo = "MartianACME"; 44 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 45 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 46 | foo = "Martian"; 47 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 48 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /samples/Ch13_practical_tips/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_13_4_stream 8 | SOURCES fig_13_4_stream.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_13_6_queue_profiling_timing 13 | SOURCES fig_13_6_queue_profiling_timing.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_13_9_common_buffer_pattern 18 | SOURCES fig_13_9_common_buffer_pattern.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_13_10_common_pattern_bug 23 | SOURCES fig_13_10_common_pattern_bug.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_13_11_host_accessor 28 | SOURCES fig_13_11_host_accessor.cpp) 29 | 30 | add_book_sample( 31 | TEST 32 | TARGET fig_13_12_host_accessor_for_init 33 | SOURCES fig_13_12_host_accessor_for_init.cpp) 34 | 35 | add_book_sample( 36 | TARGET fig_13_13_host_accessor_deadlock 37 | SOURCES fig_13_13_host_accessor_deadlock.cpp) 38 | 39 | -------------------------------------------------------------------------------- 
/samples/Ch13_practical_tips/fig_13_10_common_pattern_bug.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // BEGIN CODE SNIP 12 | 13 | constexpr size_t N = 1024; 14 | 15 | // Set up queue on any available device 16 | queue q; 17 | 18 | // Create host containers to initialize on the host 19 | std::vector in_vec(N), out_vec(N); 20 | 21 | // Initialize input and output vectors 22 | for (int i = 0; i < N; i++) in_vec[i] = i; 23 | std::fill(out_vec.begin(), out_vec.end(), 0); 24 | 25 | // Create buffers using host allocations (vector in this 26 | // case) 27 | buffer in_buf{in_vec}, out_buf{out_vec}; 28 | 29 | // Submit the kernel to the queue 30 | q.submit([&](handler& h) { 31 | accessor in{in_buf, h}; 32 | accessor out{out_buf, h}; 33 | 34 | h.parallel_for(range{N}, 35 | [=](id<1> idx) { out[idx] = in[idx]; }); 36 | }); 37 | 38 | // BUG!!! We're using the host allocation out_vec, but the 39 | // buffer out_buf is still alive and owns that allocation! 40 | // We will probably see the initialiation value (zeros) 41 | // printed out, since the kernel probably hasn't even run 42 | // yet, and the buffer has no reason to have copied any 43 | // output back to the host even if the kernel has run. 44 | for (int i = 0; i < N; i++) 45 | std::cout << "out_vec[" << i << "]=" << out_vec[i] 46 | << "\n"; 47 | 48 | // END CODE SNIP 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /samples/Ch13_practical_tips/fig_13_11_host_accessor.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // BEGIN CODE SNIP 12 | 13 | constexpr size_t N = 1024; 14 | 15 | // Set up queue on any available device 16 | queue q; 17 | 18 | // Create host containers to initialize on the host 19 | std::vector in_vec(N), out_vec(N); 20 | 21 | // Initialize input and output vectors 22 | for (int i = 0; i < N; i++) in_vec[i] = i; 23 | std::fill(out_vec.begin(), out_vec.end(), 0); 24 | 25 | // Create buffers using host allocations (vector in this 26 | // case) 27 | buffer in_buf{in_vec}, out_buf{out_vec}; 28 | 29 | // Submit the kernel to the queue 30 | q.submit([&](handler& h) { 31 | accessor in{in_buf, h}; 32 | accessor out{out_buf, h}; 33 | 34 | h.parallel_for(range{N}, 35 | [=](id<1> idx) { out[idx] = in[idx]; }); 36 | }); 37 | 38 | // Check that all outputs match expected value 39 | // Use host accessor! 
Buffer is still in scope / alive 40 | host_accessor A{out_buf}; 41 | 42 | for (int i = 0; i < N; i++) 43 | std::cout << "A[" << i << "]=" << A[i] << "\n"; 44 | 45 | // END CODE SNIP 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /samples/Ch13_practical_tips/fig_13_12_host_accessor_for_init.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // BEGIN CODE SNIP 12 | 13 | constexpr size_t N = 1024; 14 | 15 | // Set up queue on any available device 16 | queue q; 17 | 18 | // Create buffers of size N 19 | buffer in_buf{N}, out_buf{N}; 20 | 21 | // Use host accessors to initialize the data 22 | { // CRITICAL: Begin scope for host_accessor lifetime! 23 | host_accessor in_acc{in_buf}, out_acc{out_buf}; 24 | for (int i = 0; i < N; i++) { 25 | in_acc[i] = i; 26 | out_acc[i] = 0; 27 | } 28 | } // CRITICAL: Close scope to make host accessors go out 29 | // of scope! 30 | 31 | // Submit the kernel to the queue 32 | q.submit([&](handler& h) { 33 | accessor in{in_buf, h}; 34 | accessor out{out_buf, h}; 35 | 36 | h.parallel_for(range{N}, 37 | [=](id<1> idx) { out[idx] = in[idx]; }); 38 | }); 39 | 40 | // Check that all outputs match expected value 41 | // Use host accessor! Buffer is still in scope / alive 42 | host_accessor A{out_buf}; 43 | 44 | for (int i = 0; i < N; i++) 45 | std::cout << "A[" << i << "]=" << A[i] << "\n"; 46 | 47 | // END CODE SNIP 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /samples/Ch13_practical_tips/fig_13_4_stream.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | queue q; 11 | // BEGIN CODE SNIP 12 | q.submit([&](handler &h) { 13 | stream out(1024, 256, h); 14 | h.parallel_for(range{8}, [=](id<1> idx) { 15 | out << "Testing my sycl stream (this is work-item ID:" 16 | << idx << ")\n"; 17 | }); 18 | }); 19 | // END CODE SNIP 20 | 21 | // Wait on the queue so that the host program doesn't 22 | // complete before the device code stream out is executed. 23 | // This ensures that the example actually displays the 24 | // output text. 
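  // An equivalent approach would be to capture the event returned by
  // q.submit(...) and call wait() on that event; waiting on the whole
  // queue is simply the most concise option for this single-submission
  // example.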
25 | q.wait(); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_14_8_one_reduction 8 | SOURCES fig_14_8_one_reduction.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_14_11_array_reduction 13 | SOURCES fig_14_11_array_reduction.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_14_12_user_defined_reduction 18 | SOURCES fig_14_12_user_defined_reduction.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_14_13_algorithm_comparison 23 | SOURCES fig_14_13_algorithm_comparison.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_14_15_map 28 | SOURCES fig_14_15_map.cpp 29 | ADDITIONAL_COMPILE_OPTIONS -fno-fast-math) 30 | 31 | add_book_sample( 32 | TEST 33 | TARGET fig_14_16_stencil 34 | SOURCES fig_14_16_stencil.cpp) 35 | 36 | add_book_sample( 37 | TEST 38 | TARGET fig_14_17_local_stencil 39 | SOURCES fig_14_17_local_stencil.cpp) 40 | 41 | add_book_sample( 42 | TEST 43 | TARGET fig_14_18_basic_reduction 44 | SOURCES fig_14_18_basic_reduction.cpp) 45 | 46 | add_book_sample( 47 | TEST 48 | TARGET fig_14_19_nd_range_reduction 49 | SOURCES fig_14_19_nd_range_reduction.cpp) 50 | 51 | add_book_sample( 52 | TEST 53 | TARGET fig_14_20-22_inclusive_scan 54 | SOURCES fig_14_20-22_inclusive_scan.cpp) 55 | 56 | add_book_sample( 57 | TEST 58 | TARGET fig_14_24_local_pack 59 | SOURCES fig_14_24_local_pack.cpp) 60 | 61 | add_book_sample( 62 | TEST 63 | TARGET fig_14_26_local_unpack 64 | SOURCES fig_14_26_local_unpack.cpp) 65 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_10.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
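// For context (an illustrative sketch, assuming the same setup as
// fig_14_8 below): the reducer declared next is what a reduction kernel
// body receives as its second argument, so a typical use looks like
//
//   h.parallel_for(range<1>{N}, reduction(sum, plus<>()),
//                  [=](id<1> i, auto& r) { r.combine(data[i]); });
//
// with r += data[i] available as shorthand for plus-based reductions.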
7 | 8 | template 10 | class reducer { 11 | // Combine partial result with reducer's value 12 | void combine(const T& partial); 13 | }; 14 | 15 | // Other operators are available for standard binary 16 | // operations 17 | template 18 | auto& operator+=(reducer>&, const T&); 19 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_11_array_reduction.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | constexpr size_t N = 16; 13 | constexpr size_t B = 4; 14 | 15 | queue q; 16 | int* data = malloc_shared(N, q); 17 | int* histogram = malloc_shared(B, q); 18 | std::iota(data, data + N, 1); 19 | std::fill(histogram, histogram + B, 0); 20 | 21 | q.submit([&](handler& h) { 22 | // BEGIN CODE SNIP 23 | h.parallel_for( 24 | range{N}, 25 | reduction(span(histogram, B), plus<>()), 26 | [=](id<1> i, auto& histogram) { 27 | histogram[data[i] % B]++; 28 | }); 29 | // END CODE SNIP 30 | }).wait(); 31 | 32 | bool passed = true; 33 | std::cout << "Histogram:" << std::endl; 34 | for (int b = 0; b < B; ++b) { 35 | std::cout << "bin[" << b << "]: " << histogram[b] 36 | << std::endl; 37 | passed &= (histogram[b] == N / B); 38 | } 39 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 40 | 41 | free(histogram, q); 42 | free(data, q); 43 | return (passed) ? 0 : 1; 44 | } 45 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_12_user_defined_reduction.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | template 12 | using minloc = minimum>; 13 | 14 | int main() { 15 | constexpr size_t N = 16; 16 | 17 | queue q; 18 | float* data = malloc_shared(N, q); 19 | std::pair* res = 20 | malloc_shared>(1, q); 21 | std::generate(data, data + N, std::mt19937{}); 22 | 23 | std::pair identity = { 24 | std::numeric_limits::max(), 25 | std::numeric_limits::min()}; 26 | *res = identity; 27 | 28 | auto red = 29 | sycl::reduction(res, identity, minloc()); 30 | 31 | q.submit([&](handler& h) { 32 | h.parallel_for( 33 | range<1>{N}, red, [=](id<1> i, auto& res) { 34 | std::pair partial = {data[i], i}; 35 | res.combine(partial); 36 | }); 37 | }).wait(); 38 | 39 | std::cout << "minimum value = " << res->first << " at " 40 | << res->second << "\n"; 41 | 42 | std::pair gold = identity; 43 | for (int i = 0; i < N; ++i) { 44 | if (data[i] <= gold.first || 45 | (data[i] == gold.first && i < gold.second)) { 46 | gold.first = data[i]; 47 | gold.second = i; 48 | } 49 | } 50 | bool passed = (res->first == gold.first) && 51 | (res->second == gold.second); 52 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 53 | 54 | free(res, q); 55 | free(data, q); 56 | return (passed) ? 
0 : 1; 57 | } 58 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_15_map.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace sycl; 12 | 13 | int main() { 14 | queue q; 15 | 16 | const size_t N = 64; 17 | float* input = malloc_shared(N, q); 18 | float* output = malloc_shared(N, q); 19 | std::iota(input, input + N, 1); 20 | std::fill(output, output + N, 0); 21 | 22 | // BEGIN CODE SNIP 23 | // Compute the square root of each input value 24 | q.parallel_for(N, [=](id<1> i) { 25 | output[i] = sycl::sqrt(input[i]); 26 | }).wait(); 27 | // END CODE SNIP 28 | 29 | // Check that all outputs match serial execution. 30 | bool passed = true; 31 | for (int i = 0; i < N; ++i) { 32 | float gold = std::sqrt(input[i]); 33 | if (std::abs(output[i] - gold) >= 1.0E-06) { 34 | passed = false; 35 | } 36 | } 37 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 38 | 39 | free(output, q); 40 | free(input, q); 41 | return (passed) ? 0 : 1; 42 | } 43 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_16_stencil.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace sycl; 14 | 15 | int main() { 16 | queue q; 17 | 18 | const size_t N = 16; 19 | const size_t M = 16; 20 | range<2> stencil_range(N, M); 21 | range<2> alloc_range(N + 2, M + 2); 22 | std::vector input(alloc_range.size()), 23 | output(alloc_range.size()); 24 | std::iota(input.begin(), input.end(), 1); 25 | std::fill(output.begin(), output.end(), 0); 26 | 27 | { 28 | buffer input_buf(input.data(), alloc_range); 29 | buffer output_buf(output.data(), alloc_range); 30 | 31 | // BEGIN CODE SNIP 32 | q.submit([&](handler& h) { 33 | accessor input{input_buf, h}; 34 | accessor output{output_buf, h}; 35 | 36 | // Compute the average of each cell and its immediate 37 | // neighbors 38 | h.parallel_for(stencil_range, [=](id<2> idx) { 39 | int i = idx[0] + 1; 40 | int j = idx[1] + 1; 41 | 42 | float self = input[i][j]; 43 | float north = input[i - 1][j]; 44 | float east = input[i][j + 1]; 45 | float south = input[i + 1][j]; 46 | float west = input[i][j - 1]; 47 | output[i][j] = 48 | (self + north + east + south + west) / 5.0f; 49 | }); 50 | }); 51 | // END CODE SNIP 52 | } 53 | 54 | // Check that all outputs match serial execution. 55 | bool passed = true; 56 | for (int i = 1; i < N + 1; ++i) { 57 | for (int j = 1; j < M + 1; ++j) { 58 | float self = input[i * (M + 2) + j]; 59 | float north = input[(i - 1) * (M + 2) + j]; 60 | float east = input[i * (M + 2) + (j + 1)]; 61 | float south = input[(i + 1) * (M + 2) + j]; 62 | float west = input[i * (M + 2) + (j - 1)]; 63 | float gold = 64 | (self + north + east + south + west) / 5.0f; 65 | if (std::abs(output[i * (M + 2) + j] - gold) >= 66 | 1.0E-06) { 67 | passed = false; 68 | } 69 | } 70 | } 71 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 72 | return (passed) ? 
0 : 1; 73 | } 74 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_18_basic_reduction.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | constexpr size_t N = 16; 13 | 14 | queue q; 15 | int* data = malloc_shared(N, q); 16 | int* sum = malloc_shared(1, q); 17 | std::iota(data, data + N, 1); 18 | *sum = 0; 19 | 20 | // BEGIN CODE SNIP 21 | q.parallel_for(N, [=](id<1> i) { 22 | atomic_ref( 25 | *sum) += data[i]; 26 | }).wait(); 27 | // END CODE SNIP 28 | 29 | std::cout << "sum = " << *sum << "\n"; 30 | bool passed = (*sum == ((N * (N + 1)) / 2)); 31 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 32 | 33 | free(sum, q); 34 | free(data, q); 35 | return (passed) ? 0 : 1; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_19_nd_range_reduction.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | constexpr size_t N = 16; 13 | constexpr size_t B = 4; 14 | 15 | queue q; 16 | int* data = malloc_shared(N, q); 17 | int* sum = malloc_shared(1, q); 18 | std::iota(data, data + N, 1); 19 | *sum = 0; 20 | 21 | // BEGIN CODE SNIP 22 | q.parallel_for(nd_range<1>{N, B}, [=](nd_item<1> it) { 23 | int i = it.get_global_id(0); 24 | auto grp = it.get_group(); 25 | int group_sum = 26 | reduce_over_group(grp, data[i], plus<>()); 27 | if (grp.leader()) { 28 | atomic_ref( 31 | *sum) += group_sum; 32 | } 33 | }).wait(); 34 | // END CODE SNIP 35 | 36 | std::cout << "sum = " << *sum << "\n"; 37 | bool passed = (*sum == ((N * (N + 1)) / 2)); 38 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 39 | 40 | free(sum, q); 41 | free(data, q); 42 | return (passed) ? 0 : 1; 43 | } 44 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_23.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | uint32_t index = 9 | exclusive_scan(g, (uint32_t)predicate, plus<>()); 10 | if (predicate) dst[index] = value; 11 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_25.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | uint32_t index = 9 | exclusive_scan(sg, (uint32_t)predicate, plus<>()); 10 | return (predicate) ? 
new_value[index] : original_value; 11 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_8_one_reduction.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | constexpr size_t N = 16; 13 | 14 | queue q; 15 | int* data = malloc_shared(N, q); 16 | int* sum = malloc_shared(1, q); 17 | std::iota(data, data + N, 1); 18 | *sum = 0; 19 | 20 | q.submit([&](handler& h) { 21 | // BEGIN CODE SNIP 22 | h.parallel_for( 23 | range<1>{N}, reduction(sum, plus<>()), 24 | [=](id<1> i, auto& sum) { sum += data[i]; }); 25 | // END CODE SNIP 26 | }).wait(); 27 | 28 | std::cout << "sum = " << *sum << "\n"; 29 | bool passed = (*sum == ((N * (N + 1)) / 2)); 30 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 31 | 32 | free(sum, q); 33 | free(data, q); 34 | return (passed) ? 0 : 1; 35 | } 36 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_9.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | unspecified reduction(BufferT variable, handler& h, 10 | BinaryOperation combiner, 11 | const property_list& properties = {}); 12 | 13 | template 14 | unspecified reduction(BufferT variable, handler& h, 15 | const BufferT::value_type& identity, 16 | BinaryOperation combiner, 17 | const property_list& properties = {}); 18 | 19 | template 20 | unspecified reduction(T* variable, BinaryOperation combiner, 21 | const property_list& properties = {}); 22 | 23 | template 24 | unspecified reduction(T* variable, const T& identity, 25 | BinaryOperation combiner, 26 | const property_list& properties = {}); 27 | 28 | template 30 | unspecified reduction(span variables, 31 | BinaryOperation combiner, 32 | const property_list& properties = {}); 33 | 34 | template 36 | unspecified reduction(span variables, 37 | const T& identity, 38 | BinaryOperation combiner, 39 | const property_list& properties = {}); 40 | -------------------------------------------------------------------------------- /samples/Ch15_programming_for_gpus/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_15_3_single_task_matrix_multiplication 8 | SOURCES matrix_multiplication_harness.cpp fig_15_3_single_task_matrix_multiplication.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_15_5_somewhat_parallel_matrix_multiplication 13 | SOURCES matrix_multiplication_harness.cpp fig_15_5_somewhat_parallel_matrix_multiplication.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_15_7_more_parallel_matrix_multiplication 18 | SOURCES matrix_multiplication_harness.cpp fig_15_7_more_parallel_matrix_multiplication.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_15_10_divergent_control_flow 23 | SOURCES fig_15_10_divergent_control_flow.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_15_12_small_work_group_matrix_multiplication 28 | SOURCES 
matrix_multiplication_harness.cpp fig_15_12_small_work_group_matrix_multiplication.cpp) 29 | 30 | add_book_sample( 31 | TEST 32 | TARGET fig_15_18_columns_matrix_multiplication 33 | SOURCES matrix_multiplication_harness.cpp fig_15_18_columns_matrix_multiplication.cpp) 34 | -------------------------------------------------------------------------------- /samples/Ch15_programming_for_gpus/fig_15_10_divergent_control_flow.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t array_size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < array_size; i++) { 15 | data[i] = i; 16 | } 17 | 18 | buffer dataBuf{data}; 19 | 20 | queue q{default_selector_v}; 21 | q.submit([&](handler& h) { 22 | accessor dataAcc{dataBuf, h}; 23 | 24 | // BEGIN CODE SNIP 25 | h.parallel_for(array_size, [=](id<1> i) { 26 | auto condition = i[0] & 1; 27 | if (condition) { 28 | dataAcc[i] = dataAcc[i] * 2; // odd 29 | } else { 30 | dataAcc[i] = dataAcc[i] + 1; // even 31 | } 32 | }); 33 | // END CODE SNIP 34 | }); 35 | 36 | host_accessor dataAcc{dataBuf}; 37 | 38 | for (int i = 0; i < array_size; i++) { 39 | if (i & 1) { 40 | if (dataAcc[i] != i * 2) { 41 | std::cout << "Odd result did not validate at index " 42 | << i << "!\n"; 43 | return -1; 44 | } 45 | } else { 46 | if (dataAcc[i] != i + 1) { 47 | std::cout 48 | << "Even result did not validate at index " << i 49 | << "!\n"; 50 | return -1; 51 | } 52 | } 53 | } 54 | 55 | std::cout << "Success!\n"; 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /samples/Ch15_programming_for_gpus/fig_15_7_more_parallel_matrix_multiplication.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | extern const int matrixSize = 128; 10 | static const int iterations = 16; 11 | 12 | template 13 | double run_sycl(const std::vector& vecA, 14 | const std::vector& vecB, 15 | std::vector& vecC) { 16 | const int M = matrixSize; 17 | const int N = matrixSize; 18 | const int K = matrixSize; 19 | 20 | using ns = std::chrono::nanoseconds; 21 | ns::rep best_time = std::numeric_limits::max(); 22 | 23 | std::fill(vecC.begin(), vecC.end(), (T)0); 24 | 25 | buffer bufA{vecA}; // M * K elements 26 | buffer bufB{vecB}; // K * N elements 27 | buffer bufC{vecC}; // M * N elements 28 | 29 | queue q; // Choose any available device 30 | std::cout << "Running on device: " 31 | << q.get_device().get_info() 32 | << "\n"; 33 | 34 | for (int i = 0; i < iterations; ++i) { 35 | auto start = std::chrono::steady_clock::now(); 36 | 37 | q.submit([&](handler& h) { 38 | accessor matrixA{bufA, h}; 39 | accessor matrixB{bufB, h}; 40 | accessor matrixC{bufC, h}; 41 | 42 | // BEGIN CODE SNIP 43 | h.parallel_for(range{M, N}, [=](id<2> idx) { 44 | int m = idx[0]; 45 | int n = idx[1]; 46 | 47 | T sum = 0; 48 | for (int k = 0; k < K; k++) { 49 | sum += matrixA[m * K + k] * matrixB[k * N + n]; 50 | } 51 | 52 | matrixC[m * N + n] = sum; 53 | }); 54 | // END CODE SNIP 55 | }); 56 | 57 | q.wait(); // So that we know the kernel has finished 58 | // before checking time 59 | auto duration = 60 | std::chrono::steady_clock::now() - start; 61 | auto time = 62 | 
std::chrono::duration_cast(duration).count(); 63 | 64 | best_time = std::min(time, best_time); 65 | } 66 | 67 | double best_seconds = (double)best_time / 1e9; 68 | 69 | return best_seconds; 70 | } 71 | 72 | template double run_sycl( 73 | const std::vector& vecA, 74 | const std::vector& vecB, 75 | std::vector& vecC); 76 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_16_6_stream_triad 8 | SOURCES fig_16_6_stream_triad.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_16_12_forward_dep 13 | SOURCES fig_16_12_forward_dep.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_16_18_vector_swizzle 18 | SOURCES fig_16_18_vector_swizzle.cpp) 19 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_10.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | void init(queue& deviceQueue, T* VA, T* VB, T* VC, 10 | size_t array_size) { 11 | range<1> numOfItems{array_size}; 12 | 13 | buffer bufferA(VA, numOfItems); 14 | buffer bufferB(VB, numOfItems); 15 | buffer bufferC(VC, numOfItems); 16 | 17 | auto queue_event = deviceQueue.submit([&](handler& cgh) { 18 | auto aA = bufA.template get_access(cgh); 19 | auto aB = bufB.template get_access(cgh); 20 | auto aC = bufC.template get_access(cgh); 21 | 22 | cgh.parallel_for>(numOfItems, [=](id<1> wi) { 23 | aA[wi] = 2.0; 24 | aB[wi] = 1.0; 25 | aC[wi] = 0.0; 26 | }); 27 | }); 28 | 29 | queue_event.wait(); 30 | } 31 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_12_forward_dep.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | 8 | using namespace sycl; 9 | 10 | int main() { 11 | // BEGIN CODE SNIP 12 | const int n = 16, w = 16; 13 | 14 | queue q; 15 | range<2> G = {n, w}; 16 | range<2> L = {1, w}; 17 | 18 | int *a = malloc_shared(n * (n + 1), q); 19 | 20 | for (int i = 0; i < n; i++) 21 | for (int j = 0; j < n + 1; j++) a[i * n + j] = i + j; 22 | 23 | q.parallel_for( 24 | nd_range<2>{G, L}, 25 | [=](nd_item<2> it) [[sycl::reqd_sub_group_size(w)]] { 26 | // distribute uniform "i" over the sub-group with 27 | // 16-way redundant computation 28 | const int i = it.get_global_id(0); 29 | sub_group sg = it.get_sub_group(); 30 | 31 | for (int j = sg.get_local_id()[0]; j < n; j += w) { 32 | // load a[i*n+j+1:16] before updating a[i*n+j:16] 33 | // to preserve loop-carried forward dependency 34 | auto va = a[i * n + j + 1]; 35 | group_barrier(sg); 36 | a[i * n + j] = va + i + 2; 37 | } 38 | group_barrier(sg); 39 | }) 40 | .wait(); 41 | // END CODE SNIP 42 | 43 | if (a[0] == 3 && a[9] == 12) 44 | std::cout << "passed\n"; 45 | else 46 | std::cout << "failed\n"; 47 | free(a, q); 48 | } 49 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_15.hpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | cgh.parallel_for>(numOfItems,[=](id<1> wi) { 9 | x[wi] = a[wi].x; // lead to gather x0, x1, x2, x3 10 | y[wi] = a[wi].y; // lead to gather y0, y1, y2, y3 11 | z[wi] = a[wi].z; // lead to gather z0, z1, z2, z3 12 | w[wi] = a[wi].w; // lead to gather w0, w1, w2, w3 13 | }); 14 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_16.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | cgh.parallel_for>(numOfItems,[=](id<1> wi) { 9 | x[wi] = a.x[wi]; // lead to unit-stride vector load x[0:4] 10 | y[wi] = a.y[wi]; // lead to unit-stride vector load y[0:4] 11 | z[wi] = a.z[wi]; // lead to unit-stride vector load z[0:4] 12 | w[wi] = a.w[wi]; // lead to unit-stride vector load w[0:4] 13 | }); 14 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_17pre.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | int id = get_global_id(0); a[id] = b[id] + c[id]; 9 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_18_vector_swizzle.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #define SYCL_SIMPLE_SWIZZLES 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | // BEGIN CODE SNIP 13 | queue q; 14 | 15 | bool *resArray = malloc_shared(1, q); 16 | resArray[0] = true; 17 | 18 | q.single_task([=]() { 19 | sycl::vec old_v = 20 | sycl::vec(0, 100, 200, 300); 21 | sycl::vec new_v = sycl::vec(); 22 | 23 | new_v.rgba() = old_v.abgr(); 24 | int vals[] = {300, 200, 100, 0}; 25 | 26 | if (new_v.r() != vals[0] || new_v.g() != vals[1] || 27 | new_v.b() != vals[2] || new_v.a() != vals[3]) { 28 | resArray[0] = false; 29 | } 30 | }).wait(); 31 | // END CODE SNIP 32 | 33 | if (resArray[0]) 34 | std::cout << "passed\n"; 35 | else 36 | std::cout << "failed\n"; 37 | free(resArray, q); 38 | } 39 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
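// On a CPU backend, consecutive work-items of the snippet below are
// typically packed into SIMD lanes, so with 512-bit vector registers and
// float data one instruction can process 16 adjacent values of k,
// provided x, y and z are accessed with unit stride.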
7 | 8 | h.parallel_for(range(1024), 9 | [=](id<1> k) { z[k] = x[k] + y[k]; }); 10 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_4.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | // C++ STREAM Triad workload 9 | // __restrict is used to denote no memory aliasing among 10 | // arguments 11 | template 12 | double triad(T* __restrict VA, T* __restrict VB, 13 | T* __restrict VC, size_t array_size, 14 | const T scalar) { 15 | double ts = timer_start(); 16 | for (size_t id = 0; id < array_size; id++) { 17 | VC[id] = VA[id] + scalar * VB[id]; 18 | } 19 | double te = timer_end(); 20 | return (te – ts); 21 | } 22 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_5.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are snippets 6 | // that are not set up to be compiled as is. 7 | 8 | // STREAM Triad: SIMD code generated by the compiler, where zmm0, zmm1 9 | // and zmm2 are SIMD vector registers. The vectorized loop is unrolled by 4 10 | // to leverage the out-of-execution of instructions from Xeon CPU and to 11 | // hide memory load and store latency 12 | 13 | // clang-format off 14 | 15 | # %bb.0: # %entry 16 | vbroadcastsd %xmm0, %zmm0 # broadcast “scalar” to SIMD reg zmm0 17 | movq $-32, %rax 18 | .p2align 4, 0x90 19 | .LBB0_1: # %loop.19 20 | # =>This Loop Header: Depth=1 21 | vmovupd 256(%rdx,%rax,8), %zmm1 # load 8 elements from memory to zmm1 22 | vfmadd213pd 256(%rsi,%rax,8), %zmm0, %zmm1 # zmm1=(zmm0*zmm1)+mem 23 | # perform SIMD FMA for 8 data elements 24 | # VC[id:8] = scalar*VB[id:8]+VA[id:8] 25 | vmovupd %zmm1, 256(%rdi,%rax,8) # store 8-element result to mem from zmm1 26 | # This SIMD loop body is unrolled by 4 27 | vmovupd 320(%rdx,%rax,8), %zmm1 28 | vfmadd213pd 320(%rsi,%rax,8), %zmm0, %zmm1 # zmm1=(zmm0*zmm1)+mem 29 | vmovupd %zmm1, 320(%rdi,%rax,8) 30 | 31 | vmovupd 384(%rdx,%rax,8), %zmm1 32 | vfmadd213pd 384(%rsi,%rax,8), %zmm0, %zmm1 # zmm1=(zmm0*zmm1)+mem 33 | vmovupd %zmm1, 384(%rdi,%rax,8) 34 | 35 | vmovupd 448(%rdx,%rax,8), %zmm1 36 | vfmadd213pd 448(%rsi,%rax,8), %zmm0, %zmm1 # zmm1=(zmm0*zmm1)+mem 37 | vmovupd %zmm1, 448(%rdi,%rax,8) 38 | addq $32, %rax 39 | cmpq $134217696, %rax # imm = 0x7FFFFE0 40 | jb .LBB0_1 41 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TARGET fig_17_9_fpga_selector 7 | SOURCES fig_17_9_fpga_selector.cpp) 8 | 9 | add_book_sample( 10 | TARGET fig_17_11_fpga_emulator_selector 11 | SOURCES fig_17_11_fpga_emulator_selector.cpp) 12 | 13 | add_book_sample( 14 | TARGET fig_17_17_ndrange_func 15 | SOURCES fig_17_17_ndrange_func.cpp) 16 | 17 | add_book_sample( 18 | TARGET fig_17_18_loop_func 19 | SOURCES fig_17_18_loop_func.cpp) 20 | 21 | add_book_sample( 22 | TARGET fig_17_20_loop_carried_deps 23 | SOURCES 
fig_17_20_loop_carried_deps.cpp) 24 | 25 | add_book_sample( 26 | TARGET fig_17_22_loop_carried_state 27 | SOURCES fig_17_22_loop_carried_state.cpp) 28 | 29 | if(NOT WITHCUDA AND NOT WITHROCM) 30 | # TEMPORARILY DISABLE: doesn't work with CUDA or ROCm backend. 31 | add_book_sample( 32 | TARGET fig_17_31_inter_kernel_pipe 33 | SOURCES fig_17_31_inter_kernel_pipe.cpp) 34 | endif() 35 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_11_fpga_emulator_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include // For fpga_selector_v 6 | #include 7 | using namespace sycl; 8 | 9 | void say_device(const queue& q) { 10 | std::cout << "Device : " 11 | << q.get_device().get_info() 12 | << "\n"; 13 | } 14 | 15 | int main() { 16 | queue q{ext::intel::fpga_emulator_selector_v}; 17 | say_device(q); 18 | 19 | q.submit([&](handler& h) { 20 | h.parallel_for(1024, [=](auto idx) { 21 | // ... 22 | }); 23 | }); 24 | 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_17_ndrange_func.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include // For fpga_emulator_selector_v 6 | #include 7 | using namespace sycl; 8 | 9 | int generate_random_number_from_ID(const id<3>& I) { 10 | return 0; // Useless non-RNG generator as proxy! 11 | }; 12 | 13 | int main() { 14 | queue q{ext::intel::fpga_emulator_selector_v}; 15 | 16 | buffer B{range{16, 16, 16}}; 17 | 18 | q.submit([&](handler& h) { 19 | accessor output(B, h); 20 | // BEGIN CODE SNIP 21 | h.parallel_for({16, 16, 16}, [=](auto I) { 22 | output[I] = generate_random_number_from_ID(I); 23 | }); 24 | // END CODE SNIP 25 | }); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_18_loop_func.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include // For fpga_emulator_selector_v 6 | #include 7 | using namespace sycl; 8 | 9 | int generate_random_number(const int& state) { 10 | return 0; // Useless non-RNG generator as proxy! 11 | }; 12 | 13 | int main() { 14 | constexpr int size = 64; 15 | queue q{ext::intel::fpga_emulator_selector_v}; 16 | 17 | buffer b{range{size}}; 18 | 19 | q.submit([&](handler& h) { 20 | accessor output(b, h); 21 | 22 | h.single_task([=]() { 23 | // BEGIN CODE SNIP 24 | int state = 0; 25 | for (int i = 0; i < size; i++) { 26 | state = generate_random_number(state); 27 | output[i] = state; 28 | } 29 | // END CODE SNIP 30 | }); 31 | }); 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_20_loop_carried_deps.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include // For fpga_emulator_selector_v 6 | #include 7 | using namespace sycl; 8 | 9 | int generate_random_number(const int& state) { 10 | return 0; // Useless non-RNG generator as proxy! 
11 | }; 12 | 13 | int main() { 14 | constexpr int size = 64; 15 | queue q{ext::intel::fpga_emulator_selector_v}; 16 | 17 | buffer<int> b{range{size}}; 18 | 19 | q.submit([&](handler& h) { 20 | accessor output(b, h); 21 | 22 | h.single_task([=]() { 23 | // BEGIN CODE SNIP 24 | int a = 0; 25 | for (int i = 0; i < size; i++) { 26 | a = a + i; 27 | } 28 | // END CODE SNIP 29 | }); 30 | }); 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_22_loop_carried_state.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <sycl/ext/intel/fpga_extensions.hpp> // For fpga_emulator_selector_v 6 | #include <sycl/sycl.hpp> 7 | using namespace sycl; 8 | 9 | int generate_incremental_random_number(const int& state) { 10 | return 0; // Useless non-RNG generator as proxy! 11 | }; 12 | 13 | int main() { 14 | constexpr int size = 64; 15 | constexpr int seed = 0; 16 | 17 | queue q{ext::intel::fpga_emulator_selector_v}; 18 | 19 | buffer<int> b{range{size}}; 20 | 21 | q.submit([&](handler& h) { 22 | accessor output(b, h); 23 | 24 | // BEGIN CODE SNIP 25 | h.single_task([=]() { 26 | int state = seed; 27 | for (int i = 0; i < size; i++) { 28 | state = generate_incremental_random_number(state); 29 | output[i] = state; 30 | } 31 | }); 32 | // END CODE SNIP 33 | }); 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_31_inter_kernel_pipe.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <array> 6 | #include <sycl/ext/intel/fpga_extensions.hpp> // For fpga_emulator_selector_v 7 | #include <sycl/sycl.hpp> 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr int count = 1024; 12 | std::array<int, count> in_array; 13 | 14 | // Initialize input array 15 | for (int i = 0; i < count; i++) { 16 | in_array[i] = i; 17 | } 18 | 19 | // Buffer initialized from in_array (std::array) 20 | buffer b_in{in_array}; 21 | 22 | // Uninitialized buffer with count elements 23 | buffer<int> b_out{range{count}}; 24 | 25 | // Acquire queue to emulated FPGA device 26 | queue q{ext::intel::fpga_emulator_selector_v}; 27 | 28 | // BEGIN CODE SNIP 29 | // Create alias for pipe type to be consistent across uses 30 | using my_pipe = ext::intel::pipe<class some_pipe, int>; 31 | 32 | // ND-range kernel 33 | q.submit([&](handler& h) { 34 | auto a = accessor(b_in, h); 35 | 36 | h.parallel_for( 37 | count, [=](auto idx) { my_pipe::write(a[idx]); }); 38 | }); 39 | 40 | // Single_task kernel 41 | q.submit([&](handler& h) { 42 | auto a = accessor(b_out, h); 43 | 44 | h.single_task([=]() { 45 | for (int i = 0; i < count; i++) { 46 | a[i] = my_pipe::read(); 47 | } 48 | }); 49 | }); 50 | 51 | // END CODE SNIP 52 | 53 | auto a = host_accessor(b_out); 54 | for (int i = 0; i < count; i++) { 55 | if (a[i] != i) { 56 | std::cout << "Failure on element " << i << "\n"; 57 | return 1; 58 | } 59 | } 60 | std::cout << "Passed!\n"; 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_32.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be
compiled as is. 7 | 8 | template <typename name, typename dataT, 9 | size_t min_capacity = 0> 10 | class pipe; 11 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_33.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | // Blocking 9 | T read(); 10 | void write(const T &data); 11 | 12 | // Non-blocking 13 | T read(bool &success_code); 14 | void write(const T &data, bool &success_code); 15 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_9_fpga_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <sycl/ext/intel/fpga_extensions.hpp> // For fpga_selector_v 6 | #include <sycl/sycl.hpp> 7 | using namespace sycl; 8 | 9 | void say_device(const queue& q) { 10 | std::cout << "Device : " 11 | << q.get_device().get_info<info::device::name>() 12 | << "\n"; 13 | } 14 | 15 | int main() { 16 | queue q{ext::intel::fpga_selector_v}; 17 | say_device(q); 18 | 19 | q.submit([&](handler& h) { 20 | h.parallel_for(1024, [=](auto idx) { 21 | // ... 22 | }); 23 | }); 24 | 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | if(NOT WITHCUDA) 6 | # TEMPORARILY DISABLE: doesn't work with CUDA backend. 7 | add_book_sample( 8 | TEST 9 | TARGET fig_18_1_builtin 10 | SOURCES fig_18_1_builtin.cpp) 11 | endif() 12 | 13 | add_book_sample( 14 | TEST 15 | TARGET fig_18_2_swap 16 | SOURCES fig_18_2_swap.cpp) 17 | 18 | if(NOT NODPL) 19 | add_book_sample( 20 | TEST 21 | TARGET fig_18_6_std_fill 22 | SOURCES fig_18_6_std_fill.cpp) 23 | endif() 24 | 25 | if(NOT NODPL) 26 | add_book_sample( 27 | TEST 28 | TARGET fig_18_7_std_fill_default_policy 29 | SOURCES fig_18_7_std_fill_default_policy.cpp) 30 | endif() 31 | 32 | if(NOT NODPL) 33 | add_book_sample( 34 | TEST 35 | TARGET fig_18_8_binary_search 36 | SOURCES fig_18_8_binary_search.cpp) 37 | endif() 38 | 39 | if(NOT NODPL) 40 | add_book_sample( 41 | TEST 42 | TARGET fig_18_9_pstl_usm_device 43 | SOURCES fig_18_9_pstl_usm_device.cpp) 44 | endif() 45 | 46 | if(NOT NODPL) 47 | add_book_sample( 48 | TEST 49 | TARGET fig_18_10_pstl_usm 50 | SOURCES fig_18_10_pstl_usm.cpp) 51 | endif() 52 | 53 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_10_pstl_usm.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <oneapi/dpl/algorithm> 6 | #include <oneapi/dpl/execution> 7 | #include <sycl/sycl.hpp> 8 | 9 | int main() { 10 | sycl::queue q; 11 | const int n = 10; 12 | sycl::usm_allocator<int, sycl::usm::alloc::shared> alloc( 13 | q); 14 | std::vector<int, decltype(alloc)> vec(n, alloc); 15 | 16 | std::fill(oneapi::dpl::execution::make_device_policy(q), 17 | vec.begin(), vec.end(), 78); 18 | q.wait(); 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_1_builtin.cpp: -------------------------------------------------------------------------------- 1 | // Copyright
(C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace sycl; 10 | 11 | int main() { 12 | // BEGIN CODE SNIP 13 | constexpr int size = 9; 14 | std::array a; 15 | std::array b; 16 | 17 | bool pass = true; 18 | 19 | for (int i = 0; i < size; ++i) { 20 | a[i] = i; 21 | b[i] = i; 22 | } 23 | 24 | queue q; 25 | 26 | range sz{size}; 27 | 28 | buffer bufA(a); 29 | buffer bufB(b); 30 | buffer bufP(&pass, 1); 31 | 32 | q.submit([&](handler &h) { 33 | accessor accA{bufA, h}; 34 | accessor accB{bufB, h}; 35 | accessor accP{bufP, h}; 36 | 37 | h.parallel_for(size, [=](id<1> idx) { 38 | accA[idx] = std::log(accA[idx]); 39 | accB[idx] = sycl::log(accB[idx]); 40 | if (!sycl::isequal(accA[idx], accB[idx])) { 41 | accP[0] = false; 42 | } 43 | }); 44 | }); 45 | // END CODE SNIP 46 | 47 | host_accessor host_P(bufP); 48 | 49 | if (host_P[0]) { 50 | std::cout << "Matched\n"; 51 | } else { 52 | std::cout << "Unmatched\n"; 53 | } 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_2_swap.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace sycl; 10 | 11 | int main() { 12 | std::array arr{8, 9}; 13 | buffer buf{arr}; 14 | 15 | { 16 | host_accessor host_A(buf); 17 | std::cout << "Before: " << host_A[0] << ", " 18 | << host_A[1] << "\n"; 19 | } // End scope of host_A so that upcoming kernel can 20 | // operate on buf 21 | 22 | queue q; 23 | q.submit([&](handler &h) { 24 | accessor a{buf, h}; 25 | h.single_task([=]() { 26 | // Call std::swap! 27 | std::swap(a[0], a[1]); 28 | }); 29 | }); 30 | 31 | host_accessor host_B(buf); 32 | std::cout << "After: " << host_B[0] << ", " << host_B[1] 33 | << "\n"; 34 | return 0; 35 | } 36 | 37 | // Sample output: 38 | // 8, 9 39 | // 9, 8 40 | 41 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_5.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
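// --- Illustrative usage sketch (not book figure text) ---
// The execution-policy objects shown below are normally passed
// straight to a standard algorithm. A minimal, self-contained
// example, assuming a caller-provided sycl::queue and buffer; the
// function name, the kernel-name tag "class Negate", and the header
// set are assumptions, not part of the original figure:
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
#include <oneapi/dpl/iterator>
#include <sycl/sycl.hpp>

void negate_on_device(sycl::queue& q, sycl::buffer<int>& buf) {
  // Wrap the buffer in oneDPL iterators so the algorithm can run on
  // the device without an explicit host copy
  auto first = oneapi::dpl::begin(buf);
  auto last = oneapi::dpl::end(buf);
  auto policy =
      oneapi::dpl::execution::make_device_policy<class Negate>(q);
  // The algorithm is dispatched to the device associated with q
  std::transform(policy, first, last, first,
                 [](int x) { return -x; });
}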
7 | 8 | auto policy_b = device_policy{ 10 | sycl::device{sycl::gpu_selector{}}}; 11 | std::for_each(policy_b, …); 12 | auto policy_c = 13 | device_policy{sycl::default_selector{}}; 15 | std::for_each(policy_c, …); 16 | auto policy_d = 17 | make_device_policy(default_policy); 18 | std::for_each(policy_d, …); 19 | auto policy_e = 20 | make_device_policy(sycl::queue{}); 21 | std::for_each(policy_e, …); 22 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_6_std_fill.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // ------------------------------------------------------- 6 | // Changed from Book: 7 | // old naming dpstd:: is now oneapi::dpl:: 8 | // ------------------------------------------------------- 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | int main() { 16 | sycl::queue q; 17 | sycl::buffer buf{1000}; 18 | 19 | auto buf_begin = oneapi::dpl::begin(buf); 20 | auto buf_end = oneapi::dpl::end(buf); 21 | 22 | auto policy = oneapi::dpl::execution::make_device_policy< 23 | class fill>(q); 24 | std::fill(policy, buf_begin, buf_end, 42); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_7_std_fill_default_policy.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | int main() { 11 | std::vector v(100000); 12 | std::fill(oneapi::dpl::execution::dpcpp_default, 13 | v.begin(), v.end(), 42); 14 | 15 | if (v[788] == 42) 16 | std::cout << "passed" << std::endl; 17 | else 18 | std::cout << "failed" << std::endl; 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_8_binary_search.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // clang-format off 6 | #include 7 | // clang-format on 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace sycl; 14 | 15 | int main() { 16 | buffer kB{range<1>(10)}; 17 | buffer vB{range<1>(5)}; 18 | buffer rB{range<1>(5)}; 19 | { 20 | host_accessor k{kB}; 21 | host_accessor v{vB}; 22 | 23 | // Initialize data, sorted 24 | k[0] = 0; 25 | k[1] = 5; 26 | k[2] = 6; 27 | k[3] = 6; 28 | k[4] = 7; 29 | k[5] = 7; 30 | k[6] = 8; 31 | k[7] = 8; 32 | k[8] = 9; 33 | k[9] = 9; 34 | 35 | v[0] = 1; 36 | v[1] = 6; 37 | v[2] = 3; 38 | v[3] = 7; 39 | v[4] = 8; 40 | } 41 | 42 | // create dpc++ iterators 43 | auto k_beg = oneapi::dpl::begin(kB); 44 | auto k_end = oneapi::dpl::end(kB); 45 | auto v_beg = oneapi::dpl::begin(vB); 46 | auto v_end = oneapi::dpl::end(vB); 47 | auto r_beg = oneapi::dpl::begin(rB); 48 | 49 | // create named policy from existing one 50 | auto policy = oneapi::dpl::execution::make_device_policy< 51 | class bSearch>(oneapi::dpl::execution::dpcpp_default); 52 | 53 | // call algorithm 54 | oneapi::dpl::binary_search(policy, k_beg, k_end, v_beg, 55 | v_end, r_beg); 56 | 57 | // check data 58 | host_accessor r{rB}; 59 | if ((r[0] == false) && (r[1] == true) && 60 | (r[2] == false) && (r[3] == true) && (r[4] == true)) { 61 | std::cout << "Passed. 
\nRun on " 62 | << policy.queue() 63 | .get_device() 64 | .get_info() 65 | << "\n"; 66 | } else 67 | std::cout << "failed: values do not match.\n"; 68 | 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_9_pstl_usm_device.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | int main() { 10 | sycl::queue q; 11 | const int n = 10; 12 | int* h_head = sycl::malloc_host(n, q); 13 | int* d_head = sycl::malloc_device(n, q); 14 | std::fill(oneapi::dpl::execution::make_device_policy(q), 15 | d_head, d_head + n, 78); 16 | q.wait(); 17 | 18 | q.memcpy(h_head, d_head, n * sizeof(int)); 19 | q.wait(); 20 | 21 | if (h_head[8] == 78) 22 | std::cout << "passed" << std::endl; 23 | else 24 | std::cout << "failed" << std::endl; 25 | 26 | sycl::free(h_head, q); 27 | sycl::free(d_head, q); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TARGET fig_19_3_data_race 7 | SOURCES fig_19_3_data_race.cpp) 8 | 9 | add_book_sample( 10 | TEST 11 | TARGET fig_19_6_avoid_data_race_with_barrier 12 | SOURCES fig_19_6_avoid_data_race_with_barrier.cpp) 13 | 14 | add_book_sample( 15 | TEST 16 | TARGET fig_19_7_avoid_data_race_with_atomics 17 | SOURCES fig_19_7_avoid_data_race_with_atomics.cpp) 18 | 19 | add_book_sample( 20 | TEST 21 | TARGET fig_19_15_buffer_and_atomic_ref 22 | SOURCES fig_19_15_buffer_and_atomic_ref.cpp) 23 | 24 | add_book_sample( 25 | TEST 26 | TARGET fig_19_16_usm_and_atomic_ref 27 | SOURCES fig_19_16_usm_and_atomic_ref.cpp) 28 | 29 | add_book_sample( 30 | TEST 31 | TARGET fig_19_17_histogram 32 | SOURCES fig_19_17_histogram.cpp) 33 | 34 | add_book_sample( 35 | TARGET fig_19_18-19_device_latch 36 | SOURCES fig_19_18-19_device_latch.cpp) 37 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_11.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
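// --- Illustrative usage sketch (not book figure text) ---
// How the interface declared below is typically used: wrap an
// ordinary object in an atomic_ref inside a kernel and use its
// member operators. The queue, the USM pointer, and the function
// name are assumptions for illustration only:
#include <sycl/sycl.hpp>

void add_one(sycl::queue& q, int* counter /* USM allocation */) {
  q.single_task([=] {
     // DefaultOrder = relaxed, DefaultScope = device; the address
     // space template parameter keeps its generic_space default
     sycl::atomic_ref<int, sycl::memory_order::relaxed,
                      sycl::memory_scope::device>
         ref(*counter);
     ref += 1;  // read-modify-write using the defaults declared below
   }).wait();
}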
7 | 8 | template <typename T, memory_order DefaultOrder, 9 | memory_scope DefaultScope, 10 | access::address_space AddressSpace = access::address_space::generic_space> 11 | class atomic_ref { 12 | public: 13 | using value_type = T; 14 | static constexpr size_t required_alignment = 15 | /* implementation-defined */; 16 | static constexpr bool is_always_lock_free = 17 | /* implementation-defined */; 18 | static constexpr memory_order default_read_order = 19 | memory_order_traits<DefaultOrder>::read_order; 20 | static constexpr memory_order default_write_order = 21 | memory_order_traits<DefaultOrder>::write_order; 22 | static constexpr memory_order 23 | default_read_modify_write_order = DefaultOrder; 24 | static constexpr memory_scope default_scope = 25 | DefaultScope; 26 | 27 | explicit atomic_ref(T& obj); 28 | atomic_ref(const atomic_ref& ref) noexcept; 29 | }; 30 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_12.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | void store( 9 | T operand, memory_order order = default_write_order, 10 | memory_scope scope = default_scope) const noexcept; 11 | T operator=( 12 | T desired) const noexcept; // equivalent to store 13 | 14 | T load(memory_order order = default_read_order, 15 | memory_scope scope = default_scope) const noexcept; 16 | operator T() const noexcept; // equivalent to load 17 | 18 | T exchange( 19 | T operand, 20 | memory_order order = default_read_modify_write_order, 21 | memory_scope scope = default_scope) const noexcept; 22 | 23 | bool compare_exchange_weak( 24 | T &expected, T desired, memory_order success, 25 | memory_order failure, 26 | memory_scope scope = default_scope) const noexcept; 27 | 28 | bool compare_exchange_weak( 29 | T &expected, T desired, 30 | memory_order order = default_read_modify_write_order, 31 | memory_scope scope = default_scope) const noexcept; 32 | 33 | bool compare_exchange_strong( 34 | T &expected, T desired, memory_order success, 35 | memory_order failure, 36 | memory_scope scope = default_scope) const noexcept; 37 | 38 | bool compare_exchange_strong( 39 | T &expected, T desired, 40 | memory_order order = default_read_modify_write_order, 41 | memory_scope scope = default_scope) const noexcept; 42 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_13.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is.
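// --- Illustrative usage sketch (not book figure text) ---
// The integral fetch operations declared below in action: each
// work-item increments a shared counter and records a running
// maximum. The queue, the USM pointers, and the names are
// assumptions for illustration only:
#include <sycl/sycl.hpp>

void count_and_max(sycl::queue& q, const int* in, size_t n,
                   int* count, int* max_val) {
  q.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> i) {
     sycl::atomic_ref<int, sycl::memory_order::relaxed,
                      sycl::memory_scope::device>
         c(*count), m(*max_val);
     c.fetch_add(1);      // uses the default order and scope
     m.fetch_max(in[i]);  // integral-only fetch operation
   }).wait();
}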
7 | 8 | Integral fetch_add( 9 | Integral operand, 10 | memory_order order = default_read_modify_write_order, 11 | memory_scope scope = default_scope) const noexcept; 12 | 13 | Integral fetch_sub( 14 | Integral operand, 15 | memory_order order = default_read_modify_write_order, 16 | memory_scope scope = default_scope) const noexcept; 17 | 18 | Integral fetch_and( 19 | Integral operand, 20 | memory_order order = default_read_modify_write_order, 21 | memory_scope scope = default_scope) const noexcept; 22 | 23 | Integral fetch_or( 24 | Integral operand, 25 | memory_order order = default_read_modify_write_order, 26 | memory_scope scope = default_scope) const noexcept; 27 | 28 | Integral fetch_min( 29 | Integral operand, 30 | memory_order order = default_read_modify_write_order, 31 | memory_scope scope = default_scope) const noexcept; 32 | 33 | Integral fetch_max( 34 | Integral operand, 35 | memory_order order = default_read_modify_write_order, 36 | memory_scope scope = default_scope) const noexcept; 37 | 38 | Integral operator++(int) const noexcept; 39 | Integral operator--(int) const noexcept; 40 | Integral operator++() const noexcept; 41 | Integral operator--() const noexcept; 42 | Integral operator+=(Integral) const noexcept; 43 | Integral operator-=(Integral) const noexcept; 44 | Integral operator&=(Integral) const noexcept; 45 | Integral operator|=(Integral) const noexcept; 46 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_14.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
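// --- Illustrative usage sketch (not book figure text) ---
// The floating-point specialization declared below supports a
// smaller set of fetch operations; a common use is accumulating a
// sum with fetch_add. The queue, the USM pointers, and the names
// are assumptions for illustration only:
#include <sycl/sycl.hpp>

void accumulate(sycl::queue& q, const float* data, size_t n,
                float* sum) {
  q.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> i) {
     sycl::atomic_ref<float, sycl::memory_order::relaxed,
                      sycl::memory_scope::device>
         s(*sum);
     s.fetch_add(data[i]);  // floating-point atomic add
   }).wait();
}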
7 | 8 | Floating fetch_add( 9 | Floating operand, 10 | memory_order order = default_read_modify_write_order, 11 | memory_scope scope = default_scope) const noexcept; 12 | 13 | Floating fetch_sub( 14 | Floating operand, 15 | memory_order order = default_read_modify_write_order, 16 | memory_scope scope = default_scope) const noexcept; 17 | 18 | Floating fetch_min( 19 | Floating operand, 20 | memory_order order = default_read_modify_write_order, 21 | memory_scope scope = default_scope) const noexcept; 22 | 23 | Floating fetch_max( 24 | Floating operand, 25 | memory_order order = default_read_modify_write_order, 26 | memory_scope scope = default_scope) const noexcept; 27 | 28 | Floating operator+=(Floating) const noexcept; 29 | Floating operator-=(Floating) const noexcept; 30 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_15_buffer_and_atomic_ref.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | queue q; 13 | 14 | const size_t N = 32; 15 | const size_t M = 4; 16 | std::vector data(N); 17 | std::fill(data.begin(), data.end(), 0); 18 | 19 | { 20 | buffer buf(data); 21 | 22 | // BEGIN CODE SNIP 23 | q.submit([&](handler& h) { 24 | accessor acc{buf, h}; 25 | h.parallel_for(N, [=](id<1> i) { 26 | int j = i % M; 27 | atomic_ref 30 | atomic_acc(acc[j]); 31 | atomic_acc += 1; 32 | }); 33 | }); 34 | // END CODE SNIP 35 | } 36 | 37 | for (int i = 0; i < N; ++i) { 38 | std::cout << "data [" << i << "] = " << data[i] << "\n"; 39 | } 40 | 41 | bool passed = true; 42 | int* gold = (int*)malloc(N * sizeof(int)); 43 | std::fill(gold, gold + N, 0); 44 | for (int i = 0; i < N; ++i) { 45 | int j = i % M; 46 | gold[j] += 1; 47 | } 48 | for (int i = 0; i < N; ++i) { 49 | if (data[i] != gold[i]) { 50 | passed = false; 51 | } 52 | } 53 | std::cout << ((passed) ? "SUCCESS\n" : "FAILURE\n"); 54 | free(gold); 55 | return (passed) ? 0 : 1; 56 | } 57 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_16_usm_and_atomic_ref.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // The contents of this file are identical to 6 | // fig_19_7_avoid_data_race_with_atomics.cpp. 7 | // The figure is reproduced in the book for readability, 8 | // and duplicated here to avoid confusion. 
9 | 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace sycl; 15 | 16 | int main() { 17 | queue q; 18 | 19 | const size_t N = 32; 20 | const size_t M = 4; 21 | 22 | int* data = malloc_shared(N, q); 23 | std::fill(data, data + N, 0); 24 | 25 | // BEGIN CODE SNIP 26 | q.parallel_for(N, [=](id<1> i) { 27 | int j = i % M; 28 | atomic_ref 31 | atomic_data(data[j]); 32 | atomic_data += 1; 33 | }).wait(); 34 | // END CODE SNIP 35 | 36 | for (int i = 0; i < N; ++i) { 37 | std::cout << "data [" << i << "] = " << data[i] << "\n"; 38 | } 39 | bool passed = true; 40 | int* gold = (int*)malloc(N * sizeof(int)); 41 | std::fill(gold, gold + N, 0); 42 | for (int i = 0; i < N; ++i) { 43 | int j = i % M; 44 | gold[j] += 1; 45 | } 46 | for (int i = 0; i < N; ++i) { 47 | if (data[i] != gold[i]) { 48 | passed = false; 49 | } 50 | } 51 | std::cout << ((passed) ? "SUCCESS\n" : "FAILURE\n"); 52 | free(gold); 53 | free(data, q); 54 | return (passed) ? 0 : 1; 55 | } 56 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_3_data_race.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | queue q; 13 | 14 | const size_t N = 32; 15 | const size_t M = 4; 16 | 17 | // BEGIN CODE SNIP 18 | int* data = malloc_shared(N, q); 19 | std::fill(data, data + N, 0); 20 | 21 | q.parallel_for(N, [=](id<1> i) { 22 | int j = i % M; 23 | data[j] += 1; 24 | }).wait(); 25 | 26 | for (int i = 0; i < N; ++i) { 27 | std::cout << "data [" << i << "] = " << data[i] << "\n"; 28 | } 29 | // END CODE SNIP 30 | 31 | bool passed = true; 32 | int* gold = (int*)malloc(N * sizeof(int)); 33 | std::fill(gold, gold + N, 0); 34 | for (int i = 0; i < N; ++i) { 35 | int j = i % M; 36 | gold[j] += 1; 37 | } 38 | for (int i = 0; i < N; ++i) { 39 | if (data[i] != gold[i]) { 40 | passed = false; 41 | } 42 | } 43 | std::cout << ((passed) ? "SUCCESS\n" : "FAILURE\n"); 44 | free(gold); 45 | free(data, q); 46 | return (passed) ? 
0 : 1; 47 | } 48 | 49 | 50 | // N = 2, M = 2: 51 | // data [0] = 1 52 | // data [1] = 1 53 | // 54 | // N = 2, M = 1: 55 | // data [0] = 1 56 | // data [1] = 0 57 | 58 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_6_avoid_data_race_with_barrier.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | queue q; 13 | 14 | const uint32_t N = 32; 15 | const uint32_t M = 4; 16 | 17 | // BEGIN CODE SNIP 18 | int* data = malloc_shared(N, q); 19 | std::fill(data, data + N, 0); 20 | 21 | // Launch exactly one work-group 22 | // Number of work-groups = global / local 23 | range<1> global{N}; 24 | range<1> local{N}; 25 | 26 | q.parallel_for(nd_range<1>{global, local}, 27 | [=](nd_item<1> it) { 28 | int i = it.get_global_id(0); 29 | int j = i % M; 30 | for (int round = 0; round < N; ++round) { 31 | // Allow exactly one work-item update 32 | // per round 33 | if (i == round) { 34 | data[j] += 1; 35 | } 36 | group_barrier(it.get_group()); 37 | } 38 | }) 39 | .wait(); 40 | 41 | for (int i = 0; i < N; ++i) { 42 | std::cout << "data [" << i << "] = " << data[i] << "\n"; 43 | } 44 | // END CODE SNIP 45 | 46 | bool passed = true; 47 | int* gold = (int*)malloc(N * sizeof(int)); 48 | std::fill(gold, gold + N, 0); 49 | for (int i = 0; i < N; ++i) { 50 | int j = i % M; 51 | gold[j] += 1; 52 | } 53 | for (int i = 0; i < N; ++i) { 54 | if (data[i] != gold[i]) { 55 | passed = false; 56 | } 57 | } 58 | std::cout << ((passed) ? "SUCCESS\n" : "FAILURE\n"); 59 | free(gold); 60 | free(data, q); 61 | return (passed) ? 0 : 1; 62 | } 63 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_7_avoid_data_race_with_atomics.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | queue q; 13 | 14 | const size_t N = 32; 15 | const size_t M = 4; 16 | 17 | // BEGIN CODE SNIP 18 | int* data = malloc_shared(N, q); 19 | std::fill(data, data + N, 0); 20 | 21 | q.parallel_for(N, [=](id<1> i) { 22 | int j = i % M; 23 | atomic_ref 26 | atomic_data(data[j]); 27 | atomic_data += 1; 28 | }).wait(); 29 | 30 | for (int i = 0; i < N; ++i) { 31 | std::cout << "data [" << i << "] = " << data[i] << "\n"; 32 | } 33 | // END CODE SNIP 34 | 35 | bool passed = true; 36 | int* gold = (int*)malloc(N * sizeof(int)); 37 | std::fill(gold, gold + N, 0); 38 | for (int i = 0; i < N; ++i) { 39 | int j = i % M; 40 | gold[j] += 1; 41 | } 42 | for (int i = 0; i < N; ++i) { 43 | if (data[i] != gold[i]) { 44 | passed = false; 45 | } 46 | } 47 | std::cout << ((passed) ? "SUCCESS\n" : "FAILURE\n"); 48 | free(gold); 49 | free(data, q); 50 | return (passed) ? 
0 : 1; 51 | } 52 | -------------------------------------------------------------------------------- /samples/Ch20_backend_interoperability/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_20_2_querying_backends 8 | SOURCES fig_20_2_querying_backends.cpp) 9 | 10 | add_book_sample( 11 | #TEST # disable temporarily due to bug 12 | TARGET fig_20_3_opencl_to_sycl 13 | SOURCES fig_20_3_opencl_to_sycl.cpp 14 | LIBS OpenCL) 15 | 16 | if(NOT NOL0) 17 | add_book_sample( 18 | #TEST # disable temporarily due to bug 19 | TARGET fig_20_4_level_zero_to_sycl 20 | SOURCES fig_20_4_level_zero_to_sycl.cpp 21 | LIBS ze_loader) 22 | endif() 23 | 24 | add_book_sample( 25 | TEST 26 | TARGET fig_20_5_sycl_to_opencl 27 | SOURCES fig_20_5_sycl_to_opencl.cpp 28 | LIBS OpenCL) 29 | 30 | if(NOT NOL0) 31 | add_book_sample( 32 | TEST 33 | TARGET fig_20_6_sycl_to_level_zero 34 | SOURCES fig_20_6_sycl_to_level_zero.cpp 35 | LIBS ze_loader) 36 | endif() 37 | 38 | add_book_sample( 39 | TEST 40 | TARGET fig_20_7_interop_handle_opencl 41 | SOURCES fig_20_7_interop_handle_opencl.cpp 42 | LIBS OpenCL) 43 | 44 | if(NOT NOL0) 45 | add_book_sample( 46 | TEST 47 | TARGET fig_20_8_interop_handle_level_zero 48 | SOURCES fig_20_8_interop_handle_level_zero.cpp 49 | LIBS ze_loader) 50 | endif() 51 | 52 | add_book_sample( 53 | TEST 54 | TARGET fig_20_9_opencl_kernel_interop 55 | SOURCES fig_20_9_opencl_kernel_interop.cpp 56 | LIBS OpenCL) 57 | 58 | if(NOT NOL0) 59 | add_book_sample( 60 | TEST 61 | TARGET fig_20_10_level_zero_kernel_interop 62 | SOURCES fig_20_10_level_zero_kernel_interop.cpp 63 | LIBS ze_loader) 64 | endif() 65 | 66 | add_book_sample( 67 | TEST 68 | TARGET fig_20_11_filter_selector 69 | SOURCES fig_20_11_filter_selector.cpp) 70 | -------------------------------------------------------------------------------- /samples/Ch20_backend_interoperability/fig_20_11_filter_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | auto find_device = [](backend b, 11 | info::device_type t = 12 | info::device_type::all) { 13 | for (auto d : device::get_devices(t)) { 14 | if (d.get_backend() == b) { 15 | return d; 16 | } 17 | } 18 | throw sycl::exception(errc::runtime, 19 | "Could not find a device with " 20 | "the requested backend!"); 21 | }; 22 | 23 | try { 24 | device d{find_device(backend::opencl)}; 25 | std::cout << "Found an OpenCL SYCL device: " 26 | << d.get_info() << "\n"; 27 | } catch (const sycl::exception &e) { 28 | std::cout << "No OpenCL SYCL devices were found.\n"; 29 | } 30 | 31 | try { 32 | device d{find_device(backend::ext_oneapi_level_zero)}; 33 | std::cout << "Found a Level Zero SYCL device: " 34 | << d.get_info() << "\n"; 35 | } catch (const sycl::exception &e) { 36 | std::cout << "No Level Zero SYCL devices were found.\n"; 37 | } 38 | 39 | return 0; 40 | } 41 | 42 | // Example Output: 43 | // Found an OpenCL SYCL device: pthread-12th Gen Intel(R) Core(TM) i9-12900K 44 | // Found a Level Zero SYCL device: Intel(R) UHD Graphics 770 [0x4680] 45 | 46 | -------------------------------------------------------------------------------- /samples/Ch20_backend_interoperability/fig_20_2_querying_backends.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | for (auto& p : platform::get_platforms()) { 11 | std::cout << "SYCL Platform: " 12 | << p.get_info() 13 | << " is associated with SYCL Backend: " 14 | << p.get_backend() << std::endl; 15 | } 16 | return 0; 17 | } 18 | 19 | // Example Output: 20 | // SYCL Platform: Portable Computing Language is associated with SYCL Backend: opencl 21 | // SYCL Platform: Intel(R) OpenCL HD Graphics is associated with SYCL Backend: opencl 22 | // SYCL Platform: Intel(R) OpenCL is associated with SYCL Backend: opencl 23 | // SYCL Platform: Intel(R) FPGA Emulation Platform for OpenCL(TM) is associated with SYCL Backend: opencl 24 | // SYCL Platform: Intel(R) Level-Zero is associated with SYCL Backend: ext_oneapi_level_zero 25 | // SYCL Platform: NVIDIA CUDA BACKEND is associated with SYCL Backend: ext_oneapi_cuda 26 | // SYCL Platform: AMD HIP BACKEND is associated with SYCL Backend: ext_oneapi_hip 27 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_21_1_basicsycl 8 | SOURCES fig_21_1_basicsycl.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_21_5_walkorder 13 | SOURCES fig_21_5_walkorder.cpp) 14 | 15 | add_book_sample( 16 | TARGET fig_21_7_possible_deadlock 17 | SOURCES fig_21_7_possible_deadlock.cpp) 18 | 19 | add_book_sample( 20 | TEST 21 | TARGET fig_21_8_barriers 22 | SOURCES fig_21_8_barriers.cpp) 23 | 24 | add_book_sample( 25 | TEST 26 | TARGET fig_21_9_atomics 27 | SOURCES fig_21_9_atomics.cpp) 28 | 29 | if(NOT NODPCT) 30 | add_book_sample( 31 | TEST 32 | TARGET fig_21_13-14_reverse_migrated 33 | SOURCES fig_21_13-14_reverse_migrated.cpp) 34 | endif() 35 | 36 | # Note: we do not currently build these pure CUDA samples: 37 | # fig_21_2_basiccuda.cu 38 | # fig_21_4-6_walkorder.cu 39 | # fig_21_10_reverse.cu 40 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_10_reverse.cu: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | constexpr size_t size = 1024 * 1024; 12 | 13 | // BEGIN CODE SNIP 14 | __shared__ int scratch[256]; 15 | __global__ void Reverse(int* ptr, size_t size) { 16 | auto gid = blockIdx.x * blockDim.x + threadIdx.x; 17 | auto lid = threadIdx.x; 18 | 19 | scratch[lid] = ptr[gid]; 20 | __syncthreads(); 21 | ptr[gid] = scratch[256 - lid - 1]; 22 | } 23 | 24 | int main() { 25 | std::vector data(size); 26 | std::iota(data.begin(), data.end(), 0); 27 | 28 | cudaDeviceProp deviceProp; 29 | cudaGetDeviceProperties(&deviceProp, 0); 30 | std::cout << "Running on device: " << deviceProp.name << "\n"; 31 | 32 | int* ptr = nullptr; 33 | cudaMalloc(&ptr, size * sizeof(int)); 34 | cudaMemcpy(ptr, data.data(), size * sizeof(int), 35 | cudaMemcpyDefault); 36 | Reverse<<>>(ptr, size); 37 | cudaError_t result = cudaDeviceSynchronize(); 38 | if (result != cudaSuccess) { 39 | std::cout << "An error occurred!\n"; 40 | } 
41 | // ... 42 | // END CODE SNIP 43 | 44 | cudaMemcpy(data.data(), ptr, size * sizeof(int), 45 | cudaMemcpyDefault); 46 | 47 | for (size_t s = 0; s < size; s += 256) { 48 | for (size_t i = 0; i < 256; i++) { 49 | auto got = data[s + i]; 50 | auto want = s + 256 - i - 1; 51 | if (got != want) { 52 | std::cout << "Mismatch at index " << s + i << ", got " 53 | << got << ", wanted " << want << "\n"; 54 | return -1; 55 | } 56 | } 57 | } 58 | 59 | cudaFree(ptr); 60 | std::cout << "Success.\n"; 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_1_basicsycl.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | constexpr int count = 1024 * 1024; 10 | 11 | int main() { 12 | // BEGIN CODE SNIP 13 | // Declare an in-order SYCL queue for the default device 14 | queue q{property::queue::in_order()}; 15 | std::cout << "Running on device: " 16 | << q.get_device().get_info() 17 | << "\n"; 18 | 19 | int* buffer = malloc_host(count, q); 20 | q.fill(buffer, 0, count); 21 | 22 | q.parallel_for(count, [=](auto id) { 23 | buffer[id] = id; 24 | }).wait(); 25 | // END CODE SNIP 26 | 27 | int mismatches = 0; 28 | for (int i = 0; i < count; i++) { 29 | if (buffer[i] != i) { 30 | mismatches++; 31 | } 32 | } 33 | if (mismatches) { 34 | std::cout << "Found " << mismatches 35 | << " mismatches out of " << count 36 | << " elements.\n"; 37 | } else { 38 | std::cout << "Success.\n"; 39 | } 40 | 41 | free(buffer, q); 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_2_basiccuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | #include 8 | 9 | constexpr int count = 1024 * 1024; 10 | 11 | // BEGIN CODE SNIP 12 | // The CUDA kernel is a separate function 13 | __global__ void TestKernel(int* dst) { 14 | auto id = blockIdx.x * blockDim.x + threadIdx.x; 15 | dst[id] = id; 16 | } 17 | 18 | int main() { 19 | // CUDA uses device zero by default 20 | cudaDeviceProp deviceProp; 21 | cudaGetDeviceProperties(&deviceProp, 0); 22 | std::cout << "Running on device: " << deviceProp.name << "\n"; 23 | 24 | int* buffer = nullptr; 25 | cudaMallocHost(&buffer, count * sizeof(int)); 26 | cudaMemset(buffer, 0, count * sizeof(int)); 27 | 28 | TestKernel<<>>(buffer); 29 | cudaDeviceSynchronize(); 30 | // ... 
31 | // END CODE SNIP 32 | 33 | int mismatches = 0; 34 | for (int i = 0; i < count; i++) { 35 | if (buffer[i] != i) { 36 | mismatches++; 37 | } 38 | } 39 | if (mismatches) { 40 | std::cout << "Found " << mismatches << " mismatches out of " 41 | << count << " elements.\n"; 42 | } else { 43 | std::cout << "Success.\n"; 44 | } 45 | 46 | cudaFreeHost(buffer); 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_4-6_walkorder.cu: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | constexpr int count = 16 * 2; 11 | 12 | __device__ int get_global_linear_id() { 13 | auto blockId = gridDim.x * blockIdx.y + blockIdx.x; 14 | return blockId * blockDim.y * blockDim.x + 15 | threadIdx.y * blockDim.x + 16 | threadIdx.x; 17 | } 18 | 19 | // BEGIN CODE SNIP #1, Part 1/2 20 | __global__ void ExchangeKernel(int* dst) { 21 | auto index = get_global_linear_id(); // helper function 22 | auto fastest = threadIdx.x; 23 | auto neighbor = __shfl_xor_sync(0xFFFFFFFF, fastest, 1); 24 | dst[index] = neighbor; 25 | } 26 | // END CODE SNIP #1, Part 1/2 27 | 28 | // BEGIN CODE SNIP #2 29 | __global__ void ExchangeKernelCoopGroups(int* dst) { 30 | namespace cg = cooperative_groups; 31 | auto index = cg::this_grid().thread_rank(); 32 | auto fastest = threadIdx.x; 33 | auto warp = cg::tiled_partition<32>(cg::this_thread_block()); 34 | auto neighbor = warp.shfl_xor(fastest, 1); 35 | dst[index] = neighbor; 36 | } 37 | // END CODE SNIP #2 38 | 39 | int main() { 40 | cudaDeviceProp deviceProp; 41 | cudaGetDeviceProperties(&deviceProp, 0); 42 | std::cout << "Running on device: " << deviceProp.name << "\n"; 43 | 44 | int* buffer = nullptr; 45 | cudaMallocHost(&buffer, count * sizeof(int)); 46 | cudaMemset(buffer, 0, count * sizeof(int)); 47 | 48 | #if 0 49 | // BEGIN CODE SNIP #1, Part 2/2 50 | dim3 threadsPerBlock(16, 2); 51 | ExchangeKernel<<<1, threadsPerBlock>>>(buffer); 52 | cudaDeviceSynchronize(); 53 | // END CODE SNIP #1, Part 2/2 54 | #else 55 | dim3 threadsPerBlock(16, 2); 56 | ExchangeKernelCoopGroups<<<1, threadsPerBlock>>>(buffer); 57 | cudaDeviceSynchronize(); 58 | #endif 59 | 60 | int unexpected = 0; 61 | for (int i = 0; i < count; i+=2) { 62 | if (buffer[i] == buffer[i+1]) { 63 | unexpected++; 64 | } 65 | } 66 | if (unexpected) { 67 | std::cout << "Error, found " << unexpected << " matching pairs.\n"; 68 | } else { 69 | std::cout << "Success.\n"; 70 | } 71 | 72 | cudaFreeHost(buffer); 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_5_walkorder.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | constexpr int count = 16 * 2; 10 | 11 | int main() { 12 | queue q{property::queue::in_order()}; 13 | std::cout << "Running on device: " 14 | << q.get_device().get_info() 15 | << "\n"; 16 | 17 | int* buffer = malloc_host(count, q); 18 | q.fill(buffer, 0, count); 19 | 20 | // BEGIN CODE SNIP 21 | q.parallel_for(nd_range<2>{{2, 16}, {2, 16}}, 22 | [=](auto item) { 23 | auto index = item.get_global_linear_id(); 24 | auto fastest = item.get_local_id(1); 25 | auto sg = item.get_sub_group(); 
26 | auto neighbor = 27 | permute_group_by_xor(sg, fastest, 1); 28 | buffer[index] = neighbor; 29 | }) 30 | .wait(); 31 | // END CODE SNIP 32 | 33 | int unexpected = 0; 34 | for (int i = 0; i < count; i += 2) { 35 | if (buffer[i] == buffer[i + 1]) { 36 | unexpected++; 37 | } 38 | } 39 | if (unexpected) { 40 | std::cout << "Error, found " << unexpected 41 | << " matching pairs.\n"; 42 | } else { 43 | std::cout << "Success.\n"; 44 | } 45 | 46 | free(buffer, q); 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_7_possible_deadlock.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | constexpr int count = 10; 10 | 11 | int main() { 12 | queue q{property::queue::in_order()}; 13 | std::cout << "Running on device: " 14 | << q.get_device().get_info() 15 | << "\n"; 16 | 17 | int* buffer = malloc_host(count, q); 18 | q.fill(buffer, 0, count); 19 | 20 | // BEGIN CODE SNIP 21 | std::cout << "WARNING: May deadlock on some devices!\n"; 22 | q.parallel_for(nd_range<1>{64, 64}, [=](auto item) { 23 | int id = item.get_global_id(0); 24 | if (id >= count) { 25 | return; // early exit 26 | } 27 | group_barrier(item.get_group()); 28 | buffer[id] = id; 29 | }).wait(); 30 | // END CODE SNIP 31 | 32 | int mismatches = 0; 33 | for (int i = 0; i < count; i++) { 34 | if (buffer[i] != i) { 35 | mismatches++; 36 | } 37 | } 38 | if (mismatches) { 39 | std::cout << "Found " << mismatches 40 | << " mismatches out of " << count 41 | << " elements.\n"; 42 | } else { 43 | std::cout << "Success.\n"; 44 | } 45 | 46 | free(buffer, q); 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_8_barriers.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | queue q{property::queue::in_order()}; 11 | std::cout << "Running on device: " 12 | << q.get_device().get_info() 13 | << "\n"; 14 | 15 | // BEGIN CODE SNIP 16 | q.parallel_for(nd_range<1>{16, 16}, [=](auto item) { 17 | // Equivalent of __syncthreads, or 18 | // this_thread_block().sync(): 19 | group_barrier(item.get_group()); 20 | 21 | // Equivalent of __syncwarp, or 22 | // tiled_partition<32>(this_thread_block()).sync(): 23 | group_barrier(item.get_sub_group()); 24 | }).wait(); 25 | // END CODE SNIP 26 | 27 | std::cout << "Success.\n"; 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_9_atomics.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | constexpr int count = 1024 * 1024; 10 | 11 | int main() { 12 | queue q{property::queue::in_order()}; 13 | std::cout << "Running on device: " 14 | << q.get_device().get_info() 15 | << "\n"; 16 | 17 | int* buffer = malloc_device(1, q); 18 | q.fill(buffer, 0, 1); 19 | 20 | // BEGIN CODE SNIP 21 | q.parallel_for(count, [=](auto id) { 22 | // The SYCL atomic_ref must specify the default order 23 | // and 
default scope as part of the atomic_ref type. To 24 | // match the behavior of the CUDA atomicAdd we want a 25 | // relaxed atomic with device scope: 26 | atomic_ref 28 | aref(*buffer); 29 | 30 | // When no memory order is specified, the defaults are 31 | // used: 32 | aref.fetch_add(1); 33 | 34 | // We can also specify the memory order and scope as 35 | // part of the atomic operation: 36 | aref.fetch_add(1, memory_order::relaxed, 37 | memory_scope::device); 38 | }); 39 | // END CODE SNIP 40 | 41 | int test = -1; 42 | q.copy(buffer, &test, 1).wait(); 43 | 44 | if (test != 2 * count) { 45 | std::cout << "Found " << test << ", wanted " 46 | << 2 * count << ".\n"; 47 | } else { 48 | std::cout << "Success.\n"; 49 | } 50 | 51 | free(buffer, q); 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /samples/Epilogue_future_direction_of_sycl/fig_ep_1_mdspan.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | 8 | using namespace sycl; 9 | namespace stdex = std::experimental; 10 | 11 | int main() { 12 | // BEGIN CODE SNIP 13 | queue q; 14 | constexpr int N = 4; 15 | constexpr int M = 2; 16 | int* data = malloc_shared(N * M, q); 17 | 18 | stdex::mdspan view{data}; 19 | q.parallel_for(range<2>{N, M}, [=](id<2> idx) { 20 | int i = idx[0]; 21 | int j = idx[1]; 22 | view(i, j) = i * M + j; 23 | }).wait(); 24 | // END CODE SNIP 25 | 26 | bool passed = true; 27 | for (int i = 0; i < N; ++i) { 28 | for (int j = 0; j < M; ++j) { 29 | if (data[i * M + j] != i * M + j) { 30 | passed = false; 31 | } 32 | } 33 | } 34 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 35 | 36 | free(data, q); 37 | return (passed) ? 0 : 1; 38 | } 39 | -------------------------------------------------------------------------------- /samples/Epilogue_future_direction_of_sycl/fig_ep_2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
7 | 8 | // Function expects one vector argument (x) and one scalar 9 | // argument (n) 10 | simd scale(simd x, float n) { 11 | return x * n; 12 | } 13 | 14 | q.parallel_for(..., sycl::nd_item<1> it) 15 | [[sycl::reqd_sub_group_size(8)]] { 16 | // In SPMD code, each work-item has its own x and n 17 | // variables 18 | float x = ...; 19 | float n = ...; 20 | 21 | // Invoke SIMD function (scale) using work-items in the 22 | // sub-group x values from each work-item are combined 23 | // into a simd 24 | // The value of n is defined to be the 25 | // same (uniform) across all work-items 26 | // Returned simd is unpacked 27 | sycl::sub_group sg = it.get_sub_group(); 28 | float y = invoke_simd(sg, scale, x, uniform(n)); 29 | }); 30 | -------------------------------------------------------------------------------- /samples/Epilogue_future_direction_of_sycl/fig_ep_3_device_constexpr.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | using namespace sycl; 8 | 9 | int main() { 10 | queue q; 11 | 12 | q.submit([&](handler& h) { 13 | stream out(9, 9, h); 14 | // BEGIN CODE SNIP 15 | h.parallel_for(range{1}, [=](id<1> idx) { 16 | if_device_has([&]() { 17 | /* Code specialized for CPUs */ 18 | out << "On a CPU!" << endl; 19 | }).else_if_device_has([&]() { 20 | /* Code specialized for GPUs */ 21 | out << "On a GPU!" << endl; 22 | }); 23 | }); 24 | // END CODE SNIP 25 | }).wait(); 26 | } 27 | --------------------------------------------------------------------------------