├── CMakeLists.txt ├── Contributing.md ├── LICENSE.txt ├── README.md ├── first_edition_errata.txt ├── images ├── cover_first_edition.jpg └── cover_second_edition.jpg ├── samples ├── CMakeLists.txt ├── Ch01_introduction │ ├── CMakeLists.txt │ ├── fig_1_1_hello.cpp │ ├── fig_1_2.hpp │ ├── fig_1_3_race.cpp │ ├── fig_1_4_lambda.cpp │ └── fig_1_6_functor.cpp ├── Ch02_where_code_runs │ ├── CMakeLists.txt │ ├── fig_2_10_gpu_selector.cpp │ ├── fig_2_12_multiple_selectors.cpp │ ├── fig_2_13_gpu_plus_fpga.cpp │ ├── fig_2_15_aspect_selector.cpp │ ├── fig_2_16_custom_selector.cpp │ ├── fig_2_18_simple_device_code.cpp │ ├── fig_2_19.hpp │ ├── fig_2_20.hpp │ ├── fig_2_22_simple_device_code_2.cpp │ ├── fig_2_23_host_task.cpp │ ├── fig_2_2_simple_program.cpp │ ├── fig_2_3.hpp │ ├── fig_2_4.hpp │ ├── fig_2_7_implicit_default_selector.cpp │ └── fig_2_9_cpu_selector.cpp ├── Ch03_data_management │ ├── CMakeLists.txt │ ├── fig_3_10_in_order.cpp │ ├── fig_3_11_depends_on.cpp │ ├── fig_3_13_read_after_write.cpp │ ├── fig_3_15_write_after_read_and_write_after_write.cpp │ ├── fig_3_17.hpp │ ├── fig_3_18.hpp │ ├── fig_3_4_usm_explicit_data_movement.cpp │ ├── fig_3_5_usm_implicit_data_movement.cpp │ └── fig_3_6_buffers_and_accessors.cpp ├── Ch04_expressing_parallelism │ ├── CMakeLists.txt │ ├── fig_4_1.hpp │ ├── fig_4_10.hpp │ ├── fig_4_11.hpp │ ├── fig_4_15_nd_range_matrix_multiply.cpp │ ├── fig_4_17.hpp │ ├── fig_4_18.hpp │ ├── fig_4_19.hpp │ ├── fig_4_2.hpp │ ├── fig_4_20.hpp │ ├── fig_4_21.hpp │ ├── fig_4_22.hpp │ ├── fig_4_5_vector_add.cpp │ ├── fig_4_6_matrix_add.cpp │ ├── fig_4_7_basic_matrix_multiply.cpp │ └── fig_4_9.hpp ├── Ch05_error_handling │ ├── CMakeLists.txt │ ├── fig_5_1_async_task_graph.cpp │ ├── fig_5_2_sync_error.cpp │ ├── fig_5_3_async_error.cpp │ ├── fig_5_4_unhandled_exception.cpp │ ├── fig_5_5_terminate.cpp │ ├── fig_5_6_catch_snip.cpp │ ├── fig_5_7_catch.cpp │ ├── fig_5_8_lambda_handler.cpp │ └── fig_5_9_default_handler_proxy.cpp ├── Ch06_unified_shared_memory │ ├── CMakeLists.txt │ ├── fig_6_2.hpp │ ├── fig_6_3.hpp │ ├── fig_6_4.hpp │ ├── fig_6_5_allocation_styles.cpp │ ├── fig_6_6_usm_explicit_data_movement.cpp │ ├── fig_6_7_usm_implicit_data_movement.cpp │ ├── fig_6_8_prefetch_memadvise.cpp │ └── fig_6_9_queries.cpp ├── Ch07_buffers │ ├── CMakeLists.txt │ ├── fig_7_1.hpp │ ├── fig_7_10_accessors.cpp │ ├── fig_7_2_3_4_creating_buffers.cpp │ ├── fig_7_5_buffer_properties.cpp │ └── fig_7_8_accessors_simple.cpp ├── Ch08_scheduling_kernels_and_data_movement │ ├── CMakeLists.txt │ ├── fig_8_3_linear_dependence_in_order.cpp │ ├── fig_8_4_linear_dependence_events.cpp │ ├── fig_8_5_linear_dependence_buffers.cpp │ ├── fig_8_6_y_in_order.cpp │ ├── fig_8_7_y_events.cpp │ └── fig_8_8_y_buffers.cpp ├── Ch09_communication_and_sychronization │ ├── CMakeLists.txt │ ├── fig_9_11_matmul_broadcast.cpp │ ├── fig_9_12_ndrange_sub_group_matmul.cpp │ ├── fig_9_4_naive_matmul.cpp │ ├── fig_9_7_local_accessors.cpp │ ├── fig_9_8_ndrange_tiled_matmul.cpp │ ├── fig_9_9_sub_group_barrier.cpp │ └── matmul_harness.cpp ├── Ch10_defining_kernels │ ├── CMakeLists.txt │ ├── fig_10_10_kernel_query.cpp │ ├── fig_10_2_kernel_lambda.cpp │ ├── fig_10_3_optional_kernel_lambda_elements.cpp │ ├── fig_10_4_named_kernel_lambda.cpp │ ├── fig_10_5_unnamed_kernel_lambda.cpp │ ├── fig_10_6_kernel_functor.cpp │ ├── fig_10_7_optional_kernel_functor_elements.cpp │ ├── fig_10_8_use_kernel_bundle.cpp │ └── fig_10_9_use_specific_kernel_bundle.cpp ├── Ch11_vectors_and_math_arrays │ ├── CMakeLists.txt │ ├── fig_11_10.hpp │ ├── 
fig_11_2_marray.cpp │ ├── fig_11_3.hpp │ ├── fig_11_4_load_store.cpp │ ├── fig_11_5.hpp │ ├── fig_11_6_swizzle_vec.cpp │ └── fig_11_7_vector_exec.cpp ├── Ch12_device_information_and_kernel_specialization │ ├── CMakeLists.txt │ ├── fig_12_10_specialize.cpp │ ├── fig_12_1_assigned_device.cpp │ ├── fig_12_2_try_catch.cpp │ ├── fig_12_4_device_selector.cpp │ ├── fig_12_5_curious.cpp │ ├── fig_12_7_very_curious.cpp │ ├── fig_12_8_invocation_parameters.cpp │ └── tst_12_4_device_selector.cpp ├── Ch13_practical_tips │ ├── CMakeLists.txt │ ├── fig_13_10_common_pattern_bug.cpp │ ├── fig_13_11_host_accessor.cpp │ ├── fig_13_12_host_accessor_for_init.cpp │ ├── fig_13_13_host_accessor_deadlock.cpp │ ├── fig_13_4_stream.cpp │ ├── fig_13_6_queue_profiling_timing.cpp │ └── fig_13_9_common_buffer_pattern.cpp ├── Ch14_common_parallel_patterns │ ├── CMakeLists.txt │ ├── fig_14_10.hpp │ ├── fig_14_11_array_reduction.cpp │ ├── fig_14_12_user_defined_reduction.cpp │ ├── fig_14_13_algorithm_comparison.cpp │ ├── fig_14_15_map.cpp │ ├── fig_14_16_stencil.cpp │ ├── fig_14_17_local_stencil.cpp │ ├── fig_14_18_basic_reduction.cpp │ ├── fig_14_19_nd_range_reduction.cpp │ ├── fig_14_20-22_inclusive_scan.cpp │ ├── fig_14_23.hpp │ ├── fig_14_24_local_pack.cpp │ ├── fig_14_25.hpp │ ├── fig_14_26_local_unpack.cpp │ ├── fig_14_8_one_reduction.cpp │ └── fig_14_9.hpp ├── Ch15_programming_for_gpus │ ├── CMakeLists.txt │ ├── fig_15_10_divergent_control_flow.cpp │ ├── fig_15_12_small_work_group_matrix_multiplication.cpp │ ├── fig_15_18_columns_matrix_multiplication.cpp │ ├── fig_15_3_single_task_matrix_multiplication.cpp │ ├── fig_15_5_somewhat_parallel_matrix_multiplication.cpp │ ├── fig_15_7_more_parallel_matrix_multiplication.cpp │ └── matrix_multiplication_harness.cpp ├── Ch16_programming_for_cpus │ ├── CMakeLists.txt │ ├── fig_16_10.hpp │ ├── fig_16_12_forward_dep.cpp │ ├── fig_16_15.hpp │ ├── fig_16_16.hpp │ ├── fig_16_17pre.hpp │ ├── fig_16_18_vector_swizzle.cpp │ ├── fig_16_2.hpp │ ├── fig_16_4.hpp │ ├── fig_16_5.hpp │ └── fig_16_6_stream_triad.cpp ├── Ch17_programming_for_fpgas │ ├── CMakeLists.txt │ ├── fig_17_11_fpga_emulator_selector.cpp │ ├── fig_17_17_ndrange_func.cpp │ ├── fig_17_18_loop_func.cpp │ ├── fig_17_20_loop_carried_deps.cpp │ ├── fig_17_22_loop_carried_state.cpp │ ├── fig_17_31_inter_kernel_pipe.cpp │ ├── fig_17_32.hpp │ ├── fig_17_33.hpp │ └── fig_17_9_fpga_selector.cpp ├── Ch18_libraries │ ├── CMakeLists.txt │ ├── fig_18_10_pstl_usm.cpp │ ├── fig_18_1_builtin.cpp │ ├── fig_18_2_swap.cpp │ ├── fig_18_5.hpp │ ├── fig_18_6_std_fill.cpp │ ├── fig_18_7_std_fill_default_policy.cpp │ ├── fig_18_8_binary_search.cpp │ └── fig_18_9_pstl_usm_device.cpp ├── Ch19_memory_model_and_atomics │ ├── CMakeLists.txt │ ├── fig_19_11.hpp │ ├── fig_19_12.hpp │ ├── fig_19_13.hpp │ ├── fig_19_14.hpp │ ├── fig_19_15_buffer_and_atomic_ref.cpp │ ├── fig_19_16_usm_and_atomic_ref.cpp │ ├── fig_19_17_histogram.cpp │ ├── fig_19_18-19_device_latch.cpp │ ├── fig_19_3_data_race.cpp │ ├── fig_19_6_avoid_data_race_with_barrier.cpp │ └── fig_19_7_avoid_data_race_with_atomics.cpp ├── Ch20_backend_interoperability │ ├── CMakeLists.txt │ ├── fig_20_10_level_zero_kernel_interop.cpp │ ├── fig_20_11_filter_selector.cpp │ ├── fig_20_2_querying_backends.cpp │ ├── fig_20_3_opencl_to_sycl.cpp │ ├── fig_20_4_level_zero_to_sycl.cpp │ ├── fig_20_5_sycl_to_opencl.cpp │ ├── fig_20_6_sycl_to_level_zero.cpp │ ├── fig_20_7_interop_handle_opencl.cpp │ ├── fig_20_8_interop_handle_level_zero.cpp │ └── fig_20_9_opencl_kernel_interop.cpp ├── 
Ch21_migrating_cuda_code │ ├── CMakeLists.txt │ ├── fig_21_10_reverse.cu │ ├── fig_21_13-14_reverse_migrated.cpp │ ├── fig_21_1_basicsycl.cpp │ ├── fig_21_2_basiccuda.cu │ ├── fig_21_4-6_walkorder.cu │ ├── fig_21_5_walkorder.cpp │ ├── fig_21_7_possible_deadlock.cpp │ ├── fig_21_8_barriers.cpp │ └── fig_21_9_atomics.cpp └── Epilogue_future_direction_of_sycl │ ├── fig_ep_1_mdspan.cpp │ ├── fig_ep_2.hpp │ └── fig_ep_3_device_constexpr.cpp └── second_edition_errata.txt /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | cmake_minimum_required(VERSION 3.10 FATAL_ERROR) 6 | 7 | # Try to detect the right SYCL compiler if one is not explicitly specified: 8 | if (NOT CMAKE_CXX_COMPILER) 9 | if (WIN32) 10 | set(CMAKE_CXX_COMPILER icx) 11 | else() 12 | find_program(HAS_ICPX "icpx" NO_CACHE) 13 | if (HAS_ICPX) 14 | set(CMAKE_CXX_COMPILER icpx) 15 | else() 16 | set(CMAKE_CXX_COMPILER clang++) 17 | endif() 18 | endif() 19 | endif() 20 | 21 | set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) 22 | set(CMAKE_CXX_STANDARD 17) 23 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 24 | 25 | if (NOT CMAKE_BUILD_TYPE) 26 | message(STATUS "No build type selected, default to Release") 27 | set(CMAKE_BUILD_TYPE "Release" CACHE PATH "Build Type" FORCE) 28 | endif() 29 | 30 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 31 | 32 | project(DPCPPSamples) 33 | option(NODPL "Disable samples that require the oneAPI DPC++ Library (oneDPL).") 34 | option(NODPCT "Disable samples that require the DPC++ Compatibility Tool (dpct).") 35 | option(NOL0 "Disable samples that require the oneAPI Level Zero Headers and Loader." ON) 36 | option(WITHCUDA "Enable CUDA device support for the samples.") 37 | option(WITHROCM "Enable ROCm device support for the samples.") 38 | 39 | if (WITHCUDA AND WITHROCM) 40 | message(FATAL_ERROR "WITHCUDA and WITHROCM cannot be enabled at the same time.\n" 41 | "Clean up the directory and try again with only one of them enabled.") 42 | endif() 43 | 44 | set(CUDA_GPU_ARCH "sm_60" CACHE STRING "CUDA GPUs to compile for.") 45 | if (WITHCUDA) 46 | mark_as_advanced(CLEAR FORCE CUDA_GPU_ARCH) 47 | else() 48 | mark_as_advanced(FORCE CUDA_GPU_ARCH) 49 | endif() 50 | 51 | set(ROCM_GPU_ARCH "gfx1100" CACHE STRING "ROCm GPUs to compile for.") 52 | if (WITHROCM) 53 | mark_as_advanced(CLEAR FORCE ROCM_GPU_ARCH) 54 | else() 55 | mark_as_advanced(FORCE ROCM_GPU_ARCH) 56 | endif() 57 | 58 | enable_testing() 59 | 60 | add_subdirectory(samples) 61 | 62 | if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) 63 | set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}/install" CACHE PATH "Install Path" FORCE) 64 | endif() 65 | -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing to Apress Source Code 2 | 3 | Copyright for Apress source code belongs to the author(s). However, under fair use you are encouraged to fork and contribute minor corrections and updates for the benefit of the author(s) and other readers. 4 | 5 | ## How to Contribute 6 | 7 | 1. Make sure you have a GitHub account. 8 | 2. Fork the repository for the relevant book. 9 | 3. Create a new branch on which to make your change, e.g. 10 | `git checkout -b my_code_contribution` 11 | 4. Keep formating: clang-format -i --style="{BasedOnStyle: Google, ColumnLimit: 60}" .cpp 12 | [we used find . 
-name "*cpp" -exec clang-format -i --style="{BasedOnStyle: Google, ColumnLimit: 60}" {} \; originally] 13 | 5. Commit your change. Include a commit message describing the correction. Please note that if your commit message is not clear, the correction will not be accepted. 14 | 6. Submit a pull request. 15 | 16 | Thank you for your contribution! -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (C) 2020 Intel Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), 7 | to deal in the Software without restriction, including without limitation 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | and/or sell copies of the Software, and to permit persons to whom 10 | the Software is furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 13 | in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 19 | OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 21 | OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | SPDX-License-Identifier: MIT 24 | -------------------------------------------------------------------------------- /images/cover_first_edition.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-parallel-CPP/6d3dce0df50446645f332df217219d143d48ed03/images/cover_first_edition.jpg -------------------------------------------------------------------------------- /images/cover_second_edition.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-parallel-CPP/6d3dce0df50446645f332df217219d143d48ed03/images/cover_second_edition.jpg -------------------------------------------------------------------------------- /samples/Ch01_introduction/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_1_1_hello 8 | SOURCES fig_1_1_hello.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_1_3_race 13 | SOURCES fig_1_3_race.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_1_4_lambda 18 | SOURCES fig_1_4_lambda.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_1_6_functor 23 | SOURCES fig_1_6_functor.cpp) 24 | -------------------------------------------------------------------------------- /samples/Ch01_introduction/fig_1_1_hello.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | const std::string secret{ 10 | "Ifmmp-!xpsme\"\012J(n!tpssz-!Ebwf/!" 
11 | "J(n!bgsbje!J!dbo(u!ep!uibu/!.!IBM\01"}; 12 | 13 | const auto sz = secret.size(); 14 | 15 | int main() { 16 | queue q; 17 | 18 | char* result = malloc_shared(sz, q); 19 | std::memcpy(result, secret.data(), sz); 20 | 21 | q.parallel_for(sz, [=](auto& i) { 22 | result[i] -= 1; 23 | }).wait(); 24 | 25 | std::cout << result << "\n"; 26 | free(result, q); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /samples/Ch01_introduction/fig_1_2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are snippets 6 | // that are not set up to be compiled as is. 7 | 8 | ! Fortran loop 9 | do i = 1, n 10 | z(i) = alpha * x(i) + y(i) 11 | end do 12 | 13 | // C++ loop 14 | for (int i=0;i i) { 20 | z[i] = alpha * x[i] + y[i]; 21 | }).wait(); 22 | -------------------------------------------------------------------------------- /samples/Ch01_introduction/fig_1_3_race.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | const std::string secret{ 10 | "Ifmmp-!xpsme\"\012J(n!tpssz-!Ebwf/!" 11 | "J(n!bgsbje!J!dbo(u!ep!uibu/!.!IBM\01"}; 12 | 13 | const auto sz = secret.size(); 14 | 15 | int main() { 16 | queue q; 17 | 18 | // BEGIN CODE SNIP 19 | // ...we are changing one line from Figure 1-1 20 | char* result = malloc_shared(sz, q); 21 | 22 | // Introduce potential data race! We don't define a 23 | // dependence to ensure correct ordering with later 24 | // operations. 25 | q.memcpy(result, secret.data(), sz); 26 | 27 | q.parallel_for(sz, [=](auto& i) { 28 | result[i] -= 1; 29 | }).wait(); 30 | 31 | // ... 
32 | // END CODE SNIP 33 | std::cout << result << "\n"; 34 | free(result, q); 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch01_introduction/fig_1_4_lambda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | void print_values(const int& i, const int& j, const int& k, 8 | const int& l) { 9 | std::cout << "i == " << i << "\n"; 10 | std::cout << "j == " << j << "\n"; 11 | std::cout << "k == " << k << "\n"; 12 | std::cout << "l == " << l << "\n"; 13 | } 14 | 15 | int main() { 16 | // BEGIN CODE SNIP 17 | int i = 1, j = 10, k = 100, l = 1000; 18 | 19 | auto lambda = [i, &j](int k0, int& l0) -> int { 20 | j = 2 * j; 21 | k0 = 2 * k0; 22 | l0 = 2 * l0; 23 | return i + j + k0 + l0; 24 | }; 25 | 26 | print_values(i, j, k, l); 27 | std::cout << "First call returned " << lambda(k, l) 28 | << "\n"; 29 | print_values(i, j, k, l); 30 | std::cout << "Second call returned " << lambda(k, l) 31 | << "\n"; 32 | print_values(i, j, k, l); 33 | // END CODE SNIP 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch01_introduction/fig_1_6_functor.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | // BEGIN CODE SNIP 8 | class Functor { 9 | public: 10 | Functor(int i, int &j) : my_i{i}, my_jRef{j} {} 11 | 12 | int operator()(int k0, int &l0) { 13 | my_jRef = 2 * my_jRef; 14 | k0 = 2 * k0; 15 | l0 = 2 * l0; 16 | return my_i + my_jRef + k0 + l0; 17 | } 18 | 19 | private: 20 | int my_i; 21 | int &my_jRef; 22 | }; 23 | // END CODE SNIP 24 | 25 | int main() { 26 | int i = 1, j = 10, k = 100, l = 1000; 27 | 28 | Functor F{i, j}; 29 | 30 | std::cout << "First call returned " << F(k, l) << "\n"; 31 | std::cout << "Second call returned " << F(k, l) << "\n"; 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_2_2_simple_program 8 | SOURCES fig_2_2_simple_program.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_2_7_implicit_default_selector 13 | SOURCES fig_2_7_implicit_default_selector.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_2_9_cpu_selector 18 | SOURCES fig_2_9_cpu_selector.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_2_10_gpu_selector 23 | SOURCES fig_2_10_gpu_selector.cpp) 24 | 25 | add_book_sample( 26 | TARGET fig_2_12_multiple_selectors 27 | SOURCES fig_2_12_multiple_selectors.cpp) 28 | 29 | add_book_sample( 30 | TARGET fig_2_13_gpu_plus_fpga 31 | SOURCES fig_2_13_gpu_plus_fpga.cpp) 32 | 33 | add_book_sample( 34 | TARGET fig_2_15_aspect_selector 35 | SOURCES fig_2_15_aspect_selector.cpp) 36 | 37 | add_book_sample( 38 | TARGET fig_2_16_custom_selector 39 | SOURCES fig_2_16_custom_selector.cpp) 40 | 41 | add_book_sample( 42 | TEST 43 | TARGET fig_2_18_simple_device_code 44 | SOURCES fig_2_18_simple_device_code.cpp) 45 | 46 | add_book_sample( 47 | TEST 48 | TARGET fig_2_22_simple_device_code_2 49 | SOURCES fig_2_22_simple_device_code_2.cpp) 50 | 51 | 
add_book_sample( 52 | TEST 53 | TARGET fig_2_23_host_task 54 | SOURCES fig_2_23_host_task.cpp) 55 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_10_gpu_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <iostream> 6 | #include <sycl/sycl.hpp> 7 | using namespace sycl; 8 | 9 | int main() { 10 | // Create queue bound to an available GPU device 11 | queue q{gpu_selector_v}; 12 | 13 | std::cout << "Selected device: " 14 | << q.get_device().get_info<info::device::name>() 15 | << "\n"; 16 | std::cout 17 | << " -> Device vendor: " 18 | << q.get_device().get_info<info::device::vendor>() 19 | << "\n"; 20 | 21 | return 0; 22 | } 23 | 24 | // Example Output: 25 | // Selected device: AMD Radeon RX 5700 XT 26 | // -> Device vendor: AMD Corporation 27 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_13_gpu_plus_fpga.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <iostream> 6 | #include <sycl/ext/intel/fpga_extensions.hpp> // For fpga_selector_v 7 | #include <sycl/sycl.hpp> 8 | using namespace sycl; 9 | 10 | int main() { 11 | queue my_gpu_queue(gpu_selector_v); 12 | queue my_fpga_queue(ext::intel::fpga_selector_v); 13 | 14 | std::cout << "Selected device 1: " 15 | << my_gpu_queue.get_device() 16 | .get_info<info::device::name>() 17 | << "\n"; 18 | 19 | std::cout << "Selected device 2: " 20 | << my_fpga_queue.get_device() 21 | .get_info<info::device::name>() 22 | << "\n"; 23 | 24 | return 0; 25 | } 26 | 27 | // Example Output: 28 | // Selected device 1: Intel(R) UHD Graphics [0x9a60] 29 | // Selected device 2: pac_a10 : Intel PAC Platform (pac_ee00000) 30 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_15_aspect_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <iostream> 6 | #include <sycl/sycl.hpp> 7 | using namespace sycl; 8 | 9 | int main() { 10 | // In the aspect_selector form taking a comma separated 11 | // group of aspects, all aspects must be present for a 12 | // device to be selected. 13 | queue q1{aspect_selector(aspect::fp16, aspect::gpu)}; 14 | 15 | // In the aspect_selector form that takes two vectors, the 16 | // first vector contains aspects that a device must 17 | // exhibit, and the second contains aspects that must NOT 18 | // be exhibited.
19 | queue q2{aspect_selector( 20 | std::vector{aspect::fp64, aspect::fp16}, 21 | std::vector{aspect::gpu, aspect::accelerator})}; 22 | 23 | std::cout 24 | << "First selected device is: " 25 | << q1.get_device().get_info() 26 | << "\n"; 27 | 28 | std::cout 29 | << "Second selected device is: " 30 | << q2.get_device().get_info() 31 | << "\n"; 32 | 33 | return 0; 34 | } 35 | 36 | // Example Output: 37 | // First selected device is: Intel(R) UHD Graphics [0x9a60] 38 | // Second selected device is: 11th Gen Intel(R) Core(TM) i9-11900KB @ 3.30GHz 39 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_16_custom_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | // BEGIN CODE SNIP 10 | int my_selector(const device &dev) { 11 | if (dev.get_info().find("pac_a10") != 12 | std::string::npos && 13 | dev.get_info().find("Intel") != 14 | std::string::npos) { 15 | return 1; 16 | } 17 | return -1; 18 | } 19 | // END CODE SNIP 20 | 21 | int main() { 22 | queue q(my_selector); 23 | 24 | std::cout << "Selected device is: " 25 | << q.get_device().get_info() 26 | << "\n"; 27 | 28 | return 0; 29 | } 30 | 31 | // Example Output: 32 | // Selected device is: pac_a10 : Intel PAC Platform (pac_ee00000) 33 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_18_simple_device_code.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr int size = 16; 12 | std::array data; 13 | buffer B{data}; 14 | 15 | queue q{}; // Select any device for this queue 16 | 17 | std::cout << "Selected device is: " 18 | << q.get_device().get_info() 19 | << "\n"; 20 | 21 | // BEGIN CODE SNIP 22 | 23 | q.submit([&](handler& h) { 24 | accessor acc{B, h}; 25 | 26 | h.parallel_for(size, 27 | [=](auto& idx) { acc[idx] = idx; }); 28 | }); 29 | 30 | // END CODE SNIP 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_20.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | class queue { 9 | public: 10 | // Submit a memset operation writing to the specified 11 | // pointer. Return an event representing this operation. 12 | event memset(void* ptr, int value, size_t count); 13 | 14 | // Submit a memcpy operation copying from src to dest. 15 | // Return an event representing this operation. 16 | event memcpy(void* dest, const void* src, size_t count); 17 | 18 | // Submit different forms of kernel for execution. 19 | // Return an event representing the kernel operation. 20 | template 21 | event single_task(KernelType kernel); 22 | 23 | template 25 | event parallel_for(range num_work_items, 26 | KernelType kernel); 27 | 28 | template 30 | event parallel_for(nd_range execution_range, 31 | KernelType kernel); 32 | 33 | // Submit different forms of kernel for execution. 
34 | // Wait for the specified event(s) to complete 35 | // before executing the kernel. 36 | // Return an event representing the kernel operation. 37 | template 38 | event single_task(const std::vector& events, 39 | KernelType kernel); 40 | 41 | template 43 | event parallel_for(range num_work_items, 44 | const std::vector& events, 45 | KernelType kernel); 46 | 47 | template 49 | event parallel_for(nd_range execution_range, 50 | const std::vector& events, 51 | KernelType kernel); 52 | }; 53 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_22_simple_device_code_2.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr int size = 16; 12 | std::array data; 13 | buffer B{data}; 14 | 15 | queue q{}; // Select any device for this queue 16 | 17 | std::cout << "Selected device is: " 18 | << q.get_device().get_info() 19 | << "\n"; 20 | 21 | q.submit([&](handler& h) { 22 | accessor acc{B, h}; 23 | h.parallel_for(size, 24 | [=](auto& idx) { acc[idx] = idx; }); 25 | }); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_23_host_task.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | constexpr int N = 4; 10 | 11 | int main() { 12 | queue q; 13 | int* A = malloc_shared(N, q); 14 | 15 | std::cout << "Selected device: " 16 | << q.get_device().get_info() 17 | << "\n"; 18 | 19 | // Initialize values in the shared allocation 20 | auto eA = q.submit([&](handler& h) { 21 | h.parallel_for(N, [=](auto& idx) { A[idx] = idx; }); 22 | }); 23 | 24 | // Use a host task to output values on the host as part of 25 | // task graph. depends_on is used to define a dependence 26 | // on previous device code having completed. Here the host 27 | // task is defined as a lambda expression. 28 | q.submit([&](handler& h) { 29 | h.depends_on(eA); 30 | h.host_task([=]() { 31 | for (int i = 0; i < N; i++) 32 | std::cout << "host_task @ " << i << " = " << A[i] 33 | << "\n"; 34 | }); 35 | }); 36 | 37 | // Wait for work to be completed in the queue before 38 | // accessing the shared data in the host program. 
39 | q.wait(); 40 | 41 | for (int i = 0; i < N; i++) 42 | std::cout << "main @ " << i << " = " << A[i] << "\n"; 43 | 44 | free(A, q); 45 | 46 | return 0; 47 | } 48 | 49 | // Example Output: 50 | // Selected device: NVIDIA GeForce RTX 3060 51 | // host_task @ 0 = 0 52 | // host_task @ 1 = 1 53 | // host_task @ 2 = 2 54 | // host_task @ 3 = 3 55 | // main @ 0 = 0 56 | // main @ 1 = 1 57 | // main @ 2 = 2 58 | // main @ 3 = 3 59 | 60 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_2_simple_program.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // BEGIN CODE SNIP 6 | #include 7 | #include 8 | #include 9 | using namespace sycl; 10 | 11 | int main() { 12 | constexpr int size = 16; 13 | std::array data; 14 | 15 | // Create queue on implementation-chosen default device 16 | queue q; 17 | 18 | // Create buffer using host allocated "data" array 19 | buffer B{data}; 20 | 21 | q.submit([&](handler& h) { 22 | accessor A{B, h}; 23 | h.parallel_for(size, [=](auto& idx) { A[idx] = idx; }); 24 | }); 25 | 26 | // Obtain access to buffer on the host 27 | // Will wait for device kernel to execute to generate data 28 | host_accessor A{B}; 29 | for (int i = 0; i < size; i++) 30 | std::cout << "data[" << i << "] = " << A[i] << "\n"; 31 | 32 | return 0; 33 | } 34 | // END CODE SNIP 35 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_3.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | // BEGIN CODE SNIP 9 | class queue { 10 | public: 11 | // Create a queue associated with a default 12 | // (implementation chosen) device. 13 | queue(const property_list & = {}); 14 | 15 | queue(const async_handler &, const property_list & = {}); 16 | 17 | // Create a queue using a DeviceSelector. 18 | // A DeviceSelector is a callable that ranks 19 | // devices numerically. There are a few SYCL-defined 20 | // device selectors available such as 21 | // cpu_selector_v and gpu_selector_v. 22 | template 23 | explicit queue(const DeviceSelector &deviceSelector, 24 | const property_list &propList = {}); 25 | 26 | // Create a queue associated with an explicit device to 27 | // which the program already holds a reference. 28 | queue(const device &, const property_list & = {}); 29 | 30 | // Create a queue associated with a device in a specific 31 | // SYCL context. A device selector may be used in place 32 | // of a device. 33 | queue(const context &, const device &, 34 | const property_list & = {}); 35 | }; 36 | // END CODE SNIP 37 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_4.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | // BEGIN CODE SNIP 9 | class queue { 10 | public: 11 | // Submit a command group to this queue. 12 | // The command group may be a lambda expression or 13 | // function object. 
Returns an event reflecting the status 14 | // of the action performed in the command group. 15 | template 16 | event submit(T); 17 | 18 | // Wait for all previously submitted actions to finish 19 | // executing. 20 | void wait(); 21 | 22 | // Wait for all previously submitted actions to finish 23 | // executing. Pass asynchronous exceptions to an 24 | // async_handler function. 25 | void wait_and_throw(); 26 | }; 27 | // END CODE SNIP 28 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_7_implicit_default_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // Create queue on whatever default device that the 11 | // implementation chooses. Implicit use of 12 | // default_selector_v 13 | queue q; 14 | 15 | std::cout << "Selected device: " 16 | << q.get_device().get_info() 17 | << "\n"; 18 | 19 | return 0; 20 | } 21 | 22 | // Sample Outputs (one line per run depending on system): 23 | // Selected device: NVIDIA GeForce RTX 3060 24 | // Selected device: AMD Radeon RX 5700 XT 25 | // Selected device: Intel(R) Data Center GPU Max 1100 26 | // Selected device: Intel(R) FPGA Emulation Device 27 | // Selected device: AMD Ryzen 5 3600 6-Core Processor 28 | // Selected device: Intel(R) UHD Graphics 770 29 | // Selected device: Intel(R) Xeon(R) Gold 6128 CPU @ 3.40GHz 30 | // Selected device: 11th Gen Intel(R) Core(TM) i9-11900KB @ 3.30GHz 31 | // many more possible… these are only examples 32 | -------------------------------------------------------------------------------- /samples/Ch02_where_code_runs/fig_2_9_cpu_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // Create queue to use the CPU device explicitly 11 | queue q{cpu_selector_v}; 12 | 13 | std::cout << "Selected device: " 14 | << q.get_device().get_info() 15 | << "\n"; 16 | std::cout 17 | << " -> Device vendor: " 18 | << q.get_device().get_info() 19 | << "\n"; 20 | 21 | return 0; 22 | } 23 | 24 | // Example Output: 25 | // Selected device: Intel(R) Xeon(R) Gold 6128 CPU @ 3.40GHz 26 | // -> Device vendor: Intel(R) Corporation 27 | 28 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_3_4_usm_explicit_data_movement 8 | SOURCES fig_3_4_usm_explicit_data_movement.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_3_5_usm_implicit_data_movement 13 | SOURCES fig_3_5_usm_implicit_data_movement.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_3_6_buffers_and_accessors 18 | SOURCES fig_3_6_buffers_and_accessors.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_3_10_in_order 23 | SOURCES fig_3_10_in_order.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_3_11_depends_on 28 | SOURCES fig_3_11_depends_on.cpp) 29 | 30 | add_book_sample( 31 | TEST 32 | TARGET fig_3_13_read_after_write 33 | SOURCES fig_3_13_read_after_write.cpp 34 | 
ADDITIONAL_COMPILE_OPTIONS -Wno-unused-variable) 35 | 36 | add_book_sample( 37 | TEST 38 | TARGET fig_3_15_write_after_read_and_write_after_write 39 | SOURCES fig_3_15_write_after_read_and_write_after_write.cpp) 40 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_10_in_order.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 4; 8 | 9 | int main() { 10 | queue q{property::queue::in_order()}; 11 | 12 | q.submit([&](handler& h) { 13 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task A 14 | }); 15 | q.submit([&](handler& h) { 16 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task B 17 | }); 18 | q.submit([&](handler& h) { 19 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task C 20 | }); 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_11_depends_on.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 4; 8 | 9 | int main() { 10 | queue q; 11 | 12 | auto eA = q.submit([&](handler &h) { 13 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task A 14 | }); 15 | eA.wait(); 16 | auto eB = q.submit([&](handler &h) { 17 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task B 18 | }); 19 | auto eC = q.submit([&](handler &h) { 20 | h.depends_on(eB); 21 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task C 22 | }); 23 | auto eD = q.submit([&](handler &h) { 24 | h.depends_on({eB, eC}); 25 | h.parallel_for(N, [=](id<1> i) { /*...*/ }); // Task D 26 | }); 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_13_read_after_write.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | constexpr int N = 42; 9 | 10 | int main() { 11 | std::array a, b, c; 12 | for (int i = 0; i < N; i++) { 13 | a[i] = b[i] = c[i] = 0; 14 | } 15 | 16 | queue q; 17 | 18 | // We will learn how to simplify this example later 19 | buffer a_buf{a}; 20 | buffer b_buf{b}; 21 | buffer c_buf{c}; 22 | 23 | q.submit([&](handler &h) { 24 | accessor a(a_buf, h, read_only); 25 | accessor b(b_buf, h, write_only); 26 | h.parallel_for( // computeB 27 | N, [=](id<1> i) { b[i] = a[i] + 1; }); 28 | }); 29 | 30 | q.submit([&](handler &h) { 31 | accessor a(a_buf, h, read_only); 32 | h.parallel_for( // readA 33 | N, [=](id<1> i) { 34 | // Useful only as an example 35 | int data = a[i]; 36 | }); 37 | }); 38 | 39 | q.submit([&](handler &h) { 40 | // RAW of buffer B 41 | accessor b(b_buf, h, read_only); 42 | accessor c(c_buf, h, write_only); 43 | h.parallel_for( // computeC 44 | N, [=](id<1> i) { c[i] = b[i] + 2; }); 45 | }); 46 | 47 | // read C on host 48 | host_accessor host_acc_c(c_buf, read_only); 49 | for (int i = 0; i < N; i++) { 50 | std::cout << host_acc_c[i] << " "; 51 | } 52 | std::cout << "\n"; 53 | return 0; 54 | } 55 | -------------------------------------------------------------------------------- 
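A hedged companion sketch, not one of the book's figures: the same read-after-write chain as fig_3_13, expressed with USM allocations and explicit event dependences instead of buffer accessors. The kernel names computeB/computeC mirror the comments in the figure; everything else here is illustrative only.

#include <sycl/sycl.hpp>
using namespace sycl;
constexpr int N = 42;

int main() {
  queue q;
  int *a = malloc_shared<int>(N, q);
  int *b = malloc_shared<int>(N, q);
  int *c = malloc_shared<int>(N, q);
  for (int i = 0; i < N; i++) a[i] = b[i] = c[i] = 0;

  // computeB writes b, reading a
  auto computeB =
      q.parallel_for(N, [=](id<1> i) { b[i] = a[i] + 1; });

  // computeC reads b (RAW), so it must name computeB as a
  // dependence; with USM there are no accessors to imply it
  auto computeC = q.parallel_for(
      N, computeB, [=](id<1> i) { c[i] = b[i] + 2; });
  computeC.wait();

  free(a, q);
  free(b, q);
  free(c, q);
  return 0;
}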
/samples/Ch03_data_management/fig_3_15_write_after_read_and_write_after_write.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | constexpr int N = 42; 9 | 10 | int main() { 11 | std::array<int, N> a, b; 12 | for (int i = 0; i < N; i++) { 13 | a[i] = b[i] = 0; 14 | } 15 | 16 | queue q; 17 | buffer a_buf{a}; 18 | buffer b_buf{b}; 19 | 20 | q.submit([&](handler &h) { 21 | accessor a(a_buf, h, read_only); 22 | accessor b(b_buf, h, write_only); 23 | h.parallel_for( // computeB 24 | N, [=](id<1> i) { b[i] = a[i] + 1; }); 25 | }); 26 | 27 | q.submit([&](handler &h) { 28 | // WAR of buffer A 29 | accessor a(a_buf, h, write_only); 30 | h.parallel_for( // rewriteA 31 | N, [=](id<1> i) { a[i] = 21 + 21; }); 32 | }); 33 | 34 | q.submit([&](handler &h) { 35 | // WAW of buffer B 36 | accessor b(b_buf, h, write_only); 37 | h.parallel_for( // rewriteB 38 | N, [=](id<1> i) { b[i] = 30 + 12; }); 39 | }); 40 | 41 | host_accessor host_acc_a(a_buf, read_only); 42 | host_accessor host_acc_b(b_buf, read_only); 43 | for (int i = 0; i < N; i++) { 44 | std::cout << host_acc_a[i] << " " << host_acc_b[i] 45 | << " "; 46 | } 47 | std::cout << "\n"; 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_17.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | class handler { 9 | ... 10 | // Specifies event(s) that must be complete before the 11 | // action defined in this command group executes. 12 | void depends_on({event / std::vector<event> & }); 13 | 14 | // Enqueues a memcpy from Src to Dest. 15 | // Count bytes are copied. 16 | void memcpy(void* Dest, const void* Src, size_t Count); 17 | 18 | // Enqueues a memcpy from Src to Dest. 19 | // Count elements are copied. 20 | template <typename T> 21 | void copy(const T* Src, T* Dest, size_t Count); 22 | 23 | // Enqueues a memset operation on the specified pointer. 24 | // Writes the first byte of Value into Count bytes. 25 | void memset(void* Ptr, int Value, size_t Count); 26 | 27 | // Enqueues a fill operation on the specified pointer. 28 | // Fills Pattern into Ptr Count times. 29 | template <typename T> 30 | void fill(void* Ptr, const T& Pattern, size_t Count); 31 | 32 | // Submits a kernel of one work-item for execution. 33 | template <typename KernelType> 34 | void single_task(KernelType KernelFunc); 35 | 36 | // Submits a kernel with NumWorkItems work-items for 37 | // execution. 38 | template <typename KernelType, 39 | int Dims> 40 | void parallel_for(range<Dims> NumWorkItems, 41 | KernelType KernelFunc); 42 | 43 | // Submits a kernel for execution over the supplied 44 | // nd_range. 45 | template <typename KernelType, 46 | int Dims> 47 | void parallel_for(nd_range<Dims> ExecutionRange, 48 | KernelType KernelFunc); 49 | ...
50 | }; 51 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_4_usm_explicit_data_movement.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | constexpr int N = 42; 9 | 10 | int main() { 11 | queue q; 12 | 13 | std::array host_array; 14 | int *device_array = malloc_device(N, q); 15 | 16 | for (int i = 0; i < N; i++) host_array[i] = N; 17 | 18 | // We will learn how to simplify this example later 19 | q.submit([&](handler &h) { 20 | // copy host_array to device_array 21 | h.memcpy(device_array, &host_array[0], N * sizeof(int)); 22 | }); 23 | q.wait(); 24 | 25 | q.submit([&](handler &h) { 26 | h.parallel_for(N, [=](id<1> i) { device_array[i]++; }); 27 | }); 28 | q.wait(); 29 | 30 | q.submit([&](handler &h) { 31 | // copy device_array back to host_array 32 | h.memcpy(&host_array[0], device_array, N * sizeof(int)); 33 | }); 34 | q.wait(); 35 | 36 | free(device_array, q); 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_5_usm_implicit_data_movement.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | int *host_array = malloc_host(N, q); 12 | int *shared_array = malloc_shared(N, q); 13 | 14 | for (int i = 0; i < N; i++) { 15 | // Initialize host_array on host 16 | host_array[i] = i; 17 | } 18 | 19 | // We will learn how to simplify this example later 20 | q.submit([&](handler &h) { 21 | h.parallel_for(N, [=](id<1> i) { 22 | // access shared_array and host_array on device 23 | shared_array[i] = host_array[i] + 1; 24 | }); 25 | }); 26 | q.wait(); 27 | 28 | for (int i = 0; i < N; i++) { 29 | // access shared_array on host 30 | host_array[i] = shared_array[i]; 31 | } 32 | 33 | free(shared_array, q); 34 | free(host_array, q); 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch03_data_management/fig_3_6_buffers_and_accessors.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | constexpr int N = 42; 9 | 10 | int main() { 11 | std::array my_data; 12 | for (int i = 0; i < N; i++) my_data[i] = 0; 13 | 14 | { 15 | queue q; 16 | buffer my_buffer(my_data); 17 | 18 | q.submit([&](handler &h) { 19 | // create an accessor to update 20 | // the buffer on the device 21 | accessor my_accessor(my_buffer, h); 22 | 23 | h.parallel_for(N, [=](id<1> i) { my_accessor[i]++; }); 24 | }); 25 | 26 | // create host accessor 27 | host_accessor host_accessor(my_buffer); 28 | 29 | for (int i = 0; i < N; i++) { 30 | // access my_buffer on host 31 | std::cout << host_accessor[i] << " "; 32 | } 33 | std::cout << "\n"; 34 | } 35 | 36 | // my_data is updated when my_buffer is 37 | // destroyed upon exiting scope 38 | for (int i = 0; i < N; i++) { 39 | std::cout << my_data[i] << " "; 40 | } 41 | std::cout << "\n"; 42 | } 43 | -------------------------------------------------------------------------------- 
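A hedged aside on the accessor pattern above, not one of the book's figures: when a kernel only produces a buffer's contents, adding the no_init property to a write_only accessor tells the runtime that the buffer's previous contents are not needed, so no host-to-device transfer is required before the kernel runs. A minimal sketch:

#include <array>
#include <sycl/sycl.hpp>
using namespace sycl;
constexpr int N = 42;

int main() {
  std::array<int, N> out{};
  queue q;
  {
    buffer out_buf{out};
    q.submit([&](handler &h) {
      // write_only + no_init: previous contents are ignored
      accessor o(out_buf, h, write_only, no_init);
      h.parallel_for(N, [=](id<1> i) { o[i] = 42; });
    });
  }  // out_buf destroyed here; results written back to out
  return 0;
}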
/samples/Ch04_expressing_parallelism/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_4_5_vector_add 8 | SOURCES fig_4_5_vector_add.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_4_6_matrix_add 13 | SOURCES fig_4_6_matrix_add.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_4_7_basic_matrix_multiply 18 | SOURCES fig_4_7_basic_matrix_multiply.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_4_15_nd_range_matrix_multiply 23 | SOURCES fig_4_15_nd_range_matrix_multiply.cpp) 24 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_1.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | for (int i = 0; i < N; ++i) { 9 | c[i] = a[i] + b[i]; 10 | } 11 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_10.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | class id { 10 | public: 11 | // Construct an id with one, two or three dimensions 12 | id(size_t dim0); 13 | id(size_t dim0, size_t dim1); 14 | id(size_t dim0, size_t dim1, size_t dim2); 15 | 16 | // Return the component of the id in a specific dimension 17 | size_t get(int dimension) const; 18 | size_t &operator[](int dimension); 19 | size_t operator[](int dimension) const; 20 | 21 | // Arithmetic operations on ids are also supported 22 | }; 23 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_11.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | class item { 10 | public: 11 | // Return the index of this item in the kernel's execution 12 | // range 13 | id get_id() const; 14 | size_t get_id(int dimension) const; 15 | size_t operator[](int dimension) const; 16 | 17 | // Return the execution range of the kernel executed by 18 | // this item 19 | range get_range() const; 20 | size_t get_range(int dimension) const; 21 | 22 | // Return the offset of this item (if WithOffset == true) 23 | id get_offset() const; 24 | 25 | // Return the linear index of this item 26 | // e.g. 
id(0) * range(1) * range(2) + id(1) * range(2) + 27 | // id(2) 28 | size_t get_linear_id() const; 29 | }; 30 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_15_nd_range_matrix_multiply.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace sycl; 10 | 11 | int main() { 12 | // Set up queue on any available device 13 | queue q; 14 | 15 | // Initialize input and output memory on the host 16 | constexpr size_t N = 256; 17 | constexpr size_t B = 4; 18 | std::vector a(N * N), b(N * N), c(N * N); 19 | std::default_random_engine gen(42); 20 | std::uniform_real_distribution dist(0.0, 1.0); 21 | auto rng = [&]() { return dist(gen); }; 22 | std::generate(a.begin(), a.end(), rng); 23 | std::generate(b.begin(), b.end(), rng); 24 | std::fill(c.begin(), c.end(), 0); 25 | 26 | { 27 | // Create buffers associated with inputs and output 28 | buffer a_buf(a.data(), range<2>(N, N)), 29 | b_buf(b.data(), range<2>(N, N)), 30 | c_buf(c.data(), range<2>(N, N)); 31 | 32 | // Submit the kernel to the queue 33 | q.submit([&](handler& h) { 34 | accessor a{a_buf, h}; 35 | accessor b{b_buf, h}; 36 | accessor c{c_buf, h}; 37 | 38 | // BEGIN CODE SNIP 39 | range global{N, N}; 40 | range local{B, B}; 41 | h.parallel_for(nd_range{global, local}, 42 | [=](nd_item<2> it) { 43 | int j = it.get_global_id(0); 44 | int i = it.get_global_id(1); 45 | 46 | for (int k = 0; k < N; ++k) { 47 | c[j][i] += a[j][k] * b[k][i]; 48 | } 49 | }); 50 | // END CODE SNIP 51 | }); 52 | } 53 | 54 | // Check that all outputs match serial execution. 55 | bool passed = true; 56 | for (int j = 0; j < N; ++j) { 57 | for (int i = 0; i < N; ++i) { 58 | float gold = 0; 59 | for (int k = 0; k < N; ++k) { 60 | gold += a[j * N + k] * b[k * N + i]; 61 | } 62 | if (std::abs(gold - c[j * N + i]) / gold > 1.0E-05) { 63 | passed = false; 64 | } 65 | } 66 | } 67 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") 68 | << std::endl; 69 | return (passed) ? 0 : 1; 70 | } 71 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_17.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | class nd_range { 10 | public: 11 | // Construct an nd_range from global and work-group local 12 | // ranges 13 | nd_range(range global, 14 | range local); 15 | 16 | // Return the global and work-group local ranges 17 | range get_global_range() const; 18 | range get_local_range() const; 19 | 20 | // Return the number of work-groups in the global range 21 | range get_group_range() const; 22 | }; 23 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_18.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
7 | 8 | template 9 | class nd_item { 10 | public: 11 | // Return the index of this item in the kernel's execution 12 | // range 13 | id get_global_id() const; 14 | size_t get_global_id(int dimension) const; 15 | size_t get_global_linear_id() const; 16 | 17 | // Return the execution range of the kernel executed by 18 | // this item 19 | range get_global_range() const; 20 | size_t get_global_range(int dimension) const; 21 | 22 | // Return the index of this item within its parent 23 | // work-group 24 | id get_local_id() const; 25 | size_t get_local_id(int dimension) const; 26 | size_t get_local_linear_id() const; 27 | 28 | // Return the execution range of this item's parent 29 | // work-group 30 | range get_local_range() const; 31 | size_t get_local_range(int dimension) const; 32 | 33 | // Return a handle to the work-group 34 | // or sub-group containing this item 35 | group get_group() const; 36 | sub_group get_sub_group() const; 37 | }; 38 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_19.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | class group { 10 | public: 11 | // Return the index of this group in the kernel's 12 | // execution range 13 | id get_group_id() const; 14 | size_t get_group_id(int dimension) const; 15 | size_t get_group_linear_id() const; 16 | 17 | // Return the number of groups in the kernel's execution 18 | // range 19 | range get_group_range() const; 20 | size_t get_group_range(int dimension) const; 21 | 22 | // Return the number of work-items in this group 23 | range get_local_range() const; 24 | size_t get_local_range(int dimension) const; 25 | }; 26 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | launch N kernel instances { 9 | int id = 10 | get_instance_id(); // unique identifier in [0, N) 11 | c[id] = a[id] + b[id]; 12 | } 13 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_20.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | void body(group& g); 9 | 10 | h.parallel_for(nd_range{global, local}, [=](nd_item<1> it) { 11 | group<1> g = it.get_group(); 12 | range<1> r = g.get_local_range(); 13 | ... 14 | body(g); 15 | }); 16 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_21.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
7 | 8 | class sub_group { 9 | public: 10 | // Return the index of the sub-group 11 | id<1> get_group_id() const; 12 | 13 | // Return the number of sub-groups in this item's parent 14 | // work-group 15 | range<1> get_group_range() const; 16 | 17 | // Return the index of the work-item in this sub-group 18 | id<1> get_local_id() const; 19 | 20 | // Return the number of work-items in this sub-group 21 | range<1> get_local_range() const; 22 | 23 | // Return the maximum number of work-items in any 24 | // sub-group in this item's parent work-group 25 | range<1> get_max_local_range() const; 26 | }; 27 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_22.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | size_t N = ...; // amount of work 9 | size_t W = ...; // number of workers 10 | h.parallel_for(range{W}, [=](item<1> it) { 11 | for (int i = it.get_id()[0]; i < N; 12 | i += it.get_range()[0]) { 13 | output[i] = function(input[i]); 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_5_vector_add.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // Set up queue on any available device 12 | queue q; 13 | 14 | // Initialize input and output memory on the host 15 | constexpr size_t N = 256; 16 | std::vector a(N), b(N), c(N); 17 | std::fill(a.begin(), a.end(), 1); 18 | std::fill(b.begin(), b.end(), 2); 19 | std::fill(c.begin(), c.end(), 0); 20 | 21 | { 22 | // Create buffers associated with inputs and output 23 | buffer a_buf{a}, b_buf{b}, c_buf{c}; 24 | 25 | // Submit the kernel to the queue 26 | q.submit([&](handler& h) { 27 | accessor a{a_buf, h}; 28 | accessor b{b_buf, h}; 29 | accessor c{c_buf, h}; 30 | 31 | // BEGIN CODE SNIP 32 | h.parallel_for(range{N}, [=](id<1> idx) { 33 | c[idx] = a[idx] + b[idx]; 34 | }); 35 | // END CODE SNIP 36 | }); 37 | } 38 | 39 | // Check that all outputs match expected value 40 | bool passed = std::all_of(c.begin(), c.end(), 41 | [](int i) { return (i == 3); }); 42 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") 43 | << std::endl; 44 | return (passed) ? 
0 : 1; 45 | } 46 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_6_matrix_add.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // Set up queue on any available device 12 | queue q; 13 | 14 | // Initialize input and output memory on the host 15 | constexpr size_t N = 256; 16 | constexpr size_t M = 256; 17 | std::vector a(N * M), b(N * M), c(N * M); 18 | std::fill(a.begin(), a.end(), 1); 19 | std::fill(b.begin(), b.end(), 2); 20 | std::fill(c.begin(), c.end(), 0); 21 | 22 | { 23 | // Create buffers associated with inputs and output 24 | buffer a_buf(a.data(), range<2>(N, M)), 25 | b_buf(b.data(), range<2>(N, M)), 26 | c_buf(c.data(), range<2>(N, M)); 27 | 28 | // Submit the kernel to the queue 29 | q.submit([&](handler& h) { 30 | accessor a{a_buf, h}; 31 | accessor b{b_buf, h}; 32 | accessor c{c_buf, h}; 33 | 34 | // BEGIN CODE SNIP 35 | h.parallel_for(range{N, M}, [=](id<2> idx) { 36 | c[idx] = a[idx] + b[idx]; 37 | }); 38 | // END CODE SNIP 39 | }); 40 | } 41 | 42 | // Check that all outputs match expected value 43 | bool passed = std::all_of(c.begin(), c.end(), 44 | [](int i) { return (i == 3); }); 45 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") 46 | << std::endl; 47 | return (passed) ? 0 : 1; 48 | } 49 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_7_basic_matrix_multiply.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace sycl; 10 | 11 | int main() { 12 | // Set up queue on any available device 13 | queue q; 14 | 15 | // Initialize input and output memory on the host 16 | constexpr size_t N = 256; 17 | std::vector a(N * N), b(N * N), c(N * N); 18 | std::default_random_engine gen(42); 19 | std::uniform_real_distribution dist(0.0, 1.0); 20 | auto rng = [&]() { return dist(gen); }; 21 | std::generate(a.begin(), a.end(), rng); 22 | std::generate(b.begin(), b.end(), rng); 23 | std::fill(c.begin(), c.end(), 0); 24 | 25 | { 26 | // Create buffers associated with inputs and output 27 | buffer a_buf(a.data(), range<2>(N, N)), 28 | b_buf(b.data(), range<2>(N, N)), 29 | c_buf(c.data(), range<2>(N, N)); 30 | 31 | // Submit the kernel to the queue 32 | q.submit([&](handler& h) { 33 | accessor a{a_buf, h}; 34 | accessor b{b_buf, h}; 35 | accessor c{c_buf, h}; 36 | 37 | // BEGIN CODE SNIP 38 | h.parallel_for(range{N, N}, [=](id<2> idx) { 39 | int j = idx[0]; 40 | int i = idx[1]; 41 | for (int k = 0; k < N; ++k) { 42 | c[j][i] += 43 | a[j][k] * b[k][i]; // or c[idx] += a[id(j,k)] 44 | // * b[id(k,i)]; 45 | } 46 | }); 47 | // END CODE SNIP 48 | }); 49 | } 50 | 51 | // Check that all outputs match serial execution 52 | bool passed = true; 53 | for (int j = 0; j < N; ++j) { 54 | for (int i = 0; i < N; ++i) { 55 | float gold = 0; 56 | for (int k = 0; k < N; ++k) { 57 | gold += a[j * N + k] * b[k * N + i]; 58 | } 59 | if (std::abs(gold - c[j * N + i]) / gold > 1.0E-05) { 60 | passed = false; 61 | } 62 | } 63 | } 64 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") 65 | << std::endl; 66 | return (passed) ? 
0 : 1; 67 | } 68 | -------------------------------------------------------------------------------- /samples/Ch04_expressing_parallelism/fig_4_9.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | class range { 10 | public: 11 | // Construct a range with one, two or three dimensions 12 | range(size_t dim0); 13 | range(size_t dim0, size_t dim1); 14 | range(size_t dim0, size_t dim1, size_t dim2); 15 | 16 | // Return the size of the range in a specific dimension 17 | size_t get(int dimension) const; 18 | size_t &operator[](int dimension); 19 | size_t operator[](int dimension) const; 20 | 21 | // Return the product of the size of each dimension 22 | size_t size() const; 23 | 24 | // Arithmetic operations on ranges are also supported 25 | }; 26 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_5_1_async_task_graph 8 | SOURCES fig_5_1_async_task_graph.cpp) 9 | 10 | add_book_sample( 11 | TARGET fig_5_2_sync_error 12 | SOURCES fig_5_2_sync_error.cpp) 13 | 14 | add_book_sample( 15 | TARGET fig_5_3_async_error 16 | SOURCES fig_5_3_async_error.cpp) 17 | 18 | add_book_sample( 19 | TARGET fig_5_4_unhandled_exception 20 | SOURCES fig_5_4_unhandled_exception.cpp) 21 | 22 | add_book_sample( 23 | TARGET fig_5_5_terminate 24 | SOURCES fig_5_5_terminate.cpp) 25 | 26 | add_book_sample( 27 | TEST 28 | TARGET fig_5_6_catch_snip 29 | SOURCES fig_5_6_catch_snip.cpp) 30 | 31 | add_book_sample( 32 | TARGET fig_5_7_catch 33 | SOURCES fig_5_7_catch.cpp) 34 | 35 | add_book_sample( 36 | TARGET fig_5_8_lambda_handler 37 | SOURCES fig_5_8_lambda_handler.cpp) 38 | 39 | add_book_sample( 40 | TARGET fig_5_9_default_handler_proxy 41 | SOURCES fig_5_9_default_handler_proxy.cpp) 42 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_1_async_task_graph.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | constexpr int size = 16; 11 | buffer b{range{size}}; 12 | 13 | // Create queue on any available device 14 | queue q; 15 | 16 | q.submit([&](handler& h) { 17 | accessor a{b, h}; 18 | 19 | h.parallel_for(size, [=](auto& idx) { a[idx] = idx; }); 20 | }); 21 | 22 | // Obtain access to buffer on the host 23 | // Will wait for device kernel to execute to generate data 24 | host_accessor a{b}; 25 | for (int i = 0; i < size; i++) 26 | std::cout << "data[" << i << "] = " << a[i] << "\n"; 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_2_sync_error.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | int main() { 9 | buffer b{range{16}}; 10 | 11 | // ERROR: Create sub-buffer 
larger than size of parent 12 | // buffer. An exception is thrown from within the buffer 13 | // constructor. 14 | buffer b2(b, id{8}, range{16}); 15 | 16 | return 0; 17 | } 18 | 19 | // Example Output: 20 | // terminate called after throwing an instance of 'sycl::_V1::invalid_object_error' 21 | // what(): Requested sub-buffer size exceeds the size of the parent buffer -30 (PI_ERROR_INVALID_VALUE) 22 | // Aborted 23 | 24 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_3_async_error.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | // Our example asynchronous handler function 9 | auto handle_async_error = [](exception_list elist) { 10 | for (auto &e : elist) { 11 | try { 12 | std::rethrow_exception(e); 13 | } catch (...) { 14 | std::cout << "Caught SYCL ASYNC exception!!\n"; 15 | } 16 | } 17 | }; 18 | 19 | void say_device(const queue &Q) { 20 | std::cout << "Device : " 21 | << Q.get_device().get_info() 22 | << "\n"; 23 | } 24 | 25 | class something_went_wrong {}; // Example exception type 26 | 27 | int main() { 28 | queue q{cpu_selector_v, handle_async_error}; 29 | say_device(q); 30 | 31 | q.submit([&](handler &h) { 32 | h.host_task([]() { throw(something_went_wrong{}); }); 33 | }).wait(); 34 | 35 | return 0; 36 | } 37 | 38 | // Example output: 39 | // Device : Intel(R) Xeon(R) Gold 6128 CPU @ 3.40GHz 40 | // Caught SYCL ASYNC exception!! 41 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_4_unhandled_exception.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | class something_went_wrong {}; 8 | 9 | int main() { 10 | std::cout << "Hello\n"; 11 | 12 | throw(something_went_wrong{}); 13 | } 14 | 15 | // Example output: 16 | // Hello 17 | // terminate called after throwing an instance of 'something_went_wrong' 18 | // Aborted 19 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_5_terminate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | int main() { 8 | std::cout << "Hello\n"; 9 | 10 | std::terminate(); 11 | } 12 | 13 | // Example output: 14 | // Hello 15 | // terminate called without an active exception 16 | // Aborted 17 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_6_catch_snip.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | int main() { 9 | // BEGIN CODE SNIP 10 | 11 | try { 12 | // Do some SYCL work 13 | } catch (sycl::exception &e) { 14 | // Do something to output or handle the exception 15 | std::cout << "Caught sync SYCL exception: " << e.what() 16 | << "\n"; 17 | return 1; 18 | } 19 | 20 | // END CODE SNIP 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_7_catch.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | int main() { 9 | try { 10 | buffer b{range{16}}; 11 | 12 | // ERROR: Create sub-buffer larger than size of parent 13 | // buffer. An exception is thrown from within the buffer 14 | // constructor. 15 | buffer b2(b, id{8}, range{16}); 16 | 17 | } catch (sycl::exception &e) { 18 | // Do something to output or handle the exception 19 | std::cout << "Caught synchronous SYCL exception: " 20 | << e.what() << "\n"; 21 | return 1; 22 | } catch (std::exception &e) { 23 | std::cout << "Caught std exception: " << e.what() 24 | << "\n"; 25 | return 2; 26 | } catch (...) { 27 | std::cout << "Caught unknown exception\n"; 28 | return 3; 29 | } 30 | 31 | return 0; 32 | } 33 | 34 | // Example output: 35 | // Caught synchronous SYCL exception: Requested sub-buffer size exceeds the size of the parent buffer -30 (PI_ERROR_INVALID_VALUE) 36 | 37 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_8_lambda_handler.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | // BEGIN CODE SNIP 9 | 10 | // Our simple asynchronous handler function 11 | auto handle_async_error = [](exception_list elist) { 12 | for (auto& e : elist) { 13 | try { 14 | std::rethrow_exception(e); 15 | } catch (sycl::exception& e) { 16 | std::cout << "ASYNC EXCEPTION!!\n"; 17 | std::cout << e.what() << "\n"; 18 | } 19 | } 20 | }; 21 | 22 | // END CODE SNIP 23 | 24 | void say_device(const queue& q) { 25 | std::cout << "Device : " 26 | << q.get_device().get_info() 27 | << "\n"; 28 | } 29 | 30 | int main() { 31 | queue q1{gpu_selector_v, handle_async_error}; 32 | queue q2{cpu_selector_v, handle_async_error}; 33 | say_device(q1); 34 | say_device(q2); 35 | 36 | try { 37 | q1.submit( 38 | [&](handler& h) { 39 | // Empty command group is illegal and generates an 40 | // error 41 | }, 42 | q2); // Secondary/backup queue! 43 | } catch (...) { 44 | } // Discard regular C++ exceptions for this example 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /samples/Ch05_error_handling/fig_5_9_default_handler_proxy.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | // BEGIN CODE SNIP 9 | 10 | // Our simple asynchronous handler function 11 | auto handle_async_error = [](exception_list elist) { 12 | for (auto& e : elist) { 13 | try { 14 | std::rethrow_exception(e); 15 | } catch (sycl::exception& e) { 16 | // Print information about the asynchronous exception 17 | } catch (...) 
{ 18 | // Print information about non-sycl::exception 19 | } 20 | } 21 | 22 | // Terminate abnormally to make clear to user that 23 | // something unhandled happened 24 | std::terminate(); 25 | }; 26 | 27 | // END CODE SNIP 28 | 29 | void say_device(const queue& q) { 30 | std::cout << "Device : " 31 | << q.get_device().get_info() 32 | << "\n"; 33 | } 34 | 35 | class something_went_wrong {}; 36 | 37 | int main() { 38 | queue q{cpu_selector_v, handle_async_error}; 39 | say_device(q); 40 | 41 | q.submit([&](handler& h) { 42 | h.host_task([]() { throw(something_went_wrong{}); }); 43 | }).wait(); 44 | 45 | return 0; 46 | } 47 | 48 | // Example output: 49 | // Device : Intel(R) Xeon(R) Gold 6128 CPU @ 3.40GHz 50 | // terminate called without an active exception 51 | // Aborted 52 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_6_5_allocation_styles 8 | SOURCES fig_6_5_allocation_styles.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_6_6_usm_explicit_data_movement 13 | SOURCES fig_6_6_usm_explicit_data_movement.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_6_7_usm_implicit_data_movement 18 | SOURCES fig_6_7_usm_implicit_data_movement.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_6_8_prefetch_memadvise 23 | SOURCES fig_6_8_prefetch_memadvise.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_6_9_queries 28 | SOURCES fig_6_9_queries.cpp) 29 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
7 | 8 | // Named Functions 9 | void *malloc_device(size_t size, const device &dev, 10 | const context &ctxt); 11 | void *malloc_device(size_t size, const queue &q); 12 | void *aligned_alloc_device(size_t alignment, size_t size, 13 | const device &dev, 14 | const context &ctxt); 15 | void *aligned_alloc_device(size_t alignment, size_t size, 16 | const queue &q); 17 | 18 | void *malloc_host(size_t size, const context &ctxt); 19 | void *malloc_host(size_t size, const queue &q); 20 | void *aligned_alloc_host(size_t alignment, size_t size, 21 | const context &ctxt); 22 | void *aligned_alloc_host(size_t alignment, size_t size, 23 | const queue &q); 24 | 25 | void *malloc_shared(size_t size, const device &dev, 26 | const context &ctxt); 27 | void *malloc_shared(size_t size, const queue &q); 28 | void *aligned_alloc_shared(size_t alignment, size_t size, 29 | const device &dev, 30 | const context &ctxt); 31 | void *aligned_alloc_shared(size_t alignment, size_t size, 32 | const queue &q); 33 | 34 | // Single Function 35 | void *malloc(size_t size, const device &dev, 36 | const context &ctxt, usm::alloc kind); 37 | void *malloc(size_t size, const queue &q, usm::alloc kind); 38 | void *aligned_alloc(size_t alignment, size_t size, 39 | const device &dev, const context &ctxt, 40 | usm::alloc kind); 41 | void *aligned_alloc(size_t alignment, size_t size, 42 | const queue &q, usm::alloc kind); 43 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_3.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
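
A minimal usage sketch for the allocation routines declared above in fig_6_2.hpp and for their templated counterparts in fig_6_3.hpp just below. This is not one of the book's figures; the kernel, the sizes, and the variable names are illustrative, and the untyped malloc call is shown only to contrast the two styles.

#include <sycl/sycl.hpp>
#include <iostream>
using namespace sycl;

int main() {
  queue q;
  constexpr size_t N = 16;

  // Device allocation: usable inside kernels, not dereferenceable
  // on the host (allocated here only to illustrate the call)
  int *device_mem = malloc_device<int>(N, q);

  // Host allocation: host memory that kernels can also access
  int *host_mem = malloc_host<int>(N, q);

  // Shared allocation: migrates between host and device
  int *shared_mem = malloc_shared<int>(N, q);

  // The untyped, kind-parameterized form from fig_6_2.hpp
  void *raw = malloc(N * sizeof(int), q, usm::alloc::device);

  for (size_t i = 0; i < N; i++) host_mem[i] = static_cast<int>(i);

  q.parallel_for(N, [=](id<1> i) {
     shared_mem[i] = host_mem[i] + 1;  // read host, write shared
   }).wait();

  bool passed = (shared_mem[N - 1] == static_cast<int>(N));
  std::cout << (passed ? "SUCCESS" : "FAILURE") << std::endl;

  // Every USM allocation is released with sycl::free
  free(device_mem, q);
  free(host_mem, q);
  free(shared_mem, q);
  free(raw, q);
  return passed ? 0 : 1;
}
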
7 | 8 | // Named Functions 9 | template <typename T> 10 | T *malloc_device(size_t Count, const device &Dev, 11 | const context &Ctxt); 12 | template <typename T> 13 | T *malloc_device(size_t Count, const queue &Q); 14 | template <typename T> 15 | T *aligned_alloc_device(size_t Alignment, size_t Count, 16 | const device &Dev, 17 | const context &Ctxt); 18 | template <typename T> 19 | T *aligned_alloc_device(size_t Alignment, size_t Count, 20 | const queue &Q); 21 | 22 | template <typename T> 23 | T *malloc_host(size_t Count, const context &Ctxt); 24 | template <typename T> 25 | T *malloc_host(size_t Count, const queue &Q); 26 | template <typename T> 27 | T *aligned_alloc_host(size_t Alignment, size_t Count, 28 | const context &Ctxt); 29 | template <typename T> 30 | T *aligned_alloc_host(size_t Alignment, size_t Count, 31 | const queue &Q); 32 | 33 | template <typename T> 34 | T *malloc_shared(size_t Count, const device &Dev, 35 | const context &Ctxt); 36 | template <typename T> 37 | T *malloc_shared(size_t Count, const queue &Q); 38 | template <typename T> 39 | T *aligned_alloc_shared(size_t Alignment, size_t Count, 40 | const device &Dev, 41 | const context &Ctxt); 42 | template <typename T> 43 | T *aligned_alloc_shared(size_t Alignment, size_t Count, 44 | const queue &Q); 45 | 46 | // Single Function 47 | template <typename T> 48 | T *malloc(size_t Count, const device &Dev, 49 | const context &Ctxt, usm::alloc Kind); 50 | template <typename T> 51 | T *malloc(size_t Count, const queue &Q, usm::alloc Kind); 52 | template <typename T> 53 | T *aligned_alloc(size_t Alignment, size_t Count, 54 | const device &Dev, const context &Ctxt, 55 | usm::alloc Kind); 56 | template <typename T> 57 | T *aligned_alloc(size_t Alignment, size_t Count, 58 | const queue &Q, usm::alloc Kind); 59 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_5_allocation_styles.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <sycl/sycl.hpp> 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | // Allocate N floats 13 | 14 | // C-style 15 | float *f1 = static_cast<float *>(malloc_shared( 16 | N * sizeof(float), q.get_device(), q.get_context())); 17 | 18 | // C++-style 19 | float *f2 = malloc_shared<float>(N, q); 20 | 21 | // C++-allocator-style 22 | usm_allocator<float, usm::alloc::shared> alloc(q); 23 | float *f3 = alloc.allocate(N); 24 | 25 | // Free our allocations 26 | free(f1, q.get_context()); 27 | free(f2, q); 28 | alloc.deallocate(f3, N); 29 | 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_6_usm_explicit_data_movement.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <sycl/sycl.hpp> 6 | #include <array> 7 | using namespace sycl; 8 | constexpr int N = 42; 9 | 10 | int main() { 11 | queue q; 12 | 13 | std::array<int, N> host_array; 14 | int* device_array = malloc_device<int>(N, q); 15 | for (int i = 0; i < N; i++) host_array[i] = N; 16 | 17 | q.submit([&](handler& h) { 18 | // copy host_array to device_array 19 | h.memcpy(device_array, &host_array[0], N * sizeof(int)); 20 | }); 21 | q.wait(); // needed for now (we learn a better way later) 22 | 23 | q.submit([&](handler& h) { 24 | h.parallel_for(N, [=](id<1> i) { device_array[i]++; }); 25 | }); 26 | q.wait(); // needed for now (we learn a better way later) 27 | 28 | q.submit([&](handler& h) { 29 | // copy device_array back to host_array 30 | h.memcpy(&host_array[0], device_array, N
* sizeof(int)); 31 | }); 32 | q.wait(); // needed for now (we learn a better way later) 33 | 34 | free(device_array, q); 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_7_usm_implicit_data_movement.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | int* host_array = malloc_host(N, q); 13 | int* shared_array = malloc_shared(N, q); 14 | for (int i = 0; i < N; i++) host_array[i] = i; 15 | 16 | q.submit([&](handler& h) { 17 | h.parallel_for(N, [=](id<1> i) { 18 | // access shared_array and host_array on device 19 | shared_array[i] = host_array[i] + 1; 20 | }); 21 | }); 22 | q.wait(); 23 | 24 | free(shared_array, q); 25 | free(host_array, q); 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_8_prefetch_memadvise.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | // Appropriate values depend on your HW 9 | constexpr int BLOCK_SIZE = 42; 10 | constexpr int NUM_BLOCKS = 2500; 11 | constexpr int N = NUM_BLOCKS * BLOCK_SIZE; 12 | 13 | int main() { 14 | queue q; 15 | int *data = malloc_shared(N, q); 16 | int *read_only_data = malloc_shared(BLOCK_SIZE, q); 17 | 18 | for (int i = 0; i < N; i++) { 19 | data[i] = -i; 20 | } 21 | 22 | // Never updated after initialization 23 | for (int i = 0; i < BLOCK_SIZE; i++) { 24 | read_only_data[i] = i; 25 | } 26 | 27 | // Mark this data as "read only" so the runtime can copy 28 | // it to the device instead of migrating it from the host. 29 | // Real values will be documented by your backend. 
30 | int HW_SPECIFIC_ADVICE_RO = 0; 31 | q.mem_advise(read_only_data, BLOCK_SIZE, 32 | HW_SPECIFIC_ADVICE_RO); 33 | event e = q.prefetch(data, BLOCK_SIZE * sizeof(int)); 34 | 35 | for (int b = 0; b < NUM_BLOCKS; b++) { 36 | q.parallel_for(range{BLOCK_SIZE}, e, [=](id<1> i) { 37 | data[b * BLOCK_SIZE + i] += read_only_data[i]; 38 | }); 39 | if ((b + 1) < NUM_BLOCKS) { 40 | // Prefetch next block 41 | e = q.prefetch(data + (b + 1) * BLOCK_SIZE, 42 | BLOCK_SIZE * sizeof(int)); 43 | } 44 | } 45 | q.wait(); 46 | 47 | free(data, q); 48 | free(read_only_data, q); 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /samples/Ch06_unified_shared_memory/fig_6_9_queries.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | namespace dinfo = info::device; 8 | constexpr int N = 42; 9 | 10 | template 11 | void foo(T data, id<1> i) { 12 | data[i] = N; 13 | } 14 | 15 | int main() { 16 | queue q; 17 | auto dev = q.get_device(); 18 | auto ctxt = q.get_context(); 19 | bool usm_shared = dev.has(aspect::usm_shared_allocations); 20 | bool usm_device = dev.has(aspect::usm_device_allocations); 21 | bool use_USM = usm_shared || usm_device; 22 | 23 | if (use_USM) { 24 | int *data; 25 | if (usm_shared) { 26 | data = malloc_shared(N, q); 27 | } else /* use device allocations */ { 28 | data = malloc_device(N, q); 29 | } 30 | std::cout << "Using USM with " 31 | << ((get_pointer_type(data, ctxt) == 32 | usm::alloc::shared) 33 | ? "shared" 34 | : "device") 35 | << " allocations on " 36 | << get_pointer_device(data, ctxt) 37 | .get_info() 38 | << "\n"; 39 | q.parallel_for(N, [=](id<1> i) { foo(data, i); }); 40 | q.wait(); 41 | free(data, q); 42 | } else /* use buffers */ { 43 | buffer data{range{N}}; 44 | q.submit([&](handler &h) { 45 | accessor a(data, h); 46 | h.parallel_for(N, [=](id<1> i) { foo(a, i); }); 47 | }); 48 | q.wait(); 49 | } 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_7_2_3_4_creating_buffers 8 | SOURCES fig_7_2_3_4_creating_buffers.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_7_5_buffer_properties 13 | SOURCES fig_7_5_buffer_properties.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_7_8_accessors_simple 18 | SOURCES fig_7_8_accessors_simple.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_7_10_accessors 23 | SOURCES fig_7_10_accessors.cpp) 24 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/fig_7_1.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
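
Before the buffer class template declared next in fig_7_1.hpp, here is a small sketch of something the later figures create but do not exercise: kernels running on non-overlapping sub-buffers of one parent buffer. This is not a book figure; the shapes mirror the b10/b11/b12 pattern in fig_7_2_3_4_creating_buffers.cpp, and everything else is illustrative.

#include <sycl/sycl.hpp>
#include <cassert>
using namespace sycl;

int main() {
  queue q;

  // A 2x5 parent buffer and two non-overlapping 1x5 sub-buffers,
  // one per row
  buffer<int, 2> parent{range{2, 5}};
  buffer<int, 2> row0{parent, id{0, 0}, range{1, 5}};
  buffer<int, 2> row1{parent, id{1, 0}, range{1, 5}};

  // Kernels on disjoint sub-buffers have no data dependence on
  // each other, so the runtime may schedule them independently
  q.submit([&](handler &h) {
    accessor a{row0, h, write_only, no_init};
    h.parallel_for(range{1, 5}, [=](id<2> i) { a[i] = 1; });
  });
  q.submit([&](handler &h) {
    accessor a{row1, h, write_only, no_init};
    h.parallel_for(range{1, 5}, [=](id<2> i) { a[i] = 2; });
  });

  // Reading the parent waits for both sub-buffer writers
  host_accessor result{parent, read_only};
  assert(result[0][4] == 1 && result[1][4] == 2);
  return 0;
}
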
7 | 8 | template 9 | class buffer; 10 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/fig_7_10_accessors.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // BEGIN CODE SNIP 6 | #include 7 | #include 8 | using namespace sycl; 9 | constexpr int N = 42; 10 | 11 | int main() { 12 | queue q; 13 | 14 | // Create 3 buffers of 42 ints 15 | buffer buf_a{range{N}}; 16 | buffer buf_b{range{N}}; 17 | buffer buf_c{range{N}}; 18 | 19 | accessor pc{buf_c}; 20 | 21 | q.submit([&](handler &h) { 22 | accessor a{buf_a, h, write_only, no_init}; 23 | accessor b{buf_b, h, write_only, no_init}; 24 | accessor c{buf_c, h, write_only, no_init}; 25 | h.parallel_for(N, [=](id<1> i) { 26 | a[i] = 1; 27 | b[i] = 40; 28 | c[i] = 0; 29 | }); 30 | }); 31 | q.submit([&](handler &h) { 32 | accessor a{buf_a, h, read_only}; 33 | accessor b{buf_b, h, read_only}; 34 | accessor c{buf_c, h, read_write}; 35 | h.parallel_for(N, 36 | [=](id<1> i) { c[i] += a[i] + b[i]; }); 37 | }); 38 | q.submit([&](handler &h) { 39 | h.require(pc); 40 | h.parallel_for(N, [=](id<1> i) { pc[i]++; }); 41 | }); 42 | 43 | host_accessor result{buf_c, read_only}; 44 | 45 | for (int i = 0; i < N; i++) { 46 | assert(result[i] == N); 47 | } 48 | return 0; 49 | } 50 | // END CODE SNIP 51 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/fig_7_2_3_4_creating_buffers.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | 8 | int main() { 9 | // BEGIN CODE SNIP 10 | // Create a buffer of 2x5 ints using the default allocator 11 | buffer> b1{range<2>{2, 5}}; 12 | 13 | // Create a buffer of 2x5 ints using the default allocator 14 | // and CTAD for range 15 | buffer b2{range{2, 5}}; 16 | 17 | // Create a buffer of 20 floats using a 18 | // default-constructed std::allocator 19 | buffer> b3{range{20}}; 20 | 21 | // Create a buffer of 20 floats using a passed-in 22 | // allocator 23 | std::allocator myFloatAlloc; 24 | buffer> b4{range(20), 25 | myFloatAlloc}; 26 | // END CODE SNIP 27 | 28 | // BEGIN CODE SNIP 29 | // Create a buffer of 4 doubles and initialize it from a 30 | // host pointer 31 | double myDoubles[4] = {1.1, 2.2, 3.3, 4.4}; 32 | buffer b5{myDoubles, range{4}}; 33 | 34 | // Create a buffer of 5 doubles and initialize it from a 35 | // host pointer to const double 36 | const double myConstDbls[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; 37 | buffer b6{myConstDbls, range{5}}; 38 | 39 | // Create a buffer from a shared pointer to int 40 | auto sharedPtr = std::make_shared(42); 41 | buffer b7{sharedPtr, range{1}}; 42 | // END CODE SNIP 43 | 44 | // BEGIN CODE SNIP 45 | // Create a buffer of ints from an input iterator 46 | std::vector myVec; 47 | buffer b8{myVec.begin(), myVec.end()}; 48 | buffer b9{myVec}; 49 | 50 | // Create a buffer of 2x5 ints and 2 non-overlapping 51 | // sub-buffers of 5 ints. 
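// (A sub-buffer such as b11 or b12 must describe a region that
//  is contiguous in memory and that lies entirely within the
//  parent buffer; fig_5_2_sync_error.cpp shows the exception
//  thrown when the requested region does not fit.)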
52 | buffer b10{range{2, 5}}; 53 | buffer b11{b10, id{0, 0}, range{1, 5}}; 54 | buffer b12{b10, id{1, 0}, range{1, 5}}; 55 | // END CODE SNIP 56 | 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/fig_7_5_buffer_properties.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // BEGIN CODE SNIP 11 | queue q; 12 | int my_ints[42]; 13 | 14 | // Create a buffer of 42 ints 15 | buffer b{range(42)}; 16 | 17 | // Create a buffer of 42 ints, initialize with a host 18 | // pointer, and add the use_host_pointer property 19 | buffer b1{my_ints, 20 | range(42), 21 | {property::buffer::use_host_ptr{}}}; 22 | 23 | // Create a buffer of 42 ints, initialize with a host 24 | // pointer, and add the use_mutex property 25 | std::mutex myMutex; 26 | buffer b2{my_ints, 27 | range(42), 28 | {property::buffer::use_mutex{myMutex}}}; 29 | // Retrieve a pointer to the mutex used by this buffer 30 | auto mutexPtr = 31 | b2.get_property() 32 | .get_mutex_ptr(); 33 | // Lock the mutex until we exit scope 34 | std::lock_guard guard{*mutexPtr}; 35 | 36 | // Create a context-bound buffer of 42 ints, initialized 37 | // from a host pointer 38 | buffer b3{ 39 | my_ints, 40 | range(42), 41 | {property::buffer::context_bound{q.get_context()}}}; 42 | // END CODE SNIP 43 | 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /samples/Ch07_buffers/fig_7_8_accessors_simple.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // BEGIN CODE SNIP 6 | #include 7 | #include 8 | using namespace sycl; 9 | constexpr int N = 42; 10 | 11 | int main() { 12 | queue q; 13 | // Create 3 buffers of 42 ints 14 | buffer a_buf{range{N}}; 15 | buffer b_buf{range{N}}; 16 | buffer c_buf{range{N}}; 17 | accessor pc{c_buf}; 18 | 19 | q.submit([&](handler &h) { 20 | accessor a{a_buf, h}; 21 | accessor b{b_buf, h}; 22 | accessor c{c_buf, h}; 23 | h.parallel_for(N, [=](id<1> i) { 24 | a[i] = 1; 25 | b[i] = 40; 26 | c[i] = 0; 27 | }); 28 | }); 29 | q.submit([&](handler &h) { 30 | accessor a{a_buf, h}; 31 | accessor b{b_buf, h}; 32 | accessor c{c_buf, h}; 33 | h.parallel_for(N, 34 | [=](id<1> i) { c[i] += a[i] + b[i]; }); 35 | }); 36 | q.submit([&](handler &h) { 37 | h.require(pc); 38 | h.parallel_for(N, [=](id<1> i) { pc[i]++; }); 39 | }); 40 | 41 | host_accessor result{c_buf}; 42 | for (int i = 0; i < N; i++) { 43 | assert(result[i] == N); 44 | } 45 | return 0; 46 | } 47 | // END CODE SNIP 48 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | if(NOT WITHCUDA) 6 | # TEMPORARILY DISABLE: doesn't work with CUDA backend. 7 | add_book_sample( 8 | TEST 9 | TARGET fig_8_3_linear_dependence_in_order 10 | SOURCES fig_8_3_linear_dependence_in_order.cpp) 11 | endif() 12 | 13 | if(NOT WITHCUDA) 14 | # TEMPORARILY DISABLE: doesn't work with CUDA backend. 
15 | add_book_sample( 16 | TEST 17 | TARGET fig_8_4_linear_dependence_events 18 | SOURCES fig_8_4_linear_dependence_events.cpp) 19 | endif() 20 | 21 | add_book_sample( 22 | TEST 23 | TARGET fig_8_5_linear_dependence_buffers 24 | SOURCES fig_8_5_linear_dependence_buffers.cpp) 25 | 26 | if(NOT WITHCUDA) 27 | # TEMPORARILY DISABLE: doesn't work with CUDA backend. 28 | add_book_sample( 29 | TEST 30 | TARGET fig_8_6_y_in_order 31 | SOURCES fig_8_6_y_in_order.cpp) 32 | endif() 33 | 34 | if(NOT WITHCUDA) 35 | # TEMPORARILY DISABLE: doesn't work with CUDA backend. 36 | add_book_sample( 37 | TEST 38 | TARGET fig_8_7_y_events 39 | SOURCES fig_8_7_y_events.cpp) 40 | endif() 41 | 42 | add_book_sample( 43 | TEST 44 | TARGET fig_8_8_y_buffers 45 | SOURCES fig_8_8_y_buffers.cpp) 46 | 47 | 48 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_3_linear_dependence_in_order.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q{property::queue::in_order()}; 11 | 12 | int *data = malloc_shared(N, q); 13 | 14 | q.parallel_for(N, [=](id<1> i) { data[i] = 1; }); 15 | 16 | q.single_task([=]() { 17 | for (int i = 1; i < N; i++) data[0] += data[i]; 18 | }); 19 | q.wait(); 20 | 21 | assert(data[0] == N); 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_4_linear_dependence_events.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | int *data = malloc_shared(N, q); 13 | 14 | auto e = q.parallel_for(N, [=](id<1> i) { data[i] = 1; }); 15 | 16 | q.submit([&](handler &h) { 17 | h.depends_on(e); 18 | h.single_task([=]() { 19 | for (int i = 1; i < N; i++) data[0] += data[i]; 20 | }); 21 | }); 22 | q.wait(); 23 | 24 | assert(data[0] == N); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_5_linear_dependence_buffers.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | buffer data{range{N}}; 13 | 14 | q.submit([&](handler &h) { 15 | accessor a{data, h}; 16 | h.parallel_for(N, [=](id<1> i) { a[i] = 1; }); 17 | }); 18 | 19 | q.submit([&](handler &h) { 20 | accessor a{data, h}; 21 | h.single_task([=]() { 22 | for (int i = 1; i < N; i++) a[0] += a[i]; 23 | }); 24 | }); 25 | 26 | host_accessor h_a{data}; 27 | assert(h_a[0] == N); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_6_y_in_order.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int 
main() { 10 | queue q{property::queue::in_order()}; 11 | 12 | int *data1 = malloc_shared(N, q); 13 | int *data2 = malloc_shared(N, q); 14 | 15 | q.parallel_for(N, [=](id<1> i) { data1[i] = 1; }); 16 | 17 | q.parallel_for(N, [=](id<1> i) { data2[i] = 2; }); 18 | 19 | q.parallel_for(N, [=](id<1> i) { data1[i] += data2[i]; }); 20 | 21 | q.single_task([=]() { 22 | for (int i = 1; i < N; i++) data1[0] += data1[i]; 23 | 24 | data1[0] /= 3; 25 | }); 26 | q.wait(); 27 | 28 | assert(data1[0] == N); 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_7_y_events.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | int *data1 = malloc_shared(N, q); 13 | int *data2 = malloc_shared(N, q); 14 | 15 | auto e1 = 16 | q.parallel_for(N, [=](id<1> i) { data1[i] = 1; }); 17 | 18 | auto e2 = 19 | q.parallel_for(N, [=](id<1> i) { data2[i] = 2; }); 20 | 21 | auto e3 = q.parallel_for( 22 | range{N}, {e1, e2}, 23 | [=](id<1> i) { data1[i] += data2[i]; }); 24 | 25 | q.single_task(e3, [=]() { 26 | for (int i = 1; i < N; i++) data1[0] += data1[i]; 27 | 28 | data1[0] /= 3; 29 | }); 30 | q.wait(); 31 | 32 | assert(data1[0] == N); 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /samples/Ch08_scheduling_kernels_and_data_movement/fig_8_8_y_buffers.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | using namespace sycl; 7 | constexpr int N = 42; 8 | 9 | int main() { 10 | queue q; 11 | 12 | buffer data1{range{N}}; 13 | buffer data2{range{N}}; 14 | 15 | q.submit([&](handler &h) { 16 | accessor a{data1, h}; 17 | h.parallel_for(N, [=](id<1> i) { a[i] = 1; }); 18 | }); 19 | 20 | q.submit([&](handler &h) { 21 | accessor b{data2, h}; 22 | h.parallel_for(N, [=](id<1> i) { b[i] = 2; }); 23 | }); 24 | 25 | q.submit([&](handler &h) { 26 | accessor a{data1, h}; 27 | accessor b{data2, h, read_only}; 28 | h.parallel_for(N, [=](id<1> i) { a[i] += b[i]; }); 29 | }); 30 | 31 | q.submit([&](handler &h) { 32 | accessor a{data1, h}; 33 | h.single_task([=]() { 34 | for (int i = 1; i < N; i++) a[0] += a[i]; 35 | 36 | a[0] /= 3; 37 | }); 38 | }); 39 | 40 | host_accessor h_a{data1}; 41 | assert(h_a[0] == N); 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /samples/Ch09_communication_and_sychronization/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_9_4_naive_matmul 8 | SOURCES matmul_harness.cpp fig_9_4_naive_matmul.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_9_7_local_accessors 13 | SOURCES fig_9_7_local_accessors.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_9_8_ndrange_tiled_matmul 18 | SOURCES matmul_harness.cpp fig_9_8_ndrange_tiled_matmul.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_9_9_sub_group_barrier 23 | SOURCES fig_9_9_sub_group_barrier.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_9_11_matmul_broadcast 28 | SOURCES 
matmul_harness.cpp fig_9_11_matmul_broadcast.cpp) 29 | 30 | add_book_sample( 31 | TARGET fig_9_12_ndrange_sub_group_matmul 32 | SOURCES matmul_harness.cpp fig_9_12_ndrange_sub_group_matmul.cpp) 33 | -------------------------------------------------------------------------------- /samples/Ch09_communication_and_sychronization/fig_9_4_naive_matmul.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | extern const int matrixSize = 128; 10 | static const int iterations = 16; 11 | 12 | template 13 | double run_sycl(const std::vector& vecA, 14 | const std::vector& vecB, 15 | std::vector& vecC) { 16 | using ns = std::chrono::nanoseconds; 17 | ns::rep best_time = std::numeric_limits::max(); 18 | 19 | const int M = matrixSize; 20 | const int N = matrixSize; 21 | const int K = matrixSize; 22 | 23 | std::fill(vecC.begin(), vecC.end(), (T)0); 24 | 25 | buffer bufA{vecA.data(), range<2>{M, K}}; 26 | buffer bufB{vecB.data(), range<2>{K, N}}; 27 | buffer bufC{vecC.data(), range<2>{M, N}}; 28 | 29 | queue q; 30 | std::cout << "Running on device: " 31 | << q.get_device().get_info() 32 | << "\n"; 33 | 34 | for (int i = 0; i < iterations; ++i) { 35 | auto start = std::chrono::steady_clock::now(); 36 | 37 | q.submit([&](handler& h) { 38 | accessor matrixA{bufA, h}; 39 | accessor matrixB{bufB, h}; 40 | accessor matrixC{bufC, h}; 41 | 42 | // BEGIN CODE SNIP 43 | h.parallel_for(range{M, N}, [=](id<2> id) { 44 | int m = id[0]; 45 | int n = id[1]; 46 | 47 | // Template type T is the type of data stored 48 | // in the matrix 49 | T sum = 0; 50 | for (int k = 0; k < K; k++) { 51 | sum += matrixA[m][k] * matrixB[k][n]; 52 | } 53 | 54 | matrixC[m][n] = sum; 55 | }); 56 | // END CODE SNIP 57 | }); 58 | q.wait(); 59 | 60 | auto duration = 61 | std::chrono::steady_clock::now() - start; 62 | auto time = 63 | std::chrono::duration_cast(duration).count(); 64 | 65 | best_time = std::min(time, best_time); 66 | } 67 | 68 | double best_seconds = (double)best_time / 1e9; 69 | 70 | return best_seconds; 71 | } 72 | 73 | template double run_sycl( 74 | const std::vector& vecA, 75 | const std::vector& vecB, 76 | std::vector& vecC); 77 | -------------------------------------------------------------------------------- /samples/Ch09_communication_and_sychronization/fig_9_7_local_accessors.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) data[i] = i; 15 | 16 | { 17 | buffer dataBuf{data}; 18 | 19 | queue q{default_selector_v}; 20 | std::cout 21 | << "Running on device: " 22 | << q.get_device().get_info() 23 | << "\n"; 24 | 25 | q.submit([&](handler& h) { 26 | // BEGIN CODE SNIP 27 | // This is a typical global accessor. 
28 | accessor dataAcc{dataBuf, h}; 29 | 30 | // This is a 1D local accessor consisting of 16 ints: 31 | auto localIntAcc = local_accessor(16, h); 32 | 33 | // This is a 2D local accessor consisting of 4 x 4 34 | // floats: 35 | auto localFloatAcc = 36 | local_accessor({4, 4}, h); 37 | 38 | h.parallel_for( 39 | nd_range<1>{{size}, {16}}, [=](nd_item<1> item) { 40 | auto index = item.get_global_id(); 41 | auto local_index = item.get_local_id(); 42 | 43 | // Within a kernel, a local accessor may be read 44 | // from and written to like any other accessor. 45 | localIntAcc[local_index] = dataAcc[index] + 1; 46 | dataAcc[index] = localIntAcc[local_index]; 47 | }); 48 | // END CODE SNIP 49 | }); 50 | } 51 | 52 | for (int i = 0; i < size; i++) { 53 | if (data[i] != i + 1) { 54 | std::cout << "Results did not validate at index " << i 55 | << "!\n"; 56 | return -1; 57 | } 58 | } 59 | 60 | std::cout << "Success!\n"; 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /samples/Ch09_communication_and_sychronization/fig_9_9_sub_group_barrier.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) data[i] = i; 15 | 16 | { 17 | buffer data_buf{data}; 18 | 19 | queue q; 20 | std::cout 21 | << "Running on device: " 22 | << q.get_device().get_info() 23 | << "\n"; 24 | 25 | q.submit([&](handler& h) { 26 | accessor data_acc{data_buf, h}; 27 | 28 | // BEGIN CODE SNIP 29 | h.parallel_for( 30 | nd_range{{size}, {16}}, [=](nd_item<1> item) { 31 | auto sg = item.get_sub_group(); 32 | group_barrier(sg); 33 | // ... 
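// (group_barrier(sg) returns only once every work-item in this
//  sub-group has reached it, and writes made before the barrier
//  are visible to the whole sub-group afterward.)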
34 | auto index = item.get_global_id(); 35 | data_acc[index] = data_acc[index] + 1; 36 | }); 37 | // END CODE SNIP 38 | }); 39 | } 40 | 41 | for (int i = 0; i < size; i++) { 42 | if (data[i] != i + 1) { 43 | std::cout << "Results did not validate at index " << i 44 | << "!\n"; 45 | return -1; 46 | } 47 | } 48 | 49 | std::cout << "Success!\n"; 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_10_2_kernel_lambda 8 | SOURCES fig_10_2_kernel_lambda.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_10_3_optional_kernel_lambda_elements 13 | SOURCES fig_10_3_optional_kernel_lambda_elements.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_10_4_named_kernel_lambda 18 | SOURCES fig_10_4_named_kernel_lambda.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_10_5_unnamed_kernel_lambda 23 | SOURCES fig_10_5_unnamed_kernel_lambda.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_10_6_kernel_functor 28 | SOURCES fig_10_6_kernel_functor.cpp) 29 | 30 | add_book_sample( 31 | TEST 32 | TARGET fig_10_7_optional_kernel_functor_elements 33 | SOURCES fig_10_7_optional_kernel_functor_elements.cpp) 34 | 35 | add_book_sample( 36 | TEST 37 | TARGET fig_10_8_use_kernel_bundle 38 | SOURCES fig_10_8_use_kernel_bundle.cpp) 39 | 40 | add_book_sample( 41 | TEST 42 | TARGET fig_10_9_use_specific_kernel_bundle 43 | SOURCES fig_10_9_use_specific_kernel_bundle.cpp) 44 | 45 | add_book_sample( 46 | TEST 47 | TARGET fig_10_10_kernel_query 48 | SOURCES fig_10_10_kernel_query.cpp) 49 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_2_kernel_lambda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) { 15 | data[i] = i; 16 | } 17 | 18 | { 19 | buffer data_buf{data}; 20 | 21 | queue q; 22 | std::cout 23 | << "Running on device: " 24 | << q.get_device().get_info() 25 | << "\n"; 26 | 27 | q.submit([&](handler& h) { 28 | accessor data_acc{data_buf, h}; 29 | // BEGIN CODE SNIP 30 | h.parallel_for( 31 | size, 32 | // This is the start of a kernel lambda 33 | // expression: 34 | [=](id<1> i) { data_acc[i] = data_acc[i] + 1; } 35 | // This is the end of the kernel lambda 36 | // expression. 
37 | ); 38 | // END CODE SNIP 39 | }); 40 | } 41 | 42 | for (int i = 0; i < size; i++) { 43 | if (data[i] != i + 1) { 44 | std::cout << "Results did not validate at index " << i 45 | << "!\n"; 46 | return -1; 47 | } 48 | } 49 | 50 | std::cout << "Success!\n"; 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_3_optional_kernel_lambda_elements.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) { 15 | data[i] = i; 16 | } 17 | 18 | { 19 | buffer data_buf{data}; 20 | 21 | queue q; 22 | std::cout 23 | << "Running on device: " 24 | << q.get_device().get_info() 25 | << "\n"; 26 | 27 | q.submit([&](handler& h) { 28 | // BEGIN CODE SNIP 29 | accessor data_acc{data_buf, h}; 30 | 31 | h.parallel_for( 32 | nd_range{{size}, {8}}, 33 | [=](nd_item<1> item) noexcept 34 | [[sycl::reqd_work_group_size(8)]] -> void { 35 | auto i = item.get_global_id(); 36 | data_acc[i] = data_acc[i] + 1; 37 | }); 38 | }); 39 | // END CODE SNIP 40 | } 41 | 42 | for (int i = 0; i < size; i++) { 43 | if (data[i] != i + 1) { 44 | std::cout << "Results did not validate at index " << i 45 | << "!\n"; 46 | return -1; 47 | } 48 | } 49 | 50 | std::cout << "Success!\n"; 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_4_named_kernel_lambda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | class Add; 11 | 12 | int main() { 13 | constexpr size_t size = 16; 14 | std::array data; 15 | 16 | for (int i = 0; i < size; i++) { 17 | data[i] = i; 18 | } 19 | 20 | { 21 | buffer data_buf{data}; 22 | 23 | queue q; 24 | std::cout 25 | << "Running on device: " 26 | << q.get_device().get_info() 27 | << "\n"; 28 | 29 | q.submit([&](handler& h) { 30 | accessor data_acc{data_buf, h}; 31 | 32 | // BEGIN CODE SNIP 33 | // In this example, "class Add" names the kernel 34 | // lambda expression. 
35 | h.parallel_for(size, [=](id<1> i) { 36 | data_acc[i] = data_acc[i] + 1; 37 | }); 38 | // END CODE SNIP 39 | }); 40 | } 41 | 42 | for (int i = 0; i < size; i++) { 43 | if (data[i] != i + 1) { 44 | std::cout << "Results did not validate at index " << i 45 | << "!\n"; 46 | return -1; 47 | } 48 | } 49 | 50 | std::cout << "Success!\n"; 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_5_unnamed_kernel_lambda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) { 15 | data[i] = i; 16 | } 17 | 18 | { 19 | buffer data_buf{data}; 20 | 21 | queue q; 22 | std::cout 23 | << "Running on device: " 24 | << q.get_device().get_info() 25 | << "\n"; 26 | 27 | q.submit([&](handler& h) { 28 | accessor data_acc{data_buf, h}; 29 | 30 | // In many cases the explicit kernel name template 31 | // parameter is not required. 32 | // BEGIN CODE SNIP 33 | h.parallel_for(size, [=](id<1> i) { 34 | data_acc[i] = data_acc[i] + 1; 35 | }); 36 | // END CODE SNIP 37 | }); 38 | } 39 | 40 | for (int i = 0; i < size; i++) { 41 | if (data[i] != i + 1) { 42 | std::cout << "Results did not validate at index " << i 43 | << "!\n"; 44 | return -1; 45 | } 46 | } 47 | 48 | std::cout << "Success!\n"; 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_6_kernel_functor.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | // BEGIN CODE SNIP 11 | class Add { 12 | public: 13 | Add(accessor acc) : data_acc(acc) {} 14 | void operator()(id<1> i) const { 15 | data_acc[i] = data_acc[i] + 1; 16 | } 17 | 18 | private: 19 | accessor data_acc; 20 | }; 21 | 22 | int main() { 23 | constexpr size_t size = 16; 24 | std::array data; 25 | 26 | for (int i = 0; i < size; i++) { 27 | data[i] = i; 28 | } 29 | 30 | { 31 | buffer data_buf{data}; 32 | 33 | queue q; 34 | std::cout 35 | << "Running on device: " 36 | << q.get_device().get_info() 37 | << "\n"; 38 | 39 | q.submit([&](handler& h) { 40 | accessor data_acc{data_buf, h}; 41 | h.parallel_for(size, Add(data_acc)); 42 | }); 43 | } 44 | // END CODE SNIP 45 | 46 | for (int i = 0; i < size; i++) { 47 | if (data[i] != i + 1) { 48 | std::cout << "Results did not validate at index " << i 49 | << "!\n"; 50 | return -1; 51 | } 52 | } 53 | 54 | std::cout << "Success!\n"; 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_7_optional_kernel_functor_elements.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | // BEGIN CODE SNIP 11 | class AddWithAttribute { 12 | public: 13 | AddWithAttribute(accessor acc) : data_acc(acc) {} 14 | [[sycl::reqd_work_group_size(8)]] void operator()( 15 | nd_item<1> item) const { 16 | auto i = item.get_global_id(); 17 | 
data_acc[i] = data_acc[i] + 1; 18 | } 19 | 20 | private: 21 | accessor data_acc; 22 | }; 23 | 24 | class MulWithAttribute { 25 | public: 26 | MulWithAttribute(accessor acc) : data_acc(acc) {} 27 | void operator() [[sycl::reqd_work_group_size(8)]] ( 28 | nd_item<1> item) const { 29 | auto i = item.get_global_id(); 30 | data_acc[i] = data_acc[i] * 2; 31 | } 32 | 33 | private: 34 | accessor data_acc; 35 | }; 36 | // END CODE SNIP 37 | 38 | int main() { 39 | constexpr size_t size = 16; 40 | std::array data; 41 | 42 | for (int i = 0; i < size; i++) { 43 | data[i] = i; 44 | } 45 | 46 | { 47 | buffer data_buf{data}; 48 | 49 | queue q; 50 | std::cout 51 | << "Running on device: " 52 | << q.get_device().get_info() 53 | << "\n"; 54 | 55 | q.submit([&](handler& h) { 56 | accessor data_acc{data_buf, h}; 57 | h.parallel_for(nd_range{{size}, {8}}, 58 | AddWithAttribute(data_acc)); 59 | }); 60 | 61 | q.submit([&](handler& h) { 62 | accessor data_acc{data_buf, h}; 63 | h.parallel_for(nd_range{{size}, {8}}, 64 | MulWithAttribute(data_acc)); 65 | }); 66 | } 67 | 68 | for (int i = 0; i < size; i++) { 69 | if (data[i] != (i + 1) * 2) { 70 | std::cout << "Results did not validate at index " << i 71 | << "!\n"; 72 | return -1; 73 | } 74 | } 75 | 76 | std::cout << "Success!\n"; 77 | return 0; 78 | } 79 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_8_use_kernel_bundle.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < size; i++) { 15 | data[i] = i; 16 | } 17 | 18 | { 19 | buffer data_buf{data}; 20 | 21 | queue q; 22 | std::cout 23 | << "Running on device: " 24 | << q.get_device().get_info() 25 | << "\n"; 26 | 27 | // BEGIN CODE SNIP 28 | auto kb = get_kernel_bundle( 29 | q.get_context()); 30 | 31 | std::cout 32 | << "All kernel compilation should be done now.\n"; 33 | 34 | q.submit([&](handler& h) { 35 | // Use the pre-compiled kernel from the kernel bundle. 
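// (use_kernel_bundle must appear before the kernel is invoked in
//  this command group, and the executable bundle must contain
//  every kernel that the command group invokes.)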
36 | h.use_kernel_bundle(kb); 37 | 38 | accessor data_acc{data_buf, h}; 39 | h.parallel_for(range{size}, [=](id<1> i) { 40 | data_acc[i] = data_acc[i] + 1; 41 | }); 42 | }); 43 | // END CODE SNIP 44 | } 45 | 46 | for (int i = 0; i < size; i++) { 47 | if (data[i] != i + 1) { 48 | std::cout << "Results did not validate at index " << i 49 | << "!\n"; 50 | return -1; 51 | } 52 | } 53 | 54 | std::cout << "Success!\n"; 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /samples/Ch10_defining_kernels/fig_10_9_use_specific_kernel_bundle.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | class Add; 11 | 12 | int main() { 13 | constexpr size_t size = 16; 14 | std::array data; 15 | 16 | for (int i = 0; i < size; i++) { 17 | data[i] = i; 18 | } 19 | 20 | { 21 | buffer data_buf{data}; 22 | 23 | queue q; 24 | std::cout 25 | << "Running on device: " 26 | << q.get_device().get_info() 27 | << "\n"; 28 | 29 | // BEGIN CODE SNIP 30 | auto kid = get_kernel_id(); 31 | auto kb = get_kernel_bundle( 32 | q.get_context(), {q.get_device()}, {kid}); 33 | 34 | std::cout << "Kernel compilation should be done now.\n"; 35 | 36 | q.submit([&](handler& h) { 37 | // Use the pre-compiled kernel from the kernel bundle. 38 | h.use_kernel_bundle(kb); 39 | 40 | accessor data_acc{data_buf, h}; 41 | h.parallel_for(range{size}, [=](id<1> i) { 42 | data_acc[i] = data_acc[i] + 1; 43 | }); 44 | }); 45 | // END CODE SNIP 46 | } 47 | 48 | for (int i = 0; i < size; i++) { 49 | if (data[i] != i + 1) { 50 | std::cout << "Results did not validate at index " << i 51 | << "!\n"; 52 | return -1; 53 | } 54 | } 55 | 56 | std::cout << "Success!\n"; 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_11_2_marray 8 | SOURCES fig_11_2_marray.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_11_4_load_store 13 | SOURCES fig_11_4_load_store.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_11_6_swizzle_vec 18 | SOURCES fig_11_6_swizzle_vec.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_11_7_vector_exec 23 | SOURCES fig_11_7_vector_exec.cpp) 24 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_10.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | q.submit([&](sycl::handler &h) { // assume sub group size is 8 9 | // ... 10 | h.parallel_for(range<1>(8), [=](id<1> i) { 11 | // ... 12 | float4 y4 = b[i]; // i=0, 1, 2, ... 13 | // ... 
14 | float x = dowork(&y4); // the “dowork” expects y4, 15 | // i.e., vec_y[8][4] layout 16 | }); 17 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_2_marray.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #define M 32 8 | 9 | using namespace sycl; 10 | 11 | template 12 | bool checkEqual(marray A, marray B) { 13 | for (int i = 0; i < N; i++) { 14 | if (A[i] != B[i]) { 15 | return false; 16 | } 17 | } 18 | return true; 19 | } 20 | 21 | int main() { 22 | // BEGIN CODE SNIP 23 | queue q; 24 | marray input{1.0004f, 1e-4f, 1.4f, 14.0f}; 25 | marray res[M]; 26 | for (int i = 0; i < M; i++) 27 | res[i] = {-(i + 1), -(i + 1), -(i + 1), -(i + 1)}; 28 | { 29 | buffer in_buf(&input, range{1}); 30 | buffer re_buf(res, range{M}); 31 | 32 | q.submit([&](handler &cgh) { 33 | accessor re_acc{re_buf, cgh, read_write}; 34 | accessor in_acc{in_buf, cgh, read_only}; 35 | 36 | cgh.parallel_for(range<1>(M), [=](id<1> idx) { 37 | int i = idx[0]; 38 | re_acc[i] = cos(in_acc[0]); 39 | }); 40 | }); 41 | } 42 | // END CODE SNIP 43 | 44 | if (checkEqual(res[0], res[M / 2])) 45 | std::cout << "passed\n"; 46 | else 47 | std::cout << "failed\n"; 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_3.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | void load(size_t offset, multi_ptr ptr); 10 | 11 | template 12 | void store(size_t offset, multi_ptr ptr) const; 13 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_4_load_store.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | constexpr int workers = 64; 11 | constexpr int size = workers * 16; 12 | 13 | // BEGIN CODE SNIP 14 | std::array fpData; 15 | for (int i = 0; i < size; i++) { 16 | fpData[i] = 8.0f; 17 | } 18 | 19 | buffer fpBuf(fpData); 20 | 21 | queue q; 22 | q.submit([&](handler& h) { 23 | accessor acc{fpBuf, h}; 24 | 25 | h.parallel_for(workers, [=](id<1> idx) { 26 | float16 inpf16; 27 | inpf16.load(idx, acc.get_multi_ptr()); 28 | float16 result = inpf16 * 2.0f; 29 | result.store(idx, acc.get_multi_ptr()); 30 | }); 31 | }); 32 | // END CODE SNIP 33 | 34 | host_accessor hostAcc(fpBuf); 35 | if (fpData[0] != 16.0f) { 36 | std::cout << "Failed\n"; 37 | return -1; 38 | } 39 | 40 | std::cout << "Passed\n"; 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_5.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
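// As a concrete illustration of the interface declared below (an added
// sketch, assuming a 4-element vec), swizzles select and reorder
// components by name or by element index:
//
//   float4 v{1.0f, 2.0f, 3.0f, 4.0f};
//   auto bgr  = v.zyx();               // {3, 2, 1} via a named swizzle
//   auto same = v.swizzle<2, 1, 0>();  // equivalent element-index form
//
// (The named forms such as zyx() are only available when
//  SYCL_SIMPLE_SWIZZLES is defined before including <sycl/sycl.hpp>.)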
7 | 8 | template 9 | __swizzled_vec__ swizzle() const; 10 | __swizzled_vec__ XYZW_ACCESS() const; 11 | __swizzled_vec__ RGBA_ACCESS() const; 12 | __swizzled_vec__ INDEX_ACCESS() const; 13 | 14 | #ifdef SYCL_SIMPLE_SWIZZLES 15 | // Available only when numElements <= 4 16 | // XYZW_SWIZZLE is all permutations with repetition of: 17 | // x, y, z, w, subject to numElements 18 | __swizzled_vec__ XYZW_SWIZZLE() const; 19 | 20 | // Available only when numElements == 4 21 | // RGBA_SWIZZLE is all permutations with repetition of: r, 22 | // g, b, a. 23 | __swizzled_vec__ RGBA_SWIZZLE() const; 24 | #endif 25 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_6_swizzle_vec.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #define SYCL_SIMPLE_SWIZZLES 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // BEGIN CODE SNIP 12 | constexpr int size = 16; 13 | 14 | std::array input; 15 | for (int i = 0; i < size; i++) { 16 | input[i] = float4(8.0f, 6.0f, 2.0f, i); 17 | } 18 | 19 | buffer b(input); 20 | 21 | queue q; 22 | q.submit([&](handler& h) { 23 | accessor a{b, h}; 24 | 25 | // We can access the individual elements of a vector by 26 | // using the functions x(), y(), z(), w() and so on. 27 | // 28 | // "Swizzles" can be used by calling a vector member 29 | // equivalent to the swizzle order that we need, for 30 | // example zyx() or any combination of the elements. 31 | // The swizzle need not be the same size as the 32 | // original vector. 33 | h.parallel_for(size, [=](id<1> idx) { 34 | auto e = a[idx]; 35 | float w = e.w(); 36 | float4 sw = e.xyzw(); 37 | sw = e.xyzw() * sw.wzyx(); 38 | sw = sw + w; 39 | a[idx] = sw.xyzw(); 40 | }); 41 | }); 42 | // END CODE SNIP 43 | 44 | host_accessor hostAcc(b); 45 | 46 | for (int i = 0; i < size; i++) { 47 | if (hostAcc[i].y() != 12.0f + i) { 48 | std::cout << "Failed\n"; 49 | return -1; 50 | } 51 | } 52 | 53 | std::cout << "Passed\n"; 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /samples/Ch11_vectors_and_math_arrays/fig_11_7_vector_exec.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr int size = 8; 12 | 13 | std::array fpData; 14 | std::array fp4Data; 15 | for (int i = 0; i < size; i++) { 16 | fpData[i] = i; 17 | float b = i * 4.0f; 18 | fp4Data[i] = float4(b, b + 1, b + 2, b + 3); 19 | } 20 | 21 | buffer fpBuf(fpData); 22 | buffer fp4Buf(fp4Data); 23 | 24 | queue q; 25 | q.submit([&](handler& h) { 26 | accessor a{fpBuf, h}; 27 | accessor b{fp4Buf, h}; 28 | 29 | // BEGIN CODE SNIP 30 | h.parallel_for(8, [=](id<1> i) { 31 | float x = a[i]; 32 | float4 y4 = b[i]; 33 | a[i] = x + sycl::length(y4); 34 | }); 35 | // END CODE SNIP 36 | }); 37 | 38 | host_accessor a(fpBuf); 39 | for (int i = 0; i < size; i++) { 40 | float b = 4 * i; 41 | if (1 < a[i] - (i + std::sqrt(std::pow(b, 2) + 42 | std::pow(b + 1, 2) + 43 | std::pow(b + 2, 2) + 44 | std::pow(b + 3, 2)))) { 45 | std::cout << "Failed\n"; 46 | return -1; 47 | } 48 | } 49 | 50 | std::cout << "Passed\n"; 51 | return 0; 52 | } 53 | 
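// Supplementary sketch (not one of the book's numbered figures): the host
// check in fig_11_7 above compares against sqrt(b^2 + (b+1)^2 + ...), which
// is exactly what sycl::length computes for a float4. The small,
// self-contained program below shows that sycl::length(v) and
// sycl::sqrt(sycl::dot(v, v)) agree; the names used are standard SYCL 2020,
// but the program itself is an illustrative addition.
#include <sycl/sycl.hpp>
#include <iostream>

int main() {
  sycl::queue q;
  float* out = sycl::malloc_shared<float>(2, q);

  q.single_task([=]() {
     sycl::float4 v(3.0f, 4.0f, 0.0f, 0.0f);
     out[0] = sycl::length(v);              // Euclidean norm: 5.0
     out[1] = sycl::sqrt(sycl::dot(v, v));  // same value by definition
   }).wait();

  std::cout << "length = " << out[0] << ", sqrt(dot) = " << out[1] << "\n";
  sycl::free(out, q);
  return 0;
}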
-------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_12_1_assigned_device 8 | SOURCES fig_12_1_assigned_device.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_12_2_try_catch 13 | SOURCES fig_12_2_try_catch.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_12_4_device_selector 18 | SOURCES fig_12_4_device_selector.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_12_5_curious 23 | SOURCES fig_12_5_curious.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_12_7_very_curious 28 | SOURCES fig_12_7_very_curious.cpp) 29 | 30 | add_book_sample( 31 | TEST 32 | TARGET fig_12_8_invocation_parameters 33 | SOURCES fig_12_8_invocation_parameters.cpp) 34 | 35 | add_book_sample( 36 | TEST 37 | TARGET fig_12_10_specialize 38 | SOURCES fig_12_10_specialize.cpp) 39 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_10_specialize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | using namespace sycl; 4 | 5 | int main() { 6 | queue q; 7 | 8 | constexpr int size = 16; 9 | std::array data; 10 | 11 | // Using "sycl::device_has()" as an attribute does not 12 | // affect the device we select. Therefore, our host code 13 | // should check the device's aspects before submitting a 14 | // kernel which does require that attribute. 15 | if (q.get_device().has(aspect::fp64)) { 16 | buffer B{data}; 17 | q.submit([&](handler& h) { 18 | accessor A{B, h}; 19 | // the attributes here say that the kernel is allowed 20 | // to require fp64 support any attribute(s) from 21 | // Figure 12-3 could be specified note that namespace 22 | // stmt above (for C++) does not affect attributes (a 23 | // C++ quirk) so sycl:: is needed here 24 | h.parallel_for( 25 | size, [=](auto& idx) 26 | [[sycl::device_has(aspect::fp64)]] { 27 | A[idx] = idx * 2.0; 28 | }); 29 | }); 30 | std::cout << "doubles were used\n"; 31 | } else { 32 | // here we use an alternate method (not needing double 33 | // math support on the device) to help our code be 34 | // flexible and hence more portable 35 | std::array fdata; 36 | { 37 | buffer B{fdata}; 38 | q.submit([&](handler& h) { 39 | accessor A{B, h}; 40 | h.parallel_for( 41 | size, [=](auto& idx) { A[idx] = idx * 2.0f; }); 42 | }); 43 | } 44 | 45 | for (int i = 0; i < size; i++) data[i] = fdata[i]; 46 | 47 | std::cout << "no doubles used\n"; 48 | } 49 | for (int i = 0; i < size; i++) 50 | std::cout << "data[" << i << "] = " << data[i] << "\n"; 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_1_assigned_device.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // BEGIN CODE SNIP 11 | queue q; 12 | 13 | std::cout << "By default, we are running on " 14 | << q.get_device().get_info() 15 | << "\n"; 16 | // END CODE SNIP 17 | 18 | return 0; 19 | } 20 | 21 | // Example Outputs (one line per 
run – depends on system): 22 | // By default, we are running on NVIDIA GeForce RTX 3060 23 | // By default, we are running on AMD Radeon RX 5700 XT 24 | // By default, we are running on Intel(R) UHD Graphics 770 25 | // By default, we are running on Intel(R) Xeon(R) Gold 6336Y CPU @ 2.40GHz 26 | // By default, we are running on Intel(R) Data Center GPU Max 1100 27 | 28 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_2_try_catch.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // BEGIN CODE SNIP 11 | auto GPU_is_available = false; 12 | 13 | try { 14 | device testForGPU(gpu_selector_v); 15 | GPU_is_available = true; 16 | } catch (exception const& ex) { 17 | std::cout << "Caught this SYCL exception: " << ex.what() 18 | << std::endl; 19 | } 20 | 21 | auto q = GPU_is_available ? queue(gpu_selector_v) 22 | : queue(default_selector_v); 23 | 24 | std::cout 25 | << "After checking for a GPU, we are running on:\n " 26 | << q.get_device().get_info() 27 | << "\n"; 28 | 29 | // END CODE SNIP 30 | return 0; 31 | } 32 | 33 | // After checking for a GPU, we are running on: 34 | // AMD Radeon RX 5700 XT 35 | // After checking for a GPU, we are running on: 36 | // Intel(R) Data Center GPU Max 1100 37 | // After checking for a GPU, we are running on: 38 | // NVIDIA GeForce RTX 3060 39 | // After checking for a GPU, we are running on: 40 | // Intel(R) UHD Graphics 770 41 | // 42 | // Example Output (using a system without GPU): 43 | // Caught this SYCL exception: No device of 44 | // requested type 'info::device_type::gpu' available. 45 | // ...(PI_ERROR_DEVICE_NOT_FOUND) 46 | // After checking for a GPU, we are running on: 47 | // AMD Ryzen 5 3600 6-Core Processor 48 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_4_device_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int my_selector(const device& dev) { 10 | int score = -1; 11 | 12 | // We prefer non-Martian GPUs, especially ACME GPUs 13 | if (dev.is_gpu()) { 14 | if (dev.get_info().find("ACME") != 15 | std::string::npos) 16 | score += 25; 17 | 18 | if (dev.get_info().find( 19 | "Martian") == std::string::npos) 20 | score += 800; 21 | } 22 | 23 | // If there is no GPU on the system all devices will be 24 | // given a negative score and the selector will not select 25 | // a device. This will cause an exception. 26 | return score; 27 | } 28 | 29 | int main() { 30 | try { 31 | auto q = queue{my_selector}; 32 | std::cout 33 | << "After checking for a GPU, we are running on:\n " 34 | << q.get_device().get_info() 35 | << "\n"; 36 | } catch (exception const& ex) { 37 | std::cout << "Custom device selector did not select a " 38 | "device.\n"; 39 | std::cout << "Caught this SYCL exception: " << ex.what() 40 | << std::endl; 41 | } 42 | 43 | return 0; 44 | } 45 | 46 | // Four Example Outputs (using four different 47 | // systems, each with a GPU): 48 | // After checking for a GPU, we are running on: 49 | // Intel(R) Gen9 HD Graphics NEO. 
50 | // After checking for a GPU, we are running on: 51 | // NVIDIA GeForce RTX 3060 52 | // After checking for a GPU, we are running on: 53 | // Intel(R) Data Center GPU Max 1100 54 | // After checking for a GPU, we are running on: 55 | // AMD Radeon RX 5700 XT 56 | // 57 | // Example Output (using a system without GPU): 58 | // After checking for a GPU, we are running on: 59 | // Custom device selector did not select a device. 60 | // Caught this SYCL exception: No device of requested 61 | // type available. ...(PI_ERROR_DEVICE_NOT_FOUND) 62 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_5_curious.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // BEGIN CODE SNIP 11 | // Loop through available platforms 12 | for (auto const& this_platform : 13 | platform::get_platforms()) { 14 | std::cout 15 | << "Found platform: " 16 | << this_platform.get_info() 17 | << "\n"; 18 | 19 | // Loop through available devices in this platform 20 | for (auto const& this_device : 21 | this_platform.get_devices()) { 22 | std::cout 23 | << " Device: " 24 | << this_device.get_info() 25 | << "\n"; 26 | } 27 | std::cout << "\n"; 28 | } 29 | // END CODE SNIP 30 | 31 | return 0; 32 | } 33 | 34 | 35 | // % clang++ -fsycl fig_12_5_curious.cpp -o curious 36 | // 37 | // % ./curious 38 | // Found platform: NVIDIA CUDA BACKEND 39 | // Device: NVIDIA GeForce RTX 3060 40 | // 41 | // Found platform: AMD HIP BACKEND 42 | // Device: AMD Radeon RX 5700 XT 43 | // 44 | // Found platform: Intel(R) OpenCL 45 | // Device: Intel(R) Xeon(R) E-2176G CPU @ 3.70GHz 46 | // 47 | // Found platform: Intel(R) OpenCL HD Graphics 48 | // Device: Intel(R) UHD Graphics P630 [0x3e96] 49 | // 50 | // Found platform: Intel(R) Level-Zero 51 | // Device: Intel(R) UHD Graphics P630 [0x3e96] 52 | // 53 | // Found platform: Intel(R) FPGA Emulation Platform for OpenCL(TM) 54 | // Device: Intel(R) FPGA Emulation Device 55 | 56 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/fig_12_8_invocation_parameters.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | // BEGIN CODE SNIP 11 | queue q; 12 | device dev = q.get_device(); 13 | 14 | std::cout << "We are running on:\n" 15 | << dev.get_info() << "\n"; 16 | 17 | // Query results like the following can be used to 18 | // calculate how large your kernel invocations can be. 
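  // For instance, an nd_range launch could clamp its requested
  // work-group size to the maximum queried below, along the lines of
  //   size_t wg = std::min<size_t>(desired_wg, maxWG);
  // where desired_wg stands in for an application tuning choice.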
19 | auto maxWG = 20 | dev.get_info(); 21 | auto maxGmem = 22 | dev.get_info(); 23 | auto maxLmem = 24 | dev.get_info(); 25 | 26 | std::cout << "Max WG size is " << maxWG 27 | << "\nGlobal memory size is " << maxGmem 28 | << "\nLocal memory size is " << maxLmem << "\n"; 29 | 30 | // END CODE SNIP 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /samples/Ch12_device_information_and_kernel_specialization/tst_12_4_device_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace std; 8 | 9 | int my_selector(int isgpu, string foo) { 10 | int score = -1; 11 | 12 | // We prefer non-Martian GPUs, especially ACME GPUs 13 | if (isgpu) { 14 | if (foo.find("ACME") != std::string::npos) score += 25; 15 | 16 | if (foo.find("Martian") == std::string::npos) 17 | score += 800; 18 | } 19 | 20 | // If there is no GPU on the system all devices will be 21 | // given a negative score and the selector will not select 22 | // a device. This will cause an exception. 23 | return score; 24 | } 25 | 26 | int main() { 27 | string foo; 28 | foo = "Intel GPU"; 29 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 30 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 31 | foo = "Intel ACME GPU"; 32 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 33 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 34 | foo = "Intel GPU Martian"; 35 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 36 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 37 | foo = "Intel Martian ACME GPU"; 38 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 39 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 40 | foo = "ACME"; 41 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 42 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 43 | foo = "MartianACME"; 44 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 45 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 46 | foo = "Martian"; 47 | cout << "NOTGPU" << foo << my_selector(0, foo) << '\n'; 48 | cout << "YESGPU" << foo << my_selector(1, foo) << '\n'; 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /samples/Ch13_practical_tips/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_13_4_stream 8 | SOURCES fig_13_4_stream.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_13_6_queue_profiling_timing 13 | SOURCES fig_13_6_queue_profiling_timing.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_13_9_common_buffer_pattern 18 | SOURCES fig_13_9_common_buffer_pattern.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_13_10_common_pattern_bug 23 | SOURCES fig_13_10_common_pattern_bug.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_13_11_host_accessor 28 | SOURCES fig_13_11_host_accessor.cpp) 29 | 30 | add_book_sample( 31 | TEST 32 | TARGET fig_13_12_host_accessor_for_init 33 | SOURCES fig_13_12_host_accessor_for_init.cpp) 34 | 35 | add_book_sample( 36 | TARGET fig_13_13_host_accessor_deadlock 37 | SOURCES fig_13_13_host_accessor_deadlock.cpp) 38 | 39 | -------------------------------------------------------------------------------- 
/samples/Ch13_practical_tips/fig_13_10_common_pattern_bug.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // BEGIN CODE SNIP 12 | 13 | constexpr size_t N = 1024; 14 | 15 | // Set up queue on any available device 16 | queue q; 17 | 18 | // Create host containers to initialize on the host 19 | std::vector in_vec(N), out_vec(N); 20 | 21 | // Initialize input and output vectors 22 | for (int i = 0; i < N; i++) in_vec[i] = i; 23 | std::fill(out_vec.begin(), out_vec.end(), 0); 24 | 25 | // Create buffers using host allocations (vector in this 26 | // case) 27 | buffer in_buf{in_vec}, out_buf{out_vec}; 28 | 29 | // Submit the kernel to the queue 30 | q.submit([&](handler& h) { 31 | accessor in{in_buf, h}; 32 | accessor out{out_buf, h}; 33 | 34 | h.parallel_for(range{N}, 35 | [=](id<1> idx) { out[idx] = in[idx]; }); 36 | }); 37 | 38 | // BUG!!! We're using the host allocation out_vec, but the 39 | // buffer out_buf is still alive and owns that allocation! 40 | // We will probably see the initialiation value (zeros) 41 | // printed out, since the kernel probably hasn't even run 42 | // yet, and the buffer has no reason to have copied any 43 | // output back to the host even if the kernel has run. 44 | for (int i = 0; i < N; i++) 45 | std::cout << "out_vec[" << i << "]=" << out_vec[i] 46 | << "\n"; 47 | 48 | // END CODE SNIP 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /samples/Ch13_practical_tips/fig_13_11_host_accessor.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // BEGIN CODE SNIP 12 | 13 | constexpr size_t N = 1024; 14 | 15 | // Set up queue on any available device 16 | queue q; 17 | 18 | // Create host containers to initialize on the host 19 | std::vector in_vec(N), out_vec(N); 20 | 21 | // Initialize input and output vectors 22 | for (int i = 0; i < N; i++) in_vec[i] = i; 23 | std::fill(out_vec.begin(), out_vec.end(), 0); 24 | 25 | // Create buffers using host allocations (vector in this 26 | // case) 27 | buffer in_buf{in_vec}, out_buf{out_vec}; 28 | 29 | // Submit the kernel to the queue 30 | q.submit([&](handler& h) { 31 | accessor in{in_buf, h}; 32 | accessor out{out_buf, h}; 33 | 34 | h.parallel_for(range{N}, 35 | [=](id<1> idx) { out[idx] = in[idx]; }); 36 | }); 37 | 38 | // Check that all outputs match expected value 39 | // Use host accessor! 
Buffer is still in scope / alive 40 | host_accessor A{out_buf}; 41 | 42 | for (int i = 0; i < N; i++) 43 | std::cout << "A[" << i << "]=" << A[i] << "\n"; 44 | 45 | // END CODE SNIP 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /samples/Ch13_practical_tips/fig_13_12_host_accessor_for_init.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | // BEGIN CODE SNIP 12 | 13 | constexpr size_t N = 1024; 14 | 15 | // Set up queue on any available device 16 | queue q; 17 | 18 | // Create buffers of size N 19 | buffer in_buf{N}, out_buf{N}; 20 | 21 | // Use host accessors to initialize the data 22 | { // CRITICAL: Begin scope for host_accessor lifetime! 23 | host_accessor in_acc{in_buf}, out_acc{out_buf}; 24 | for (int i = 0; i < N; i++) { 25 | in_acc[i] = i; 26 | out_acc[i] = 0; 27 | } 28 | } // CRITICAL: Close scope to make host accessors go out 29 | // of scope! 30 | 31 | // Submit the kernel to the queue 32 | q.submit([&](handler& h) { 33 | accessor in{in_buf, h}; 34 | accessor out{out_buf, h}; 35 | 36 | h.parallel_for(range{N}, 37 | [=](id<1> idx) { out[idx] = in[idx]; }); 38 | }); 39 | 40 | // Check that all outputs match expected value 41 | // Use host accessor! Buffer is still in scope / alive 42 | host_accessor A{out_buf}; 43 | 44 | for (int i = 0; i < N; i++) 45 | std::cout << "A[" << i << "]=" << A[i] << "\n"; 46 | 47 | // END CODE SNIP 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /samples/Ch13_practical_tips/fig_13_4_stream.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | queue q; 11 | // BEGIN CODE SNIP 12 | q.submit([&](handler &h) { 13 | stream out(1024, 256, h); 14 | h.parallel_for(range{8}, [=](id<1> idx) { 15 | out << "Testing my sycl stream (this is work-item ID:" 16 | << idx << ")\n"; 17 | }); 18 | }); 19 | // END CODE SNIP 20 | 21 | // Wait on the queue so that the host program doesn't 22 | // complete before the device code stream out is executed. 23 | // This ensures that the example actually displays the 24 | // output text. 
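  // An equivalent approach would be to capture the event returned by
  // q.submit(...) and call wait() on that event; waiting on the whole
  // queue is simply the most concise option for this single-submission
  // example.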
25 | q.wait(); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_14_8_one_reduction 8 | SOURCES fig_14_8_one_reduction.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_14_11_array_reduction 13 | SOURCES fig_14_11_array_reduction.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_14_12_user_defined_reduction 18 | SOURCES fig_14_12_user_defined_reduction.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_14_13_algorithm_comparison 23 | SOURCES fig_14_13_algorithm_comparison.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_14_15_map 28 | SOURCES fig_14_15_map.cpp 29 | ADDITIONAL_COMPILE_OPTIONS -fno-fast-math) 30 | 31 | add_book_sample( 32 | TEST 33 | TARGET fig_14_16_stencil 34 | SOURCES fig_14_16_stencil.cpp) 35 | 36 | add_book_sample( 37 | TEST 38 | TARGET fig_14_17_local_stencil 39 | SOURCES fig_14_17_local_stencil.cpp) 40 | 41 | add_book_sample( 42 | TEST 43 | TARGET fig_14_18_basic_reduction 44 | SOURCES fig_14_18_basic_reduction.cpp) 45 | 46 | add_book_sample( 47 | TEST 48 | TARGET fig_14_19_nd_range_reduction 49 | SOURCES fig_14_19_nd_range_reduction.cpp) 50 | 51 | add_book_sample( 52 | TEST 53 | TARGET fig_14_20-22_inclusive_scan 54 | SOURCES fig_14_20-22_inclusive_scan.cpp) 55 | 56 | add_book_sample( 57 | TEST 58 | TARGET fig_14_24_local_pack 59 | SOURCES fig_14_24_local_pack.cpp) 60 | 61 | add_book_sample( 62 | TEST 63 | TARGET fig_14_26_local_unpack 64 | SOURCES fig_14_26_local_unpack.cpp) 65 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_10.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
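// For context (an illustrative sketch, assuming the same setup as
// fig_14_8 below): the reducer declared next is what a reduction kernel
// body receives as its second argument, so a typical use looks like
//
//   h.parallel_for(range<1>{N}, reduction(sum, plus<>()),
//                  [=](id<1> i, auto& r) { r.combine(data[i]); });
//
// with r += data[i] available as shorthand for plus-based reductions.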
7 | 8 | template 10 | class reducer { 11 | // Combine partial result with reducer's value 12 | void combine(const T& partial); 13 | }; 14 | 15 | // Other operators are available for standard binary 16 | // operations 17 | template 18 | auto& operator+=(reducer>&, const T&); 19 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_11_array_reduction.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | constexpr size_t N = 16; 13 | constexpr size_t B = 4; 14 | 15 | queue q; 16 | int* data = malloc_shared(N, q); 17 | int* histogram = malloc_shared(B, q); 18 | std::iota(data, data + N, 1); 19 | std::fill(histogram, histogram + B, 0); 20 | 21 | q.submit([&](handler& h) { 22 | // BEGIN CODE SNIP 23 | h.parallel_for( 24 | range{N}, 25 | reduction(span(histogram, B), plus<>()), 26 | [=](id<1> i, auto& histogram) { 27 | histogram[data[i] % B]++; 28 | }); 29 | // END CODE SNIP 30 | }).wait(); 31 | 32 | bool passed = true; 33 | std::cout << "Histogram:" << std::endl; 34 | for (int b = 0; b < B; ++b) { 35 | std::cout << "bin[" << b << "]: " << histogram[b] 36 | << std::endl; 37 | passed &= (histogram[b] == N / B); 38 | } 39 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 40 | 41 | free(histogram, q); 42 | free(data, q); 43 | return (passed) ? 0 : 1; 44 | } 45 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_12_user_defined_reduction.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | template 12 | using minloc = minimum>; 13 | 14 | int main() { 15 | constexpr size_t N = 16; 16 | 17 | queue q; 18 | float* data = malloc_shared(N, q); 19 | std::pair* res = 20 | malloc_shared>(1, q); 21 | std::generate(data, data + N, std::mt19937{}); 22 | 23 | std::pair identity = { 24 | std::numeric_limits::max(), 25 | std::numeric_limits::min()}; 26 | *res = identity; 27 | 28 | auto red = 29 | sycl::reduction(res, identity, minloc()); 30 | 31 | q.submit([&](handler& h) { 32 | h.parallel_for( 33 | range<1>{N}, red, [=](id<1> i, auto& res) { 34 | std::pair partial = {data[i], i}; 35 | res.combine(partial); 36 | }); 37 | }).wait(); 38 | 39 | std::cout << "minimum value = " << res->first << " at " 40 | << res->second << "\n"; 41 | 42 | std::pair gold = identity; 43 | for (int i = 0; i < N; ++i) { 44 | if (data[i] <= gold.first || 45 | (data[i] == gold.first && i < gold.second)) { 46 | gold.first = data[i]; 47 | gold.second = i; 48 | } 49 | } 50 | bool passed = (res->first == gold.first) && 51 | (res->second == gold.second); 52 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 53 | 54 | free(res, q); 55 | free(data, q); 56 | return (passed) ? 
0 : 1; 57 | } 58 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_15_map.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace sycl; 12 | 13 | int main() { 14 | queue q; 15 | 16 | const size_t N = 64; 17 | float* input = malloc_shared(N, q); 18 | float* output = malloc_shared(N, q); 19 | std::iota(input, input + N, 1); 20 | std::fill(output, output + N, 0); 21 | 22 | // BEGIN CODE SNIP 23 | // Compute the square root of each input value 24 | q.parallel_for(N, [=](id<1> i) { 25 | output[i] = sycl::sqrt(input[i]); 26 | }).wait(); 27 | // END CODE SNIP 28 | 29 | // Check that all outputs match serial execution. 30 | bool passed = true; 31 | for (int i = 0; i < N; ++i) { 32 | float gold = std::sqrt(input[i]); 33 | if (std::abs(output[i] - gold) >= 1.0E-06) { 34 | passed = false; 35 | } 36 | } 37 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 38 | 39 | free(output, q); 40 | free(input, q); 41 | return (passed) ? 0 : 1; 42 | } 43 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_16_stencil.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace sycl; 14 | 15 | int main() { 16 | queue q; 17 | 18 | const size_t N = 16; 19 | const size_t M = 16; 20 | range<2> stencil_range(N, M); 21 | range<2> alloc_range(N + 2, M + 2); 22 | std::vector input(alloc_range.size()), 23 | output(alloc_range.size()); 24 | std::iota(input.begin(), input.end(), 1); 25 | std::fill(output.begin(), output.end(), 0); 26 | 27 | { 28 | buffer input_buf(input.data(), alloc_range); 29 | buffer output_buf(output.data(), alloc_range); 30 | 31 | // BEGIN CODE SNIP 32 | q.submit([&](handler& h) { 33 | accessor input{input_buf, h}; 34 | accessor output{output_buf, h}; 35 | 36 | // Compute the average of each cell and its immediate 37 | // neighbors 38 | h.parallel_for(stencil_range, [=](id<2> idx) { 39 | int i = idx[0] + 1; 40 | int j = idx[1] + 1; 41 | 42 | float self = input[i][j]; 43 | float north = input[i - 1][j]; 44 | float east = input[i][j + 1]; 45 | float south = input[i + 1][j]; 46 | float west = input[i][j - 1]; 47 | output[i][j] = 48 | (self + north + east + south + west) / 5.0f; 49 | }); 50 | }); 51 | // END CODE SNIP 52 | } 53 | 54 | // Check that all outputs match serial execution. 55 | bool passed = true; 56 | for (int i = 1; i < N + 1; ++i) { 57 | for (int j = 1; j < M + 1; ++j) { 58 | float self = input[i * (M + 2) + j]; 59 | float north = input[(i - 1) * (M + 2) + j]; 60 | float east = input[i * (M + 2) + (j + 1)]; 61 | float south = input[(i + 1) * (M + 2) + j]; 62 | float west = input[i * (M + 2) + (j - 1)]; 63 | float gold = 64 | (self + north + east + south + west) / 5.0f; 65 | if (std::abs(output[i * (M + 2) + j] - gold) >= 66 | 1.0E-06) { 67 | passed = false; 68 | } 69 | } 70 | } 71 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 72 | return (passed) ? 
0 : 1; 73 | } 74 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_18_basic_reduction.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | constexpr size_t N = 16; 13 | 14 | queue q; 15 | int* data = malloc_shared(N, q); 16 | int* sum = malloc_shared(1, q); 17 | std::iota(data, data + N, 1); 18 | *sum = 0; 19 | 20 | // BEGIN CODE SNIP 21 | q.parallel_for(N, [=](id<1> i) { 22 | atomic_ref( 25 | *sum) += data[i]; 26 | }).wait(); 27 | // END CODE SNIP 28 | 29 | std::cout << "sum = " << *sum << "\n"; 30 | bool passed = (*sum == ((N * (N + 1)) / 2)); 31 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 32 | 33 | free(sum, q); 34 | free(data, q); 35 | return (passed) ? 0 : 1; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_19_nd_range_reduction.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | constexpr size_t N = 16; 13 | constexpr size_t B = 4; 14 | 15 | queue q; 16 | int* data = malloc_shared(N, q); 17 | int* sum = malloc_shared(1, q); 18 | std::iota(data, data + N, 1); 19 | *sum = 0; 20 | 21 | // BEGIN CODE SNIP 22 | q.parallel_for(nd_range<1>{N, B}, [=](nd_item<1> it) { 23 | int i = it.get_global_id(0); 24 | auto grp = it.get_group(); 25 | int group_sum = 26 | reduce_over_group(grp, data[i], plus<>()); 27 | if (grp.leader()) { 28 | atomic_ref( 31 | *sum) += group_sum; 32 | } 33 | }).wait(); 34 | // END CODE SNIP 35 | 36 | std::cout << "sum = " << *sum << "\n"; 37 | bool passed = (*sum == ((N * (N + 1)) / 2)); 38 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 39 | 40 | free(sum, q); 41 | free(data, q); 42 | return (passed) ? 0 : 1; 43 | } 44 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_23.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | uint32_t index = 9 | exclusive_scan(g, (uint32_t)predicate, plus<>()); 10 | if (predicate) dst[index] = value; 11 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_25.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | uint32_t index = 9 | exclusive_scan(sg, (uint32_t)predicate, plus<>()); 10 | return (predicate) ? 
new_value[index] : original_value; 11 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_8_one_reduction.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | constexpr size_t N = 16; 13 | 14 | queue q; 15 | int* data = malloc_shared(N, q); 16 | int* sum = malloc_shared(1, q); 17 | std::iota(data, data + N, 1); 18 | *sum = 0; 19 | 20 | q.submit([&](handler& h) { 21 | // BEGIN CODE SNIP 22 | h.parallel_for( 23 | range<1>{N}, reduction(sum, plus<>()), 24 | [=](id<1> i, auto& sum) { sum += data[i]; }); 25 | // END CODE SNIP 26 | }).wait(); 27 | 28 | std::cout << "sum = " << *sum << "\n"; 29 | bool passed = (*sum == ((N * (N + 1)) / 2)); 30 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 31 | 32 | free(sum, q); 33 | free(data, q); 34 | return (passed) ? 0 : 1; 35 | } 36 | -------------------------------------------------------------------------------- /samples/Ch14_common_parallel_patterns/fig_14_9.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | unspecified reduction(BufferT variable, handler& h, 10 | BinaryOperation combiner, 11 | const property_list& properties = {}); 12 | 13 | template 14 | unspecified reduction(BufferT variable, handler& h, 15 | const BufferT::value_type& identity, 16 | BinaryOperation combiner, 17 | const property_list& properties = {}); 18 | 19 | template 20 | unspecified reduction(T* variable, BinaryOperation combiner, 21 | const property_list& properties = {}); 22 | 23 | template 24 | unspecified reduction(T* variable, const T& identity, 25 | BinaryOperation combiner, 26 | const property_list& properties = {}); 27 | 28 | template 30 | unspecified reduction(span variables, 31 | BinaryOperation combiner, 32 | const property_list& properties = {}); 33 | 34 | template 36 | unspecified reduction(span variables, 37 | const T& identity, 38 | BinaryOperation combiner, 39 | const property_list& properties = {}); 40 | -------------------------------------------------------------------------------- /samples/Ch15_programming_for_gpus/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_15_3_single_task_matrix_multiplication 8 | SOURCES matrix_multiplication_harness.cpp fig_15_3_single_task_matrix_multiplication.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_15_5_somewhat_parallel_matrix_multiplication 13 | SOURCES matrix_multiplication_harness.cpp fig_15_5_somewhat_parallel_matrix_multiplication.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_15_7_more_parallel_matrix_multiplication 18 | SOURCES matrix_multiplication_harness.cpp fig_15_7_more_parallel_matrix_multiplication.cpp) 19 | 20 | add_book_sample( 21 | TEST 22 | TARGET fig_15_10_divergent_control_flow 23 | SOURCES fig_15_10_divergent_control_flow.cpp) 24 | 25 | add_book_sample( 26 | TEST 27 | TARGET fig_15_12_small_work_group_matrix_multiplication 28 | SOURCES 
matrix_multiplication_harness.cpp fig_15_12_small_work_group_matrix_multiplication.cpp) 29 | 30 | add_book_sample( 31 | TEST 32 | TARGET fig_15_18_columns_matrix_multiplication 33 | SOURCES matrix_multiplication_harness.cpp fig_15_18_columns_matrix_multiplication.cpp) 34 | -------------------------------------------------------------------------------- /samples/Ch15_programming_for_gpus/fig_15_10_divergent_control_flow.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr size_t array_size = 16; 12 | std::array data; 13 | 14 | for (int i = 0; i < array_size; i++) { 15 | data[i] = i; 16 | } 17 | 18 | buffer dataBuf{data}; 19 | 20 | queue q{default_selector_v}; 21 | q.submit([&](handler& h) { 22 | accessor dataAcc{dataBuf, h}; 23 | 24 | // BEGIN CODE SNIP 25 | h.parallel_for(array_size, [=](id<1> i) { 26 | auto condition = i[0] & 1; 27 | if (condition) { 28 | dataAcc[i] = dataAcc[i] * 2; // odd 29 | } else { 30 | dataAcc[i] = dataAcc[i] + 1; // even 31 | } 32 | }); 33 | // END CODE SNIP 34 | }); 35 | 36 | host_accessor dataAcc{dataBuf}; 37 | 38 | for (int i = 0; i < array_size; i++) { 39 | if (i & 1) { 40 | if (dataAcc[i] != i * 2) { 41 | std::cout << "Odd result did not validate at index " 42 | << i << "!\n"; 43 | return -1; 44 | } 45 | } else { 46 | if (dataAcc[i] != i + 1) { 47 | std::cout 48 | << "Even result did not validate at index " << i 49 | << "!\n"; 50 | return -1; 51 | } 52 | } 53 | } 54 | 55 | std::cout << "Success!\n"; 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /samples/Ch15_programming_for_gpus/fig_15_7_more_parallel_matrix_multiplication.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | extern const int matrixSize = 128; 10 | static const int iterations = 16; 11 | 12 | template 13 | double run_sycl(const std::vector& vecA, 14 | const std::vector& vecB, 15 | std::vector& vecC) { 16 | const int M = matrixSize; 17 | const int N = matrixSize; 18 | const int K = matrixSize; 19 | 20 | using ns = std::chrono::nanoseconds; 21 | ns::rep best_time = std::numeric_limits::max(); 22 | 23 | std::fill(vecC.begin(), vecC.end(), (T)0); 24 | 25 | buffer bufA{vecA}; // M * K elements 26 | buffer bufB{vecB}; // K * N elements 27 | buffer bufC{vecC}; // M * N elements 28 | 29 | queue q; // Choose any available device 30 | std::cout << "Running on device: " 31 | << q.get_device().get_info() 32 | << "\n"; 33 | 34 | for (int i = 0; i < iterations; ++i) { 35 | auto start = std::chrono::steady_clock::now(); 36 | 37 | q.submit([&](handler& h) { 38 | accessor matrixA{bufA, h}; 39 | accessor matrixB{bufB, h}; 40 | accessor matrixC{bufC, h}; 41 | 42 | // BEGIN CODE SNIP 43 | h.parallel_for(range{M, N}, [=](id<2> idx) { 44 | int m = idx[0]; 45 | int n = idx[1]; 46 | 47 | T sum = 0; 48 | for (int k = 0; k < K; k++) { 49 | sum += matrixA[m * K + k] * matrixB[k * N + n]; 50 | } 51 | 52 | matrixC[m * N + n] = sum; 53 | }); 54 | // END CODE SNIP 55 | }); 56 | 57 | q.wait(); // So that we know the kernel has finished 58 | // before checking time 59 | auto duration = 60 | std::chrono::steady_clock::now() - start; 61 | auto time = 62 | 
std::chrono::duration_cast(duration).count(); 63 | 64 | best_time = std::min(time, best_time); 65 | } 66 | 67 | double best_seconds = (double)best_time / 1e9; 68 | 69 | return best_seconds; 70 | } 71 | 72 | template double run_sycl( 73 | const std::vector& vecA, 74 | const std::vector& vecB, 75 | std::vector& vecC); 76 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_16_6_stream_triad 8 | SOURCES fig_16_6_stream_triad.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_16_12_forward_dep 13 | SOURCES fig_16_12_forward_dep.cpp) 14 | 15 | add_book_sample( 16 | TEST 17 | TARGET fig_16_18_vector_swizzle 18 | SOURCES fig_16_18_vector_swizzle.cpp) 19 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_10.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | template 9 | void init(queue& deviceQueue, T* VA, T* VB, T* VC, 10 | size_t array_size) { 11 | range<1> numOfItems{array_size}; 12 | 13 | buffer bufferA(VA, numOfItems); 14 | buffer bufferB(VB, numOfItems); 15 | buffer bufferC(VC, numOfItems); 16 | 17 | auto queue_event = deviceQueue.submit([&](handler& cgh) { 18 | auto aA = bufA.template get_access(cgh); 19 | auto aB = bufB.template get_access(cgh); 20 | auto aC = bufC.template get_access(cgh); 21 | 22 | cgh.parallel_for>(numOfItems, [=](id<1> wi) { 23 | aA[wi] = 2.0; 24 | aB[wi] = 1.0; 25 | aC[wi] = 0.0; 26 | }); 27 | }); 28 | 29 | queue_event.wait(); 30 | } 31 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_12_forward_dep.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | 8 | using namespace sycl; 9 | 10 | int main() { 11 | // BEGIN CODE SNIP 12 | const int n = 16, w = 16; 13 | 14 | queue q; 15 | range<2> G = {n, w}; 16 | range<2> L = {1, w}; 17 | 18 | int *a = malloc_shared(n * (n + 1), q); 19 | 20 | for (int i = 0; i < n; i++) 21 | for (int j = 0; j < n + 1; j++) a[i * n + j] = i + j; 22 | 23 | q.parallel_for( 24 | nd_range<2>{G, L}, 25 | [=](nd_item<2> it) [[sycl::reqd_sub_group_size(w)]] { 26 | // distribute uniform "i" over the sub-group with 27 | // 16-way redundant computation 28 | const int i = it.get_global_id(0); 29 | sub_group sg = it.get_sub_group(); 30 | 31 | for (int j = sg.get_local_id()[0]; j < n; j += w) { 32 | // load a[i*n+j+1:16] before updating a[i*n+j:16] 33 | // to preserve loop-carried forward dependency 34 | auto va = a[i * n + j + 1]; 35 | group_barrier(sg); 36 | a[i * n + j] = va + i + 2; 37 | } 38 | group_barrier(sg); 39 | }) 40 | .wait(); 41 | // END CODE SNIP 42 | 43 | if (a[0] == 3 && a[9] == 12) 44 | std::cout << "passed\n"; 45 | else 46 | std::cout << "failed\n"; 47 | free(a, q); 48 | } 49 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_15.hpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | cgh.parallel_for>(numOfItems,[=](id<1> wi) { 9 | x[wi] = a[wi].x; // lead to gather x0, x1, x2, x3 10 | y[wi] = a[wi].y; // lead to gather y0, y1, y2, y3 11 | z[wi] = a[wi].z; // lead to gather z0, z1, z2, z3 12 | w[wi] = a[wi].w; // lead to gather w0, w1, w2, w3 13 | }); 14 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_16.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | cgh.parallel_for>(numOfItems,[=](id<1> wi) { 9 | x[wi] = a.x[wi]; // lead to unit-stride vector load x[0:4] 10 | y[wi] = a.y[wi]; // lead to unit-stride vector load y[0:4] 11 | z[wi] = a.z[wi]; // lead to unit-stride vector load z[0:4] 12 | w[wi] = a.w[wi]; // lead to unit-stride vector load w[0:4] 13 | }); 14 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_17pre.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | int id = get_global_id(0); a[id] = b[id] + c[id]; 9 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_18_vector_swizzle.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #define SYCL_SIMPLE_SWIZZLES 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | // BEGIN CODE SNIP 13 | queue q; 14 | 15 | bool *resArray = malloc_shared(1, q); 16 | resArray[0] = true; 17 | 18 | q.single_task([=]() { 19 | sycl::vec old_v = 20 | sycl::vec(0, 100, 200, 300); 21 | sycl::vec new_v = sycl::vec(); 22 | 23 | new_v.rgba() = old_v.abgr(); 24 | int vals[] = {300, 200, 100, 0}; 25 | 26 | if (new_v.r() != vals[0] || new_v.g() != vals[1] || 27 | new_v.b() != vals[2] || new_v.a() != vals[3]) { 28 | resArray[0] = false; 29 | } 30 | }).wait(); 31 | // END CODE SNIP 32 | 33 | if (resArray[0]) 34 | std::cout << "passed\n"; 35 | else 36 | std::cout << "failed\n"; 37 | free(resArray, q); 38 | } 39 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
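// On a CPU backend, consecutive work-items of the snippet below are
// typically packed into SIMD lanes, so with 512-bit vector registers and
// float data one instruction can process 16 adjacent values of k,
// provided x, y and z are accessed with unit stride.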
7 | 8 | h.parallel_for(range(1024), 9 | [=](id<1> k) { z[k] = x[k] + y[k]; }); 10 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_4.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | // C++ STREAM Triad workload 9 | // __restrict is used to denote no memory aliasing among 10 | // arguments 11 | template 12 | double triad(T* __restrict VA, T* __restrict VB, 13 | T* __restrict VC, size_t array_size, 14 | const T scalar) { 15 | double ts = timer_start(); 16 | for (size_t id = 0; id < array_size; id++) { 17 | VC[id] = VA[id] + scalar * VB[id]; 18 | } 19 | double te = timer_end(); 20 | return (te – ts); 21 | } 22 | -------------------------------------------------------------------------------- /samples/Ch16_programming_for_cpus/fig_16_5.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are snippets 6 | // that are not set up to be compiled as is. 7 | 8 | // STREAM Triad: SIMD code generated by the compiler, where zmm0, zmm1 9 | // and zmm2 are SIMD vector registers. The vectorized loop is unrolled by 4 10 | // to leverage the out-of-execution of instructions from Xeon CPU and to 11 | // hide memory load and store latency 12 | 13 | // clang-format off 14 | 15 | # %bb.0: # %entry 16 | vbroadcastsd %xmm0, %zmm0 # broadcast “scalar” to SIMD reg zmm0 17 | movq $-32, %rax 18 | .p2align 4, 0x90 19 | .LBB0_1: # %loop.19 20 | # =>This Loop Header: Depth=1 21 | vmovupd 256(%rdx,%rax,8), %zmm1 # load 8 elements from memory to zmm1 22 | vfmadd213pd 256(%rsi,%rax,8), %zmm0, %zmm1 # zmm1=(zmm0*zmm1)+mem 23 | # perform SIMD FMA for 8 data elements 24 | # VC[id:8] = scalar*VB[id:8]+VA[id:8] 25 | vmovupd %zmm1, 256(%rdi,%rax,8) # store 8-element result to mem from zmm1 26 | # This SIMD loop body is unrolled by 4 27 | vmovupd 320(%rdx,%rax,8), %zmm1 28 | vfmadd213pd 320(%rsi,%rax,8), %zmm0, %zmm1 # zmm1=(zmm0*zmm1)+mem 29 | vmovupd %zmm1, 320(%rdi,%rax,8) 30 | 31 | vmovupd 384(%rdx,%rax,8), %zmm1 32 | vfmadd213pd 384(%rsi,%rax,8), %zmm0, %zmm1 # zmm1=(zmm0*zmm1)+mem 33 | vmovupd %zmm1, 384(%rdi,%rax,8) 34 | 35 | vmovupd 448(%rdx,%rax,8), %zmm1 36 | vfmadd213pd 448(%rsi,%rax,8), %zmm0, %zmm1 # zmm1=(zmm0*zmm1)+mem 37 | vmovupd %zmm1, 448(%rdi,%rax,8) 38 | addq $32, %rax 39 | cmpq $134217696, %rax # imm = 0x7FFFFE0 40 | jb .LBB0_1 41 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TARGET fig_17_9_fpga_selector 7 | SOURCES fig_17_9_fpga_selector.cpp) 8 | 9 | add_book_sample( 10 | TARGET fig_17_11_fpga_emulator_selector 11 | SOURCES fig_17_11_fpga_emulator_selector.cpp) 12 | 13 | add_book_sample( 14 | TARGET fig_17_17_ndrange_func 15 | SOURCES fig_17_17_ndrange_func.cpp) 16 | 17 | add_book_sample( 18 | TARGET fig_17_18_loop_func 19 | SOURCES fig_17_18_loop_func.cpp) 20 | 21 | add_book_sample( 22 | TARGET fig_17_20_loop_carried_deps 23 | SOURCES 
fig_17_20_loop_carried_deps.cpp) 24 | 25 | add_book_sample( 26 | TARGET fig_17_22_loop_carried_state 27 | SOURCES fig_17_22_loop_carried_state.cpp) 28 | 29 | if(NOT WITHCUDA AND NOT WITHROCM) 30 | # TEMPORARILY DISABLE: doesn't work with CUDA or ROCm backend. 31 | add_book_sample( 32 | TARGET fig_17_31_inter_kernel_pipe 33 | SOURCES fig_17_31_inter_kernel_pipe.cpp) 34 | endif() 35 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_11_fpga_emulator_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include // For fpga_selector_v 6 | #include 7 | using namespace sycl; 8 | 9 | void say_device(const queue& q) { 10 | std::cout << "Device : " 11 | << q.get_device().get_info() 12 | << "\n"; 13 | } 14 | 15 | int main() { 16 | queue q{ext::intel::fpga_emulator_selector_v}; 17 | say_device(q); 18 | 19 | q.submit([&](handler& h) { 20 | h.parallel_for(1024, [=](auto idx) { 21 | // ... 22 | }); 23 | }); 24 | 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_17_ndrange_func.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include // For fpga_emulator_selector_v 6 | #include 7 | using namespace sycl; 8 | 9 | int generate_random_number_from_ID(const id<3>& I) { 10 | return 0; // Useless non-RNG generator as proxy! 11 | }; 12 | 13 | int main() { 14 | queue q{ext::intel::fpga_emulator_selector_v}; 15 | 16 | buffer B{range{16, 16, 16}}; 17 | 18 | q.submit([&](handler& h) { 19 | accessor output(B, h); 20 | // BEGIN CODE SNIP 21 | h.parallel_for({16, 16, 16}, [=](auto I) { 22 | output[I] = generate_random_number_from_ID(I); 23 | }); 24 | // END CODE SNIP 25 | }); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_18_loop_func.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include // For fpga_emulator_selector_v 6 | #include 7 | using namespace sycl; 8 | 9 | int generate_random_number(const int& state) { 10 | return 0; // Useless non-RNG generator as proxy! 11 | }; 12 | 13 | int main() { 14 | constexpr int size = 64; 15 | queue q{ext::intel::fpga_emulator_selector_v}; 16 | 17 | buffer b{range{size}}; 18 | 19 | q.submit([&](handler& h) { 20 | accessor output(b, h); 21 | 22 | h.single_task([=]() { 23 | // BEGIN CODE SNIP 24 | int state = 0; 25 | for (int i = 0; i < size; i++) { 26 | state = generate_random_number(state); 27 | output[i] = state; 28 | } 29 | // END CODE SNIP 30 | }); 31 | }); 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_20_loop_carried_deps.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include // For fpga_emulator_selector_v 6 | #include 7 | using namespace sycl; 8 | 9 | int generate_random_number(const int& state) { 10 | return 0; // Useless non-RNG generator as proxy! 
11 | }; 12 | 13 | int main() { 14 | constexpr int size = 64; 15 | queue q{ext::intel::fpga_emulator_selector_v}; 16 | 17 | buffer<int> b{range{size}}; 18 | 19 | q.submit([&](handler& h) { 20 | accessor output(b, h); 21 | 22 | h.single_task([=]() { 23 | // BEGIN CODE SNIP 24 | int a = 0; 25 | for (int i = 0; i < size; i++) { 26 | a = a + i; 27 | } 28 | // END CODE SNIP 29 | }); 30 | }); 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_22_loop_carried_state.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <sycl/ext/intel/fpga_extensions.hpp> // For fpga_emulator_selector_v 6 | #include <sycl/sycl.hpp> 7 | using namespace sycl; 8 | 9 | int generate_incremental_random_number(const int& state) { 10 | return 0; // Useless non-RNG generator as proxy! 11 | }; 12 | 13 | int main() { 14 | constexpr int size = 64; 15 | constexpr int seed = 0; 16 | 17 | queue q{ext::intel::fpga_emulator_selector_v}; 18 | 19 | buffer<int> b{range{size}}; 20 | 21 | q.submit([&](handler& h) { 22 | accessor output(b, h); 23 | 24 | // BEGIN CODE SNIP 25 | h.single_task([=]() { 26 | int state = seed; 27 | for (int i = 0; i < size; i++) { 28 | state = generate_incremental_random_number(state); 29 | output[i] = state; 30 | } 31 | }); 32 | // END CODE SNIP 33 | }); 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_31_inter_kernel_pipe.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <array> 6 | #include <sycl/ext/intel/fpga_extensions.hpp> // For fpga_emulator_selector_v 7 | #include <sycl/sycl.hpp> 8 | using namespace sycl; 9 | 10 | int main() { 11 | constexpr int count = 1024; 12 | std::array<int, count> in_array; 13 | 14 | // Initialize input array 15 | for (int i = 0; i < count; i++) { 16 | in_array[i] = i; 17 | } 18 | 19 | // Buffer initialized from in_array (std::array) 20 | buffer b_in{in_array}; 21 | 22 | // Uninitialized buffer with count elements 23 | buffer<int> b_out{range{count}}; 24 | 25 | // Acquire queue to emulated FPGA device 26 | queue q{ext::intel::fpga_emulator_selector_v}; 27 | 28 | // BEGIN CODE SNIP 29 | // Create alias for pipe type to be consistent across uses 30 | using my_pipe = ext::intel::pipe<class some_pipe, int>; 31 | 32 | // ND-range kernel 33 | q.submit([&](handler& h) { 34 | auto a = accessor(b_in, h); 35 | 36 | h.parallel_for( 37 | count, [=](auto idx) { my_pipe::write(a[idx]); }); 38 | }); 39 | 40 | // Single_task kernel 41 | q.submit([&](handler& h) { 42 | auto a = accessor(b_out, h); 43 | 44 | h.single_task([=]() { 45 | for (int i = 0; i < count; i++) { 46 | a[i] = my_pipe::read(); 47 | } 48 | }); 49 | }); 50 | 51 | // END CODE SNIP 52 | 53 | auto a = host_accessor(b_out); 54 | for (int i = 0; i < count; i++) { 55 | if (a[i] != i) { 56 | std::cout << "Failure on element " << i << "\n"; 57 | return 1; 58 | } 59 | } 60 | std::cout << "Passed!\n"; 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_32.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be
compiled as is. 7 | 8 | template <typename name, typename dataT, 9 | size_t min_capacity = 0> 10 | class pipe; 11 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_33.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | // Blocking 9 | T read(); 10 | void write(const T &data); 11 | 12 | // Non-blocking 13 | T read(bool &success_code); 14 | void write(const T &data, bool &success_code); 15 | -------------------------------------------------------------------------------- /samples/Ch17_programming_for_fpgas/fig_17_9_fpga_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <sycl/ext/intel/fpga_extensions.hpp> // For fpga_selector_v 6 | #include <sycl/sycl.hpp> 7 | using namespace sycl; 8 | 9 | void say_device(const queue& q) { 10 | std::cout << "Device : " 11 | << q.get_device().get_info<info::device::name>() 12 | << "\n"; 13 | } 14 | 15 | int main() { 16 | queue q{ext::intel::fpga_selector_v}; 17 | say_device(q); 18 | 19 | q.submit([&](handler& h) { 20 | h.parallel_for(1024, [=](auto idx) { 21 | // ... 22 | }); 23 | }); 24 | 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | if(NOT WITHCUDA) 6 | # TEMPORARILY DISABLE: doesn't work with CUDA backend. 7 | add_book_sample( 8 | TEST 9 | TARGET fig_18_1_builtin 10 | SOURCES fig_18_1_builtin.cpp) 11 | endif() 12 | 13 | add_book_sample( 14 | TEST 15 | TARGET fig_18_2_swap 16 | SOURCES fig_18_2_swap.cpp) 17 | 18 | if(NOT NODPL) 19 | add_book_sample( 20 | TEST 21 | TARGET fig_18_6_std_fill 22 | SOURCES fig_18_6_std_fill.cpp) 23 | endif() 24 | 25 | if(NOT NODPL) 26 | add_book_sample( 27 | TEST 28 | TARGET fig_18_7_std_fill_default_policy 29 | SOURCES fig_18_7_std_fill_default_policy.cpp) 30 | endif() 31 | 32 | if(NOT NODPL) 33 | add_book_sample( 34 | TEST 35 | TARGET fig_18_8_binary_search 36 | SOURCES fig_18_8_binary_search.cpp) 37 | endif() 38 | 39 | if(NOT NODPL) 40 | add_book_sample( 41 | TEST 42 | TARGET fig_18_9_pstl_usm_device 43 | SOURCES fig_18_9_pstl_usm_device.cpp) 44 | endif() 45 | 46 | if(NOT NODPL) 47 | add_book_sample( 48 | TEST 49 | TARGET fig_18_10_pstl_usm 50 | SOURCES fig_18_10_pstl_usm.cpp) 51 | endif() 52 | 53 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_10_pstl_usm.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include <oneapi/dpl/algorithm> 6 | #include <oneapi/dpl/execution> 7 | #include <sycl/sycl.hpp> 8 | 9 | int main() { 10 | sycl::queue q; 11 | const int n = 10; 12 | sycl::usm_allocator<int, sycl::usm::alloc::shared> alloc( 13 | q); 14 | std::vector<int, decltype(alloc)> vec(n, alloc); 15 | 16 | std::fill(oneapi::dpl::execution::make_device_policy(q), 17 | vec.begin(), vec.end(), 78); 18 | q.wait(); 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_1_builtin.cpp: -------------------------------------------------------------------------------- 1 | // Copyright
(C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace sycl; 10 | 11 | int main() { 12 | // BEGIN CODE SNIP 13 | constexpr int size = 9; 14 | std::array a; 15 | std::array b; 16 | 17 | bool pass = true; 18 | 19 | for (int i = 0; i < size; ++i) { 20 | a[i] = i; 21 | b[i] = i; 22 | } 23 | 24 | queue q; 25 | 26 | range sz{size}; 27 | 28 | buffer bufA(a); 29 | buffer bufB(b); 30 | buffer bufP(&pass, 1); 31 | 32 | q.submit([&](handler &h) { 33 | accessor accA{bufA, h}; 34 | accessor accB{bufB, h}; 35 | accessor accP{bufP, h}; 36 | 37 | h.parallel_for(size, [=](id<1> idx) { 38 | accA[idx] = std::log(accA[idx]); 39 | accB[idx] = sycl::log(accB[idx]); 40 | if (!sycl::isequal(accA[idx], accB[idx])) { 41 | accP[0] = false; 42 | } 43 | }); 44 | }); 45 | // END CODE SNIP 46 | 47 | host_accessor host_P(bufP); 48 | 49 | if (host_P[0]) { 50 | std::cout << "Matched\n"; 51 | } else { 52 | std::cout << "Unmatched\n"; 53 | } 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_2_swap.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace sycl; 10 | 11 | int main() { 12 | std::array arr{8, 9}; 13 | buffer buf{arr}; 14 | 15 | { 16 | host_accessor host_A(buf); 17 | std::cout << "Before: " << host_A[0] << ", " 18 | << host_A[1] << "\n"; 19 | } // End scope of host_A so that upcoming kernel can 20 | // operate on buf 21 | 22 | queue q; 23 | q.submit([&](handler &h) { 24 | accessor a{buf, h}; 25 | h.single_task([=]() { 26 | // Call std::swap! 27 | std::swap(a[0], a[1]); 28 | }); 29 | }); 30 | 31 | host_accessor host_B(buf); 32 | std::cout << "After: " << host_B[0] << ", " << host_B[1] 33 | << "\n"; 34 | return 0; 35 | } 36 | 37 | // Sample output: 38 | // 8, 9 39 | // 9, 8 40 | 41 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_5.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
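// --- Illustrative usage sketch (not book figure text) ---
// The execution-policy objects shown below are normally passed
// straight to a standard algorithm. A minimal, self-contained
// example, assuming a caller-provided sycl::queue and buffer; the
// function name, the kernel-name tag "class Negate", and the header
// set are assumptions, not part of the original figure:
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
#include <oneapi/dpl/iterator>
#include <sycl/sycl.hpp>

void negate_on_device(sycl::queue& q, sycl::buffer<int>& buf) {
  // Wrap the buffer in oneDPL iterators so the algorithm can run on
  // the device without an explicit host copy
  auto first = oneapi::dpl::begin(buf);
  auto last = oneapi::dpl::end(buf);
  auto policy =
      oneapi::dpl::execution::make_device_policy<class Negate>(q);
  // The algorithm is dispatched to the device associated with q
  std::transform(policy, first, last, first,
                 [](int x) { return -x; });
}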
7 | 8 | auto policy_b = device_policy{ 10 | sycl::device{sycl::gpu_selector{}}}; 11 | std::for_each(policy_b, …); 12 | auto policy_c = 13 | device_policy{sycl::default_selector{}}; 15 | std::for_each(policy_c, …); 16 | auto policy_d = 17 | make_device_policy(default_policy); 18 | std::for_each(policy_d, …); 19 | auto policy_e = 20 | make_device_policy(sycl::queue{}); 21 | std::for_each(policy_e, …); 22 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_6_std_fill.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // ------------------------------------------------------- 6 | // Changed from Book: 7 | // old naming dpstd:: is now oneapi::dpl:: 8 | // ------------------------------------------------------- 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | int main() { 16 | sycl::queue q; 17 | sycl::buffer buf{1000}; 18 | 19 | auto buf_begin = oneapi::dpl::begin(buf); 20 | auto buf_end = oneapi::dpl::end(buf); 21 | 22 | auto policy = oneapi::dpl::execution::make_device_policy< 23 | class fill>(q); 24 | std::fill(policy, buf_begin, buf_end, 42); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_7_std_fill_default_policy.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | int main() { 11 | std::vector v(100000); 12 | std::fill(oneapi::dpl::execution::dpcpp_default, 13 | v.begin(), v.end(), 42); 14 | 15 | if (v[788] == 42) 16 | std::cout << "passed" << std::endl; 17 | else 18 | std::cout << "failed" << std::endl; 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_8_binary_search.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // clang-format off 6 | #include 7 | // clang-format on 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace sycl; 14 | 15 | int main() { 16 | buffer kB{range<1>(10)}; 17 | buffer vB{range<1>(5)}; 18 | buffer rB{range<1>(5)}; 19 | { 20 | host_accessor k{kB}; 21 | host_accessor v{vB}; 22 | 23 | // Initialize data, sorted 24 | k[0] = 0; 25 | k[1] = 5; 26 | k[2] = 6; 27 | k[3] = 6; 28 | k[4] = 7; 29 | k[5] = 7; 30 | k[6] = 8; 31 | k[7] = 8; 32 | k[8] = 9; 33 | k[9] = 9; 34 | 35 | v[0] = 1; 36 | v[1] = 6; 37 | v[2] = 3; 38 | v[3] = 7; 39 | v[4] = 8; 40 | } 41 | 42 | // create dpc++ iterators 43 | auto k_beg = oneapi::dpl::begin(kB); 44 | auto k_end = oneapi::dpl::end(kB); 45 | auto v_beg = oneapi::dpl::begin(vB); 46 | auto v_end = oneapi::dpl::end(vB); 47 | auto r_beg = oneapi::dpl::begin(rB); 48 | 49 | // create named policy from existing one 50 | auto policy = oneapi::dpl::execution::make_device_policy< 51 | class bSearch>(oneapi::dpl::execution::dpcpp_default); 52 | 53 | // call algorithm 54 | oneapi::dpl::binary_search(policy, k_beg, k_end, v_beg, 55 | v_end, r_beg); 56 | 57 | // check data 58 | host_accessor r{rB}; 59 | if ((r[0] == false) && (r[1] == true) && 60 | (r[2] == false) && (r[3] == true) && (r[4] == true)) { 61 | std::cout << "Passed. 
\nRun on " 62 | << policy.queue() 63 | .get_device() 64 | .get_info() 65 | << "\n"; 66 | } else 67 | std::cout << "failed: values do not match.\n"; 68 | 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /samples/Ch18_libraries/fig_18_9_pstl_usm_device.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | int main() { 10 | sycl::queue q; 11 | const int n = 10; 12 | int* h_head = sycl::malloc_host(n, q); 13 | int* d_head = sycl::malloc_device(n, q); 14 | std::fill(oneapi::dpl::execution::make_device_policy(q), 15 | d_head, d_head + n, 78); 16 | q.wait(); 17 | 18 | q.memcpy(h_head, d_head, n * sizeof(int)); 19 | q.wait(); 20 | 21 | if (h_head[8] == 78) 22 | std::cout << "passed" << std::endl; 23 | else 24 | std::cout << "failed" << std::endl; 25 | 26 | sycl::free(h_head, q); 27 | sycl::free(d_head, q); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TARGET fig_19_3_data_race 7 | SOURCES fig_19_3_data_race.cpp) 8 | 9 | add_book_sample( 10 | TEST 11 | TARGET fig_19_6_avoid_data_race_with_barrier 12 | SOURCES fig_19_6_avoid_data_race_with_barrier.cpp) 13 | 14 | add_book_sample( 15 | TEST 16 | TARGET fig_19_7_avoid_data_race_with_atomics 17 | SOURCES fig_19_7_avoid_data_race_with_atomics.cpp) 18 | 19 | add_book_sample( 20 | TEST 21 | TARGET fig_19_15_buffer_and_atomic_ref 22 | SOURCES fig_19_15_buffer_and_atomic_ref.cpp) 23 | 24 | add_book_sample( 25 | TEST 26 | TARGET fig_19_16_usm_and_atomic_ref 27 | SOURCES fig_19_16_usm_and_atomic_ref.cpp) 28 | 29 | add_book_sample( 30 | TEST 31 | TARGET fig_19_17_histogram 32 | SOURCES fig_19_17_histogram.cpp) 33 | 34 | add_book_sample( 35 | TARGET fig_19_18-19_device_latch 36 | SOURCES fig_19_18-19_device_latch.cpp) 37 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_11.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
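// --- Illustrative usage sketch (not book figure text) ---
// How the interface declared below is typically used: wrap an
// ordinary object in an atomic_ref inside a kernel and use its
// member operators. The queue, the USM pointer, and the function
// name are assumptions for illustration only:
#include <sycl/sycl.hpp>

void add_one(sycl::queue& q, int* counter /* USM allocation */) {
  q.single_task([=] {
     // DefaultOrder = relaxed, DefaultScope = device; the address
     // space template parameter keeps its generic_space default
     sycl::atomic_ref<int, sycl::memory_order::relaxed,
                      sycl::memory_scope::device>
         ref(*counter);
     ref += 1;  // read-modify-write using the defaults declared below
   }).wait();
}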
7 | 8 | template <typename T, memory_order DefaultOrder, 9 | memory_scope DefaultScope, 10 | access::address_space AddressSpace = access::address_space::generic_space> 11 | class atomic_ref { 12 | public: 13 | using value_type = T; 14 | static constexpr size_t required_alignment = 15 | /* implementation-defined */; 16 | static constexpr bool is_always_lock_free = 17 | /* implementation-defined */; 18 | static constexpr memory_order default_read_order = 19 | memory_order_traits<DefaultOrder>::read_order; 20 | static constexpr memory_order default_write_order = 21 | memory_order_traits<DefaultOrder>::write_order; 22 | static constexpr memory_order 23 | default_read_modify_write_order = DefaultOrder; 24 | static constexpr memory_scope default_scope = 25 | DefaultScope; 26 | 27 | explicit atomic_ref(T& obj); 28 | atomic_ref(const atomic_ref& ref) noexcept; 29 | }; 30 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_12.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 7 | 8 | void store( 9 | T operand, memory_order order = default_write_order, 10 | memory_scope scope = default_scope) const noexcept; 11 | T operator=( 12 | T desired) const noexcept; // equivalent to store 13 | 14 | T load(memory_order order = default_read_order, 15 | memory_scope scope = default_scope) const noexcept; 16 | operator T() const noexcept; // equivalent to load 17 | 18 | T exchange( 19 | T operand, 20 | memory_order order = default_read_modify_write_order, 21 | memory_scope scope = default_scope) const noexcept; 22 | 23 | bool compare_exchange_weak( 24 | T &expected, T desired, memory_order success, 25 | memory_order failure, 26 | memory_scope scope = default_scope) const noexcept; 27 | 28 | bool compare_exchange_weak( 29 | T &expected, T desired, 30 | memory_order order = default_read_modify_write_order, 31 | memory_scope scope = default_scope) const noexcept; 32 | 33 | bool compare_exchange_strong( 34 | T &expected, T desired, memory_order success, 35 | memory_order failure, 36 | memory_scope scope = default_scope) const noexcept; 37 | 38 | bool compare_exchange_strong( 39 | T &expected, T desired, 40 | memory_order order = default_read_modify_write_order, 41 | memory_scope scope = default_scope) const noexcept; 42 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_13.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is.
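// --- Illustrative usage sketch (not book figure text) ---
// The integral fetch operations declared below in action: each
// work-item increments a shared counter and records a running
// maximum. The queue, the USM pointers, and the names are
// assumptions for illustration only:
#include <sycl/sycl.hpp>

void count_and_max(sycl::queue& q, const int* in, size_t n,
                   int* count, int* max_val) {
  q.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> i) {
     sycl::atomic_ref<int, sycl::memory_order::relaxed,
                      sycl::memory_scope::device>
         c(*count), m(*max_val);
     c.fetch_add(1);      // uses the default order and scope
     m.fetch_max(in[i]);  // integral-only fetch operation
   }).wait();
}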
7 | 8 | Integral fetch_add( 9 | Integral operand, 10 | memory_order order = default_read_modify_write_order, 11 | memory_scope scope = default_scope) const noexcept; 12 | 13 | Integral fetch_sub( 14 | Integral operand, 15 | memory_order order = default_read_modify_write_order, 16 | memory_scope scope = default_scope) const noexcept; 17 | 18 | Integral fetch_and( 19 | Integral operand, 20 | memory_order order = default_read_modify_write_order, 21 | memory_scope scope = default_scope) const noexcept; 22 | 23 | Integral fetch_or( 24 | Integral operand, 25 | memory_order order = default_read_modify_write_order, 26 | memory_scope scope = default_scope) const noexcept; 27 | 28 | Integral fetch_min( 29 | Integral operand, 30 | memory_order order = default_read_modify_write_order, 31 | memory_scope scope = default_scope) const noexcept; 32 | 33 | Integral fetch_max( 34 | Integral operand, 35 | memory_order order = default_read_modify_write_order, 36 | memory_scope scope = default_scope) const noexcept; 37 | 38 | Integral operator++(int) const noexcept; 39 | Integral operator--(int) const noexcept; 40 | Integral operator++() const noexcept; 41 | Integral operator--() const noexcept; 42 | Integral operator+=(Integral) const noexcept; 43 | Integral operator-=(Integral) const noexcept; 44 | Integral operator&=(Integral) const noexcept; 45 | Integral operator|=(Integral) const noexcept; 46 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_14.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
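// --- Illustrative usage sketch (not book figure text) ---
// The floating-point specialization declared below supports a
// smaller set of fetch operations; a common use is accumulating a
// sum with fetch_add. The queue, the USM pointers, and the names
// are assumptions for illustration only:
#include <sycl/sycl.hpp>

void accumulate(sycl::queue& q, const float* data, size_t n,
                float* sum) {
  q.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> i) {
     sycl::atomic_ref<float, sycl::memory_order::relaxed,
                      sycl::memory_scope::device>
         s(*sum);
     s.fetch_add(data[i]);  // floating-point atomic add
   }).wait();
}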
7 | 8 | Floating fetch_add( 9 | Floating operand, 10 | memory_order order = default_read_modify_write_order, 11 | memory_scope scope = default_scope) const noexcept; 12 | 13 | Floating fetch_sub( 14 | Floating operand, 15 | memory_order order = default_read_modify_write_order, 16 | memory_scope scope = default_scope) const noexcept; 17 | 18 | Floating fetch_min( 19 | Floating operand, 20 | memory_order order = default_read_modify_write_order, 21 | memory_scope scope = default_scope) const noexcept; 22 | 23 | Floating fetch_max( 24 | Floating operand, 25 | memory_order order = default_read_modify_write_order, 26 | memory_scope scope = default_scope) const noexcept; 27 | 28 | Floating operator+=(Floating) const noexcept; 29 | Floating operator-=(Floating) const noexcept; 30 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_15_buffer_and_atomic_ref.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | queue q; 13 | 14 | const size_t N = 32; 15 | const size_t M = 4; 16 | std::vector data(N); 17 | std::fill(data.begin(), data.end(), 0); 18 | 19 | { 20 | buffer buf(data); 21 | 22 | // BEGIN CODE SNIP 23 | q.submit([&](handler& h) { 24 | accessor acc{buf, h}; 25 | h.parallel_for(N, [=](id<1> i) { 26 | int j = i % M; 27 | atomic_ref 30 | atomic_acc(acc[j]); 31 | atomic_acc += 1; 32 | }); 33 | }); 34 | // END CODE SNIP 35 | } 36 | 37 | for (int i = 0; i < N; ++i) { 38 | std::cout << "data [" << i << "] = " << data[i] << "\n"; 39 | } 40 | 41 | bool passed = true; 42 | int* gold = (int*)malloc(N * sizeof(int)); 43 | std::fill(gold, gold + N, 0); 44 | for (int i = 0; i < N; ++i) { 45 | int j = i % M; 46 | gold[j] += 1; 47 | } 48 | for (int i = 0; i < N; ++i) { 49 | if (data[i] != gold[i]) { 50 | passed = false; 51 | } 52 | } 53 | std::cout << ((passed) ? "SUCCESS\n" : "FAILURE\n"); 54 | free(gold); 55 | return (passed) ? 0 : 1; 56 | } 57 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_16_usm_and_atomic_ref.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // The contents of this file are identical to 6 | // fig_19_7_avoid_data_race_with_atomics.cpp. 7 | // The figure is reproduced in the book for readability, 8 | // and duplicated here to avoid confusion. 
9 | 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace sycl; 15 | 16 | int main() { 17 | queue q; 18 | 19 | const size_t N = 32; 20 | const size_t M = 4; 21 | 22 | int* data = malloc_shared(N, q); 23 | std::fill(data, data + N, 0); 24 | 25 | // BEGIN CODE SNIP 26 | q.parallel_for(N, [=](id<1> i) { 27 | int j = i % M; 28 | atomic_ref 31 | atomic_data(data[j]); 32 | atomic_data += 1; 33 | }).wait(); 34 | // END CODE SNIP 35 | 36 | for (int i = 0; i < N; ++i) { 37 | std::cout << "data [" << i << "] = " << data[i] << "\n"; 38 | } 39 | bool passed = true; 40 | int* gold = (int*)malloc(N * sizeof(int)); 41 | std::fill(gold, gold + N, 0); 42 | for (int i = 0; i < N; ++i) { 43 | int j = i % M; 44 | gold[j] += 1; 45 | } 46 | for (int i = 0; i < N; ++i) { 47 | if (data[i] != gold[i]) { 48 | passed = false; 49 | } 50 | } 51 | std::cout << ((passed) ? "SUCCESS\n" : "FAILURE\n"); 52 | free(gold); 53 | free(data, q); 54 | return (passed) ? 0 : 1; 55 | } 56 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_3_data_race.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | queue q; 13 | 14 | const size_t N = 32; 15 | const size_t M = 4; 16 | 17 | // BEGIN CODE SNIP 18 | int* data = malloc_shared(N, q); 19 | std::fill(data, data + N, 0); 20 | 21 | q.parallel_for(N, [=](id<1> i) { 22 | int j = i % M; 23 | data[j] += 1; 24 | }).wait(); 25 | 26 | for (int i = 0; i < N; ++i) { 27 | std::cout << "data [" << i << "] = " << data[i] << "\n"; 28 | } 29 | // END CODE SNIP 30 | 31 | bool passed = true; 32 | int* gold = (int*)malloc(N * sizeof(int)); 33 | std::fill(gold, gold + N, 0); 34 | for (int i = 0; i < N; ++i) { 35 | int j = i % M; 36 | gold[j] += 1; 37 | } 38 | for (int i = 0; i < N; ++i) { 39 | if (data[i] != gold[i]) { 40 | passed = false; 41 | } 42 | } 43 | std::cout << ((passed) ? "SUCCESS\n" : "FAILURE\n"); 44 | free(gold); 45 | free(data, q); 46 | return (passed) ? 
0 : 1; 47 | } 48 | 49 | 50 | // N = 2, M = 2: 51 | // data [0] = 1 52 | // data [1] = 1 53 | // 54 | // N = 2, M = 1: 55 | // data [0] = 1 56 | // data [1] = 0 57 | 58 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_6_avoid_data_race_with_barrier.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | queue q; 13 | 14 | const uint32_t N = 32; 15 | const uint32_t M = 4; 16 | 17 | // BEGIN CODE SNIP 18 | int* data = malloc_shared(N, q); 19 | std::fill(data, data + N, 0); 20 | 21 | // Launch exactly one work-group 22 | // Number of work-groups = global / local 23 | range<1> global{N}; 24 | range<1> local{N}; 25 | 26 | q.parallel_for(nd_range<1>{global, local}, 27 | [=](nd_item<1> it) { 28 | int i = it.get_global_id(0); 29 | int j = i % M; 30 | for (int round = 0; round < N; ++round) { 31 | // Allow exactly one work-item update 32 | // per round 33 | if (i == round) { 34 | data[j] += 1; 35 | } 36 | group_barrier(it.get_group()); 37 | } 38 | }) 39 | .wait(); 40 | 41 | for (int i = 0; i < N; ++i) { 42 | std::cout << "data [" << i << "] = " << data[i] << "\n"; 43 | } 44 | // END CODE SNIP 45 | 46 | bool passed = true; 47 | int* gold = (int*)malloc(N * sizeof(int)); 48 | std::fill(gold, gold + N, 0); 49 | for (int i = 0; i < N; ++i) { 50 | int j = i % M; 51 | gold[j] += 1; 52 | } 53 | for (int i = 0; i < N; ++i) { 54 | if (data[i] != gold[i]) { 55 | passed = false; 56 | } 57 | } 58 | std::cout << ((passed) ? "SUCCESS\n" : "FAILURE\n"); 59 | free(gold); 60 | free(data, q); 61 | return (passed) ? 0 : 1; 62 | } 63 | -------------------------------------------------------------------------------- /samples/Ch19_memory_model_and_atomics/fig_19_7_avoid_data_race_with_atomics.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace sycl; 10 | 11 | int main() { 12 | queue q; 13 | 14 | const size_t N = 32; 15 | const size_t M = 4; 16 | 17 | // BEGIN CODE SNIP 18 | int* data = malloc_shared(N, q); 19 | std::fill(data, data + N, 0); 20 | 21 | q.parallel_for(N, [=](id<1> i) { 22 | int j = i % M; 23 | atomic_ref 26 | atomic_data(data[j]); 27 | atomic_data += 1; 28 | }).wait(); 29 | 30 | for (int i = 0; i < N; ++i) { 31 | std::cout << "data [" << i << "] = " << data[i] << "\n"; 32 | } 33 | // END CODE SNIP 34 | 35 | bool passed = true; 36 | int* gold = (int*)malloc(N * sizeof(int)); 37 | std::fill(gold, gold + N, 0); 38 | for (int i = 0; i < N; ++i) { 39 | int j = i % M; 40 | gold[j] += 1; 41 | } 42 | for (int i = 0; i < N; ++i) { 43 | if (data[i] != gold[i]) { 44 | passed = false; 45 | } 46 | } 47 | std::cout << ((passed) ? "SUCCESS\n" : "FAILURE\n"); 48 | free(gold); 49 | free(data, q); 50 | return (passed) ? 
0 : 1; 51 | } 52 | -------------------------------------------------------------------------------- /samples/Ch20_backend_interoperability/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_20_2_querying_backends 8 | SOURCES fig_20_2_querying_backends.cpp) 9 | 10 | add_book_sample( 11 | #TEST # disable temporarily due to bug 12 | TARGET fig_20_3_opencl_to_sycl 13 | SOURCES fig_20_3_opencl_to_sycl.cpp 14 | LIBS OpenCL) 15 | 16 | if(NOT NOL0) 17 | add_book_sample( 18 | #TEST # disable temporarily due to bug 19 | TARGET fig_20_4_level_zero_to_sycl 20 | SOURCES fig_20_4_level_zero_to_sycl.cpp 21 | LIBS ze_loader) 22 | endif() 23 | 24 | add_book_sample( 25 | TEST 26 | TARGET fig_20_5_sycl_to_opencl 27 | SOURCES fig_20_5_sycl_to_opencl.cpp 28 | LIBS OpenCL) 29 | 30 | if(NOT NOL0) 31 | add_book_sample( 32 | TEST 33 | TARGET fig_20_6_sycl_to_level_zero 34 | SOURCES fig_20_6_sycl_to_level_zero.cpp 35 | LIBS ze_loader) 36 | endif() 37 | 38 | add_book_sample( 39 | TEST 40 | TARGET fig_20_7_interop_handle_opencl 41 | SOURCES fig_20_7_interop_handle_opencl.cpp 42 | LIBS OpenCL) 43 | 44 | if(NOT NOL0) 45 | add_book_sample( 46 | TEST 47 | TARGET fig_20_8_interop_handle_level_zero 48 | SOURCES fig_20_8_interop_handle_level_zero.cpp 49 | LIBS ze_loader) 50 | endif() 51 | 52 | add_book_sample( 53 | TEST 54 | TARGET fig_20_9_opencl_kernel_interop 55 | SOURCES fig_20_9_opencl_kernel_interop.cpp 56 | LIBS OpenCL) 57 | 58 | if(NOT NOL0) 59 | add_book_sample( 60 | TEST 61 | TARGET fig_20_10_level_zero_kernel_interop 62 | SOURCES fig_20_10_level_zero_kernel_interop.cpp 63 | LIBS ze_loader) 64 | endif() 65 | 66 | add_book_sample( 67 | TEST 68 | TARGET fig_20_11_filter_selector 69 | SOURCES fig_20_11_filter_selector.cpp) 70 | -------------------------------------------------------------------------------- /samples/Ch20_backend_interoperability/fig_20_11_filter_selector.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | auto find_device = [](backend b, 11 | info::device_type t = 12 | info::device_type::all) { 13 | for (auto d : device::get_devices(t)) { 14 | if (d.get_backend() == b) { 15 | return d; 16 | } 17 | } 18 | throw sycl::exception(errc::runtime, 19 | "Could not find a device with " 20 | "the requested backend!"); 21 | }; 22 | 23 | try { 24 | device d{find_device(backend::opencl)}; 25 | std::cout << "Found an OpenCL SYCL device: " 26 | << d.get_info() << "\n"; 27 | } catch (const sycl::exception &e) { 28 | std::cout << "No OpenCL SYCL devices were found.\n"; 29 | } 30 | 31 | try { 32 | device d{find_device(backend::ext_oneapi_level_zero)}; 33 | std::cout << "Found a Level Zero SYCL device: " 34 | << d.get_info() << "\n"; 35 | } catch (const sycl::exception &e) { 36 | std::cout << "No Level Zero SYCL devices were found.\n"; 37 | } 38 | 39 | return 0; 40 | } 41 | 42 | // Example Output: 43 | // Found an OpenCL SYCL device: pthread-12th Gen Intel(R) Core(TM) i9-12900K 44 | // Found a Level Zero SYCL device: Intel(R) UHD Graphics 770 [0x4680] 45 | 46 | -------------------------------------------------------------------------------- /samples/Ch20_backend_interoperability/fig_20_2_querying_backends.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | for (auto& p : platform::get_platforms()) { 11 | std::cout << "SYCL Platform: " 12 | << p.get_info() 13 | << " is associated with SYCL Backend: " 14 | << p.get_backend() << std::endl; 15 | } 16 | return 0; 17 | } 18 | 19 | // Example Output: 20 | // SYCL Platform: Portable Computing Language is associated with SYCL Backend: opencl 21 | // SYCL Platform: Intel(R) OpenCL HD Graphics is associated with SYCL Backend: opencl 22 | // SYCL Platform: Intel(R) OpenCL is associated with SYCL Backend: opencl 23 | // SYCL Platform: Intel(R) FPGA Emulation Platform for OpenCL(TM) is associated with SYCL Backend: opencl 24 | // SYCL Platform: Intel(R) Level-Zero is associated with SYCL Backend: ext_oneapi_level_zero 25 | // SYCL Platform: NVIDIA CUDA BACKEND is associated with SYCL Backend: ext_oneapi_cuda 26 | // SYCL Platform: AMD HIP BACKEND is associated with SYCL Backend: ext_oneapi_hip 27 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Intel Corporation 2 | 3 | # SPDX-License-Identifier: MIT 4 | 5 | add_book_sample( 6 | TEST 7 | TARGET fig_21_1_basicsycl 8 | SOURCES fig_21_1_basicsycl.cpp) 9 | 10 | add_book_sample( 11 | TEST 12 | TARGET fig_21_5_walkorder 13 | SOURCES fig_21_5_walkorder.cpp) 14 | 15 | add_book_sample( 16 | TARGET fig_21_7_possible_deadlock 17 | SOURCES fig_21_7_possible_deadlock.cpp) 18 | 19 | add_book_sample( 20 | TEST 21 | TARGET fig_21_8_barriers 22 | SOURCES fig_21_8_barriers.cpp) 23 | 24 | add_book_sample( 25 | TEST 26 | TARGET fig_21_9_atomics 27 | SOURCES fig_21_9_atomics.cpp) 28 | 29 | if(NOT NODPCT) 30 | add_book_sample( 31 | TEST 32 | TARGET fig_21_13-14_reverse_migrated 33 | SOURCES fig_21_13-14_reverse_migrated.cpp) 34 | endif() 35 | 36 | # Note: we do not currently build these pure CUDA samples: 37 | # fig_21_2_basiccuda.cu 38 | # fig_21_4-6_walkorder.cu 39 | # fig_21_10_reverse.cu 40 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_10_reverse.cu: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | constexpr size_t size = 1024 * 1024; 12 | 13 | // BEGIN CODE SNIP 14 | __shared__ int scratch[256]; 15 | __global__ void Reverse(int* ptr, size_t size) { 16 | auto gid = blockIdx.x * blockDim.x + threadIdx.x; 17 | auto lid = threadIdx.x; 18 | 19 | scratch[lid] = ptr[gid]; 20 | __syncthreads(); 21 | ptr[gid] = scratch[256 - lid - 1]; 22 | } 23 | 24 | int main() { 25 | std::vector data(size); 26 | std::iota(data.begin(), data.end(), 0); 27 | 28 | cudaDeviceProp deviceProp; 29 | cudaGetDeviceProperties(&deviceProp, 0); 30 | std::cout << "Running on device: " << deviceProp.name << "\n"; 31 | 32 | int* ptr = nullptr; 33 | cudaMalloc(&ptr, size * sizeof(int)); 34 | cudaMemcpy(ptr, data.data(), size * sizeof(int), 35 | cudaMemcpyDefault); 36 | Reverse<<>>(ptr, size); 37 | cudaError_t result = cudaDeviceSynchronize(); 38 | if (result != cudaSuccess) { 39 | std::cout << "An error occurred!\n"; 40 | } 
41 | // ... 42 | // END CODE SNIP 43 | 44 | cudaMemcpy(data.data(), ptr, size * sizeof(int), 45 | cudaMemcpyDefault); 46 | 47 | for (size_t s = 0; s < size; s += 256) { 48 | for (size_t i = 0; i < 256; i++) { 49 | auto got = data[s + i]; 50 | auto want = s + 256 - i - 1; 51 | if (got != want) { 52 | std::cout << "Mismatch at index " << s + i << ", got " 53 | << got << ", wanted " << want << "\n"; 54 | return -1; 55 | } 56 | } 57 | } 58 | 59 | cudaFree(ptr); 60 | std::cout << "Success.\n"; 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_1_basicsycl.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | constexpr int count = 1024 * 1024; 10 | 11 | int main() { 12 | // BEGIN CODE SNIP 13 | // Declare an in-order SYCL queue for the default device 14 | queue q{property::queue::in_order()}; 15 | std::cout << "Running on device: " 16 | << q.get_device().get_info() 17 | << "\n"; 18 | 19 | int* buffer = malloc_host(count, q); 20 | q.fill(buffer, 0, count); 21 | 22 | q.parallel_for(count, [=](auto id) { 23 | buffer[id] = id; 24 | }).wait(); 25 | // END CODE SNIP 26 | 27 | int mismatches = 0; 28 | for (int i = 0; i < count; i++) { 29 | if (buffer[i] != i) { 30 | mismatches++; 31 | } 32 | } 33 | if (mismatches) { 34 | std::cout << "Found " << mismatches 35 | << " mismatches out of " << count 36 | << " elements.\n"; 37 | } else { 38 | std::cout << "Success.\n"; 39 | } 40 | 41 | free(buffer, q); 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_2_basiccuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | #include 8 | 9 | constexpr int count = 1024 * 1024; 10 | 11 | // BEGIN CODE SNIP 12 | // The CUDA kernel is a separate function 13 | __global__ void TestKernel(int* dst) { 14 | auto id = blockIdx.x * blockDim.x + threadIdx.x; 15 | dst[id] = id; 16 | } 17 | 18 | int main() { 19 | // CUDA uses device zero by default 20 | cudaDeviceProp deviceProp; 21 | cudaGetDeviceProperties(&deviceProp, 0); 22 | std::cout << "Running on device: " << deviceProp.name << "\n"; 23 | 24 | int* buffer = nullptr; 25 | cudaMallocHost(&buffer, count * sizeof(int)); 26 | cudaMemset(buffer, 0, count * sizeof(int)); 27 | 28 | TestKernel<<>>(buffer); 29 | cudaDeviceSynchronize(); 30 | // ... 
31 | // END CODE SNIP 32 | 33 | int mismatches = 0; 34 | for (int i = 0; i < count; i++) { 35 | if (buffer[i] != i) { 36 | mismatches++; 37 | } 38 | } 39 | if (mismatches) { 40 | std::cout << "Found " << mismatches << " mismatches out of " 41 | << count << " elements.\n"; 42 | } else { 43 | std::cout << "Success.\n"; 44 | } 45 | 46 | cudaFreeHost(buffer); 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_4-6_walkorder.cu: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | constexpr int count = 16 * 2; 11 | 12 | __device__ int get_global_linear_id() { 13 | auto blockId = gridDim.x * blockIdx.y + blockIdx.x; 14 | return blockId * blockDim.y * blockDim.x + 15 | threadIdx.y * blockDim.x + 16 | threadIdx.x; 17 | } 18 | 19 | // BEGIN CODE SNIP #1, Part 1/2 20 | __global__ void ExchangeKernel(int* dst) { 21 | auto index = get_global_linear_id(); // helper function 22 | auto fastest = threadIdx.x; 23 | auto neighbor = __shfl_xor_sync(0xFFFFFFFF, fastest, 1); 24 | dst[index] = neighbor; 25 | } 26 | // END CODE SNIP #1, Part 1/2 27 | 28 | // BEGIN CODE SNIP #2 29 | __global__ void ExchangeKernelCoopGroups(int* dst) { 30 | namespace cg = cooperative_groups; 31 | auto index = cg::this_grid().thread_rank(); 32 | auto fastest = threadIdx.x; 33 | auto warp = cg::tiled_partition<32>(cg::this_thread_block()); 34 | auto neighbor = warp.shfl_xor(fastest, 1); 35 | dst[index] = neighbor; 36 | } 37 | // END CODE SNIP #2 38 | 39 | int main() { 40 | cudaDeviceProp deviceProp; 41 | cudaGetDeviceProperties(&deviceProp, 0); 42 | std::cout << "Running on device: " << deviceProp.name << "\n"; 43 | 44 | int* buffer = nullptr; 45 | cudaMallocHost(&buffer, count * sizeof(int)); 46 | cudaMemset(buffer, 0, count * sizeof(int)); 47 | 48 | #if 0 49 | // BEGIN CODE SNIP #1, Part 2/2 50 | dim3 threadsPerBlock(16, 2); 51 | ExchangeKernel<<<1, threadsPerBlock>>>(buffer); 52 | cudaDeviceSynchronize(); 53 | // END CODE SNIP #1, Part 2/2 54 | #else 55 | dim3 threadsPerBlock(16, 2); 56 | ExchangeKernelCoopGroups<<<1, threadsPerBlock>>>(buffer); 57 | cudaDeviceSynchronize(); 58 | #endif 59 | 60 | int unexpected = 0; 61 | for (int i = 0; i < count; i+=2) { 62 | if (buffer[i] == buffer[i+1]) { 63 | unexpected++; 64 | } 65 | } 66 | if (unexpected) { 67 | std::cout << "Error, found " << unexpected << " matching pairs.\n"; 68 | } else { 69 | std::cout << "Success.\n"; 70 | } 71 | 72 | cudaFreeHost(buffer); 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_5_walkorder.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | constexpr int count = 16 * 2; 10 | 11 | int main() { 12 | queue q{property::queue::in_order()}; 13 | std::cout << "Running on device: " 14 | << q.get_device().get_info() 15 | << "\n"; 16 | 17 | int* buffer = malloc_host(count, q); 18 | q.fill(buffer, 0, count); 19 | 20 | // BEGIN CODE SNIP 21 | q.parallel_for(nd_range<2>{{2, 16}, {2, 16}}, 22 | [=](auto item) { 23 | auto index = item.get_global_linear_id(); 24 | auto fastest = item.get_local_id(1); 25 | auto sg = item.get_sub_group(); 
26 | auto neighbor = 27 | permute_group_by_xor(sg, fastest, 1); 28 | buffer[index] = neighbor; 29 | }) 30 | .wait(); 31 | // END CODE SNIP 32 | 33 | int unexpected = 0; 34 | for (int i = 0; i < count; i += 2) { 35 | if (buffer[i] == buffer[i + 1]) { 36 | unexpected++; 37 | } 38 | } 39 | if (unexpected) { 40 | std::cout << "Error, found " << unexpected 41 | << " matching pairs.\n"; 42 | } else { 43 | std::cout << "Success.\n"; 44 | } 45 | 46 | free(buffer, q); 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_7_possible_deadlock.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | constexpr int count = 10; 10 | 11 | int main() { 12 | queue q{property::queue::in_order()}; 13 | std::cout << "Running on device: " 14 | << q.get_device().get_info() 15 | << "\n"; 16 | 17 | int* buffer = malloc_host(count, q); 18 | q.fill(buffer, 0, count); 19 | 20 | // BEGIN CODE SNIP 21 | std::cout << "WARNING: May deadlock on some devices!\n"; 22 | q.parallel_for(nd_range<1>{64, 64}, [=](auto item) { 23 | int id = item.get_global_id(0); 24 | if (id >= count) { 25 | return; // early exit 26 | } 27 | group_barrier(item.get_group()); 28 | buffer[id] = id; 29 | }).wait(); 30 | // END CODE SNIP 31 | 32 | int mismatches = 0; 33 | for (int i = 0; i < count; i++) { 34 | if (buffer[i] != i) { 35 | mismatches++; 36 | } 37 | } 38 | if (mismatches) { 39 | std::cout << "Found " << mismatches 40 | << " mismatches out of " << count 41 | << " elements.\n"; 42 | } else { 43 | std::cout << "Success.\n"; 44 | } 45 | 46 | free(buffer, q); 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_8_barriers.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | int main() { 10 | queue q{property::queue::in_order()}; 11 | std::cout << "Running on device: " 12 | << q.get_device().get_info() 13 | << "\n"; 14 | 15 | // BEGIN CODE SNIP 16 | q.parallel_for(nd_range<1>{16, 16}, [=](auto item) { 17 | // Equivalent of __syncthreads, or 18 | // this_thread_block().sync(): 19 | group_barrier(item.get_group()); 20 | 21 | // Equivalent of __syncwarp, or 22 | // tiled_partition<32>(this_thread_block()).sync(): 23 | group_barrier(item.get_sub_group()); 24 | }).wait(); 25 | // END CODE SNIP 26 | 27 | std::cout << "Success.\n"; 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /samples/Ch21_migrating_cuda_code/fig_21_9_atomics.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | using namespace sycl; 8 | 9 | constexpr int count = 1024 * 1024; 10 | 11 | int main() { 12 | queue q{property::queue::in_order()}; 13 | std::cout << "Running on device: " 14 | << q.get_device().get_info() 15 | << "\n"; 16 | 17 | int* buffer = malloc_device(1, q); 18 | q.fill(buffer, 0, 1); 19 | 20 | // BEGIN CODE SNIP 21 | q.parallel_for(count, [=](auto id) { 22 | // The SYCL atomic_ref must specify the default order 23 | // and 
default scope as part of the atomic_ref type. To 24 | // match the behavior of the CUDA atomicAdd we want a 25 | // relaxed atomic with device scope: 26 | atomic_ref 28 | aref(*buffer); 29 | 30 | // When no memory order is specified, the defaults are 31 | // used: 32 | aref.fetch_add(1); 33 | 34 | // We can also specify the memory order and scope as 35 | // part of the atomic operation: 36 | aref.fetch_add(1, memory_order::relaxed, 37 | memory_scope::device); 38 | }); 39 | // END CODE SNIP 40 | 41 | int test = -1; 42 | q.copy(buffer, &test, 1).wait(); 43 | 44 | if (test != 2 * count) { 45 | std::cout << "Found " << test << ", wanted " 46 | << 2 * count << ".\n"; 47 | } else { 48 | std::cout << "Success.\n"; 49 | } 50 | 51 | free(buffer, q); 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /samples/Epilogue_future_direction_of_sycl/fig_ep_1_mdspan.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | #include 7 | 8 | using namespace sycl; 9 | namespace stdex = std::experimental; 10 | 11 | int main() { 12 | // BEGIN CODE SNIP 13 | queue q; 14 | constexpr int N = 4; 15 | constexpr int M = 2; 16 | int* data = malloc_shared(N * M, q); 17 | 18 | stdex::mdspan view{data}; 19 | q.parallel_for(range<2>{N, M}, [=](id<2> idx) { 20 | int i = idx[0]; 21 | int j = idx[1]; 22 | view(i, j) = i * M + j; 23 | }).wait(); 24 | // END CODE SNIP 25 | 26 | bool passed = true; 27 | for (int i = 0; i < N; ++i) { 28 | for (int j = 0; j < M; ++j) { 29 | if (data[i * M + j] != i * M + j) { 30 | passed = false; 31 | } 32 | } 33 | } 34 | std::cout << ((passed) ? "SUCCESS" : "FAILURE") << "\n"; 35 | 36 | free(data, q); 37 | return (passed) ? 0 : 1; 38 | } 39 | -------------------------------------------------------------------------------- /samples/Epilogue_future_direction_of_sycl/fig_ep_2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | // These ".hpp" files are text from the book that are 6 | // snippets that are not set up to be compiled as is. 
7 | 8 | // Function expects one vector argument (x) and one scalar 9 | // argument (n) 10 | simd scale(simd x, float n) { 11 | return x * n; 12 | } 13 | 14 | q.parallel_for(..., sycl::nd_item<1> it) 15 | [[sycl::reqd_sub_group_size(8)]] { 16 | // In SPMD code, each work-item has its own x and n 17 | // variables 18 | float x = ...; 19 | float n = ...; 20 | 21 | // Invoke SIMD function (scale) using work-items in the 22 | // sub-group x values from each work-item are combined 23 | // into a simd 24 | // The value of n is defined to be the 25 | // same (uniform) across all work-items 26 | // Returned simd is unpacked 27 | sycl::sub_group sg = it.get_sub_group(); 28 | float y = invoke_simd(sg, scale, x, uniform(n)); 29 | }); 30 | -------------------------------------------------------------------------------- /samples/Epilogue_future_direction_of_sycl/fig_ep_3_device_constexpr.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Intel Corporation 2 | 3 | // SPDX-License-Identifier: MIT 4 | 5 | #include 6 | 7 | using namespace sycl; 8 | 9 | int main() { 10 | queue q; 11 | 12 | q.submit([&](handler& h) { 13 | stream out(9, 9, h); 14 | // BEGIN CODE SNIP 15 | h.parallel_for(range{1}, [=](id<1> idx) { 16 | if_device_has([&]() { 17 | /* Code specialized for CPUs */ 18 | out << "On a CPU!" << endl; 19 | }).else_if_device_has([&]() { 20 | /* Code specialized for GPUs */ 21 | out << "On a GPU!" << endl; 22 | }); 23 | }); 24 | // END CODE SNIP 25 | }).wait(); 26 | } 27 | --------------------------------------------------------------------------------