├── .github ├── dependabot.yml └── workflows │ ├── cmake.yml │ └── macos.yml ├── CMakeLists.txt ├── LICENSE ├── LICENSE_FILE_HEADER ├── README.md ├── cmake └── LinAlgConfig.cmake.in ├── compilation_tests ├── CMakeLists.txt └── ctest_common.hpp ├── do-configure ├── examples ├── 01_scale.cpp ├── 02_matrix_vector_product_basic.cpp ├── 03_matrix_vector_product_mixedprec.cpp ├── CMakeLists.txt └── kokkos-based │ ├── CMakeLists.txt │ ├── add_kokkos.cpp │ ├── dot_kokkos.cpp │ ├── dotc_kokkos.cpp │ ├── idx_abs_max_kokkos.cpp │ ├── matrix_vector_product_kokkos.cpp │ ├── scale_kokkos.cpp │ ├── vector_abs_sum_kokkos.cpp │ ├── vector_norm2_kokkos.cpp │ └── vector_sum_of_squares_kokkos.cpp ├── include └── experimental │ ├── __p1673_bits │ ├── abs_if_needed.hpp │ ├── blas1_dot.hpp │ ├── blas1_givens.hpp │ ├── blas1_linalg_add.hpp │ ├── blas1_linalg_copy.hpp │ ├── blas1_linalg_swap.hpp │ ├── blas1_matrix_frob_norm.hpp │ ├── blas1_matrix_inf_norm.hpp │ ├── blas1_matrix_one_norm.hpp │ ├── blas1_scale.hpp │ ├── blas1_vector_abs_sum.hpp │ ├── blas1_vector_idx_abs_max.hpp │ ├── blas1_vector_norm2.hpp │ ├── blas1_vector_sum_of_squares.hpp │ ├── blas2_matrix_rank_1_update.hpp │ ├── blas2_matrix_rank_2_update.hpp │ ├── blas2_matrix_vector_product.hpp │ ├── blas2_matrix_vector_solve.hpp │ ├── blas3_matrix_product.hpp │ ├── blas3_matrix_rank_2k_update.hpp │ ├── blas3_matrix_rank_k_update.hpp │ ├── blas3_triangular_matrix_matrix_solve.hpp │ ├── conj_if_needed.hpp │ ├── conjugate_transposed.hpp │ ├── conjugated.hpp │ ├── imag_if_needed.hpp │ ├── layout_tags.hpp │ ├── layout_triangle.hpp │ ├── linalg_config.h.in │ ├── linalg_execpolicy_mapper.hpp │ ├── macros.hpp │ ├── maybe_static_size.hpp │ ├── packed_layout.hpp │ ├── proxy_reference.hpp │ ├── real_if_needed.hpp │ ├── scaled.hpp │ └── transposed.hpp │ └── linalg ├── lewg-presentation.md ├── make_single_header.py ├── tests ├── CMakeLists.txt ├── kokkos-based │ ├── CMakeLists.txt │ ├── add_kokkos.cpp │ ├── copy_kokkos.cpp │ ├── dot_kokkos.cpp 
│ ├── dotc_kokkos.cpp │ ├── gemm_C_AB.cpp │ ├── gemm_C_ABT.cpp │ ├── gemm_C_ATB.cpp │ ├── gtest_fixtures.hpp │ ├── gtest_main_kokkos.cpp │ ├── helpers.hpp │ ├── hermitian_matrix_left_product_kokkos.cpp │ ├── hermitian_matrix_rank1_update_kokkos.cpp │ ├── hermitian_matrix_rank2_update_kokkos.cpp │ ├── hermitian_matrix_rank_2k_update_kokkos.cpp │ ├── hermitian_matrix_rank_k_update_kokkos.cpp │ ├── hermitian_matrix_right_product_kokkos.cpp │ ├── idx_abs_max_kokkos.cpp │ ├── matrix_frob_norm_kokkos.cpp │ ├── matrix_inf_norm_kokkos.cpp │ ├── matrix_one_norm_kokkos.cpp │ ├── matrix_rank1_update_kokkos.cpp │ ├── mdspan_to_view.cpp │ ├── overwriting_hermitian_matrix_vector_product.cpp │ ├── overwriting_matrix_vector_product.cpp │ ├── overwriting_symmetric_matrix_vector_product.cpp │ ├── overwriting_triangular_matrix_vector_product.cpp │ ├── scale_rank1_kokkos.cpp │ ├── scale_rank2_kokkos.cpp │ ├── swap_elements_rank1_kokkos.cpp │ ├── swap_elements_rank2_kokkos.cpp │ ├── symmetric_matrix_left_product_kokkos.cpp │ ├── symmetric_matrix_rank1_update_kokkos.cpp │ ├── symmetric_matrix_rank2_update_kokkos.cpp │ ├── symmetric_matrix_rank_2k_update_kokkos.cpp │ ├── symmetric_matrix_rank_k_update_kokkos.cpp │ ├── symmetric_matrix_right_product_kokkos.cpp │ ├── test.cmake │ ├── triangular_matrix_left_product_kokkos.cpp │ ├── triangular_matrix_matrix_left_solve.cpp │ ├── triangular_matrix_matrix_right_solve.cpp │ ├── triangular_matrix_right_product_kokkos.cpp │ ├── triangular_matrix_vector_solve.cpp │ ├── updating_hermitian_matrix_vector_product.cpp │ ├── updating_matrix_vector_product.cpp │ ├── updating_symmetric_matrix_vector_product.cpp │ ├── updating_triangular_matrix_vector_product.cpp │ ├── vector_abs_sum_kokkos.cpp │ ├── vector_norm2_kokkos.cpp │ └── vector_sum_of_squares_kokkos.cpp └── native │ ├── CMakeLists.txt │ ├── abs_if_needed.cpp │ ├── abs_sum.cpp │ ├── add.cpp │ ├── conj_if_needed.cpp │ ├── conjugate_transposed.cpp │ ├── conjugated.cpp │ ├── copy.cpp │ ├── dot.cpp │ 
├── gemm.cpp │ ├── gemv.cpp │ ├── gemv_no_ambig.cpp │ ├── ger.cpp │ ├── gerc.cpp │ ├── givens.cpp │ ├── gtest_fixtures.hpp │ ├── hemm.cpp │ ├── her.cpp │ ├── her2.cpp │ ├── her2k.cpp │ ├── herk.cpp │ ├── idx_abs_max.cpp │ ├── imag_if_needed.cpp │ ├── matrix_inf_norm.cpp │ ├── matrix_one_norm.cpp │ ├── mixed_accessors.cpp │ ├── my_numbers.hpp │ ├── norm2.cpp │ ├── proxy_refs.cpp │ ├── real_if_needed.cpp │ ├── scale.cpp │ ├── scaled.cpp │ ├── swap.cpp │ ├── symm.cpp │ ├── syr.cpp │ ├── syr2.cpp │ ├── syr2k.cpp │ ├── syrk.cpp │ ├── transposed.cpp │ ├── trmm.cpp │ ├── trmv.cpp │ └── trsm.cpp └── tpl-implementations └── include └── experimental ├── __p1673_bits └── kokkos-kernels │ ├── blas1_add_kk.hpp │ ├── blas1_copy_kk.hpp │ ├── blas1_dot_kk.hpp │ ├── blas1_idx_abs_max_kk.hpp │ ├── blas1_matrix_frob_norm_kk.hpp │ ├── blas1_matrix_inf_norm_kk.hpp │ ├── blas1_matrix_one_norm_kk.hpp │ ├── blas1_scale_kk.hpp │ ├── blas1_swap_elements_kk.hpp │ ├── blas1_vector_abs_sum_kk.hpp │ ├── blas1_vector_norm2_kk.hpp │ ├── blas1_vector_sum_of_squares_kk.hpp │ ├── blas2_gemv_kk.hpp │ ├── blas2_hemv_kk.hpp │ ├── blas2_matrix_rank_1_update.hpp │ ├── blas2_matrix_rank_2_update.hpp │ ├── blas2_symv_kk.hpp │ ├── blas2_triangular_mat_vec_product.hpp │ ├── blas2_triangular_matrix_vector_solve.hpp │ ├── blas3_matrix_product_kk.hpp │ ├── blas3_matrix_rank_2k_update.hpp │ ├── blas3_matrix_rank_k_update.hpp │ ├── blas3_overwriting_gemm_kk.hpp │ ├── blas3_triangular_matrix_matrix_solve.hpp │ ├── exec_policy_wrapper_kk.hpp │ ├── kokkos_conjugate.hpp │ ├── mdspan_to_view_mapper_kk.hpp │ ├── parallel_matrix.hpp │ ├── signal_kokkos_impl_called.hpp │ ├── static_extent_match.hpp │ └── triangle.hpp └── linalg_kokkoskernels /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: / 5 | schedule: 6 | interval: "weekly" 7 | 
-------------------------------------------------------------------------------- /.github/workflows/cmake.yml: -------------------------------------------------------------------------------- 1 | name: CMake 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 7 | BUILD_TYPE: RelWithDebInfo 8 | 9 | jobs: 10 | build-mdspan: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | include: 16 | - compiler_driver: g++ 17 | compiler_prefix: /usr/bin 18 | steps: 19 | - name: Create Build Environment 20 | run: cmake -E make_directory mdspan-build 21 | 22 | - name: Check Out 23 | uses: actions/checkout@v4 24 | with: 25 | repository: kokkos/mdspan 26 | path: mdspan-src 27 | 28 | - name: Configure CMake 29 | working-directory: mdspan-build 30 | run: cmake $GITHUB_WORKSPACE/mdspan-src -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/mdspan-install 31 | 32 | - name: Build 33 | working-directory: mdspan-build 34 | run: make 35 | 36 | - name: Install 37 | working-directory: mdspan-build 38 | run: make install 39 | 40 | - name: Upload 41 | uses: actions/upload-artifact@v4 42 | with: 43 | name: mdspan 44 | path: mdspan-install 45 | 46 | build-stdblas: 47 | runs-on: ubuntu-latest 48 | needs: build-mdspan 49 | 50 | steps: 51 | - name: Download mdspan 52 | uses: actions/download-artifact@v4 53 | with: 54 | name: mdspan 55 | path: mdspan-install 56 | 57 | - name: Create Build Environment 58 | run: cmake -E make_directory stdblas-build 59 | 60 | - name: Check Out 61 | uses: actions/checkout@v4 62 | with: 63 | path: stdblas-src 64 | 65 | - name: Configure CMake 66 | shell: bash 67 | working-directory: stdblas-build 68 | run: cmake $GITHUB_WORKSPACE/stdblas-src -Dmdspan_DIR=$GITHUB_WORKSPACE/mdspan-install/lib/cmake/mdspan -DLINALG_ENABLE_TESTS=On -DLINALG_ENABLE_EXAMPLES=On -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/stdblas-install 69 | 
70 | - name: Build 71 | working-directory: stdblas-build 72 | shell: bash 73 | run: make 74 | 75 | - name: Tar files 76 | shell: bash 77 | run: tar -cvf stdblas.tar * 78 | 79 | - name: Upload workspace 80 | uses: actions/upload-artifact@v4 81 | with: 82 | name: stdblas 83 | path: stdblas.tar 84 | 85 | test-stdBLAS: 86 | runs-on: ubuntu-latest 87 | needs: build-stdblas 88 | 89 | steps: 90 | 91 | - name: Download workspace 92 | uses: actions/download-artifact@v4 93 | with: 94 | name: stdblas 95 | path: . 96 | 97 | - name: Untar files 98 | shell: bash 99 | run: tar -xvf stdblas.tar 100 | 101 | - name: Test 102 | working-directory: stdblas-build 103 | shell: bash 104 | run: ctest --output-on-failure 105 | 106 | - name: Install 107 | working-directory: stdblas-build 108 | shell: bash 109 | run: make install 110 | -------------------------------------------------------------------------------- /.github/workflows/macos.yml: -------------------------------------------------------------------------------- 1 | name: MacOS 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 
7 | BUILD_TYPE: RelWithDebInfo 8 | 9 | jobs: 10 | osx-ci: 11 | runs-on: [macos-latest] 12 | 13 | steps: 14 | - name: Check Out mdspan 15 | uses: actions/checkout@v4 16 | with: 17 | repository: kokkos/mdspan 18 | path: mdspan-src 19 | 20 | - name: create directories 21 | run: cmake -E make_directory mdspan-build stdblas-build 22 | 23 | - name: Configure mdspan 24 | working-directory: mdspan-build 25 | run: cmake -S $GITHUB_WORKSPACE/mdspan-src -B $GITHUB_WORKSPACE/mdspan-build -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/mdspan-install 26 | 27 | - name: Build mdspan 28 | working-directory: mdspan-build 29 | run: cmake --build $GITHUB_WORKSPACE/mdspan-build -j 3 30 | 31 | - name: Install mdspan 32 | working-directory: mdspan-build 33 | run: cmake --install $GITHUB_WORKSPACE/mdspan-build 34 | 35 | - name: Check Out 36 | uses: actions/checkout@v4 37 | with: 38 | path: stdblas-src 39 | 40 | - name: Configure stdblas 41 | shell: bash 42 | working-directory: stdblas-build 43 | run: cmake -S $GITHUB_WORKSPACE/stdblas-src -B $GITHUB_WORKSPACE/stdblas-build -Dmdspan_ROOT=$GITHUB_WORKSPACE/mdspan-install/ -DLINALG_ENABLE_TESTS=On -DLINALG_ENABLE_EXAMPLES=On -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/stdblas-install 44 | 45 | - name: Build stdblas 46 | working-directory: stdblas-build 47 | shell: bash 48 | run: cmake --build $GITHUB_WORKSPACE/stdblas-build -j 3 49 | 50 | - name: Test stdblas 51 | working-directory: stdblas-build 52 | shell: bash 53 | run: ctest --output-on-failure 54 | 55 | - name: Install stdblas 56 | working-directory: stdblas-build 57 | shell: bash 58 | run: cmake --install $GITHUB_WORKSPACE/stdblas-build 59 | -------------------------------------------------------------------------------- /LICENSE_FILE_HEADER: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // 
Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | //@HEADER 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # P1673 reference implementation 2 | 3 | This is a reference implementation of P1673, 4 | "A free function linear algebra interface based on the BLAS." 5 | You can find the latest submitted revision of P1673 6 | [at this URL](https://wg21.link/p1673). 7 | 8 | ## Requirements 9 | 10 | - CMake >= 3.17 (earlier versions may work, but are not tested) 11 | - C++ build environment that supports C++17 or greater 12 | 13 | ## Tested compilers 14 | 15 | We run GitHub's automated tests on every pull request. 16 | Automated tests use "ubuntu-latest", 17 | which presumably defaults to a fairly new GCC. 18 | Other compilers, including MSVC 2019, have been tested in the past. 19 | 20 | ## Brief build instructions 21 | 22 | 1. Download and install googletest (GTest) 23 | - https://github.com/google/googletest 24 | 2. Download and install mdspan: 25 | - git@github.com:kokkos/mdspan.git 26 | 3. Run CMake, pointing it to your googletest and mdspan install locations 27 | - If you want to build tests, set LINALG_ENABLE_TESTS=ON 28 | - If you want to build examples, set LINALG_ENABLE_EXAMPLES=ON 29 | - If you have a BLAS installation, set LINALG_ENABLE_BLAS=ON. 30 | BLAS support is currently experimental. 
31 | - If you have a TBB (Threading Building Blocks) installation 32 | and want to use TBB, set LINALG_ENABLE_TBB=ON (and optionally 33 | set TBB_DIR to the lib/cmake/TBB subdirectory of your TBB installation, 34 | or wherever the TBBConfig.cmake file happens to live). 35 | TBB support is currently experimental. 36 | 4. Build and install as usual 37 | 5. If you enabled tests, use "ctest" to run them 38 | 39 | ## More detailed MSVC build instructions 40 | 41 | Be sure to build mdspan and googletest in the Release configuration before installing. 42 | 43 | The following CMake options are known to work: 44 | 45 | - mdspan_DIR=${MDSPAN_INSTALL_DIR}\lib\cmake\mdspan 46 | (where MDSPAN_INSTALL_DIR is the path to your mdspan installation) 47 | - GTEST_INCLUDE_DIR=${GTEST_INSTALL_DIR}\include 48 | (where GTEST_INSTALL_DIR is the path to your googletest installation) 49 | - GTEST_LIBRARY=${GTEST_INSTALL_DIR}\lib\gtest.lib 50 | - GTEST_MAIN_LIBRARY=${GTEST_INSTALL_DIR}\lib\gtest_main.lib 51 | 52 | When building tests, for all CMAKE_CXX_FLAGS_* options, 53 | you might need to change "/MD" to "/MT", depending on how googletest was built. 
54 | -------------------------------------------------------------------------------- /cmake/LinAlgConfig.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include("${CMAKE_CURRENT_LIST_DIR}/linalgTargets.cmake") 4 | -------------------------------------------------------------------------------- /compilation_tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | macro(add_compilation_test name) 3 | add_executable(${name} ${name}.cpp) 4 | target_link_libraries(${name} linalg) 5 | endmacro() 6 | 7 | #add_compilation_test(ctest_thingy) 8 | 9 | -------------------------------------------------------------------------------- /compilation_tests/ctest_common.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include 19 | 20 | #include 21 | 22 | #pragma once 23 | 24 | #define MDSPAN_STATIC_TEST(...) 
\ 25 | static_assert(__VA_ARGS__, "MDSpan compile time test failed at " __FILE__ ":" MDSPAN_PP_STRINGIFY(__LINE__)) 26 | 27 | 28 | // All tests need a main so that they'll link 29 | int main() { } 30 | -------------------------------------------------------------------------------- /do-configure: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Requirements: 4 | # 5 | # - CMake >= 3.17 (earlier versions may work, but are not tested) 6 | # - C++17 or greater compiler 7 | 8 | # Build instructions 9 | # 10 | # 1. Download and install googletest 11 | # - https://github.com/google/googletest 12 | # - (master appears to work) 13 | # 2. Download and install mdspan: 14 | # - git@github.com:kokkos/mdspan.git 15 | 16 | # Set this to the path of your stdBLAS source directory. 17 | SRC_DIR=$HOME/Documents/Code/CPP/src/stdBLAS 18 | 19 | # Set this to the root directory of the place where you 20 | # installed googletest and mdspan. 21 | INSTALL_ROOT=$HOME/Documents/Code/CPP/install 22 | 23 | cmake \ 24 | -D CMAKE_INSTALL_PREFIX=${INSTALL_ROOT}/stdBLAS \ 25 | -D CMAKE_PREFIX_PATH="${INSTALL_ROOT}/mdspan;${INSTALL_ROOT}/googletest" \ 26 | -D LINALG_ENABLE_TESTS:BOOL=ON \ 27 | -D LINALG_ENABLE_EXAMPLES:BOOL=ON \ 28 | ${SRC_DIR} 29 | 30 | -------------------------------------------------------------------------------- /examples/01_scale.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | // Examples currently use parentheses (e.g., A(i,j)) 19 | // for the array access operator, 20 | // instead of square brackets (e.g., A[i,j]). 21 | // This must be defined before including any mdspan headers. 22 | #define MDSPAN_USE_PAREN_OPERATOR 1 23 | 24 | #include 25 | #include "experimental/__p2630_bits/submdspan.hpp" 26 | #include 27 | #include 28 | #include 29 | 30 | #ifdef LINALG_HAS_EXECUTION 31 | # include 32 | #endif 33 | 34 | namespace MdSpan = MDSPAN_IMPL_STANDARD_NAMESPACE; 35 | namespace LinearAlgebra = MDSPAN_IMPL_STANDARD_NAMESPACE :: MDSPAN_IMPL_PROPOSED_NAMESPACE :: linalg; 36 | 37 | using MdSpan::mdspan; 38 | using MdSpan::extents; 39 | #if defined(__cpp_lib_span) 40 | #include 41 | using std::dynamic_extent; 42 | #else 43 | using MdSpan::dynamic_extent; 44 | #endif 45 | 46 | int main(int argc, char* argv[]) { 47 | std::cout << "Scale" << std::endl; 48 | int N = 40; 49 | { 50 | // Create Data 51 | std::vector x_vec(N); 52 | 53 | // Create and initialize mdspan 54 | // 55 | // With CTAD working we could do the following. 56 | // GCC 11.1 works but some other compilers are buggy. 
57 | // 58 | // mdspan x(x_vec.data(), N); 59 | mdspan> x(x_vec.data(), N); 60 | for (int i = 0; i < x.extent(0); ++i) { 61 | x(i) = i; 62 | } 63 | 64 | // Call linalg::scale (x = 2.0*x) twice: once here, then once more in whichever branch below is compiled, so x ends up scaled by 4.0. 65 | LinearAlgebra::scale(2.0, x); 66 | #ifdef LINALG_HAS_EXECUTION 67 | LinearAlgebra::scale(std::execution::par, 2.0, x); 68 | #else 69 | LinearAlgebra::scale(2.0, x); 70 | #endif 71 | 72 | for (int i = 0; i < x.extent(0); i += 5) { 73 | std::cout << i << " " << x(i) << std::endl; 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /examples/02_matrix_vector_product_basic.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | // Examples currently use parentheses (e.g., A(i,j)) 19 | // for the array access operator, 20 | // instead of square brackets (e.g., A[i,j]). 21 | // This must be defined before including any mdspan headers. 
22 | #define MDSPAN_USE_PAREN_OPERATOR 1 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #ifdef LINALG_HAS_EXECUTION 30 | # include 31 | #endif 32 | 33 | namespace MdSpan = MDSPAN_IMPL_STANDARD_NAMESPACE; 34 | namespace LinearAlgebra = MDSPAN_IMPL_STANDARD_NAMESPACE :: MDSPAN_IMPL_PROPOSED_NAMESPACE :: linalg; 35 | 36 | using MdSpan::mdspan; 37 | using MdSpan::extents; 38 | #if defined(__cpp_lib_span) 39 | #include 40 | using std::dynamic_extent; 41 | #else 42 | using MdSpan::dynamic_extent; 43 | #endif 44 | 45 | int main(int argc, char* argv[]) { 46 | std::cout << "Matrix Vector Product Basic" << std::endl; 47 | int N = 40, M = 20; 48 | { 49 | // Create Data 50 | std::vector A_vec(N*M); 51 | std::vector x_vec(M); 52 | std::vector y_vec(N); 53 | 54 | // Create and initialize mdspan 55 | // Would look simple with CTAD, GCC 11.1 works but some others are buggy 56 | mdspan> A(A_vec.data(), N, M); 57 | mdspan> x(x_vec.data(), M); 58 | mdspan> y(y_vec.data(), N); 59 | for (int i = 0; i < A.extent(0); ++i) { 60 | for (int j = 0; j < A.extent(1); ++j) { 61 | A(i,j) = 100.0 * i + j; 62 | } 63 | } 64 | for (int i = 0; i < x.extent(0); ++i) { 65 | x(i) = 1.0 * i; 66 | } 67 | for (int i = 0; i < y.extent(0); ++i) { 68 | y(i) = -1.0 * i; 69 | } 70 | 71 | // y = A * x 72 | LinearAlgebra::matrix_vector_product(A, x, y); 73 | 74 | // y = 0.5 * y + 2 * A * x 75 | #ifdef LINALG_HAS_EXECUTION 76 | LinearAlgebra::matrix_vector_product(std::execution::par, 77 | LinearAlgebra::scaled(2.0, A), x, 78 | LinearAlgebra::scaled(0.5, y), y); 79 | #else 80 | LinearAlgebra::matrix_vector_product( 81 | LinearAlgebra::scaled(2.0, A), x, 82 | LinearAlgebra::scaled(0.5, y), y); 83 | #endif 84 | for (int i = 0; i < y.extent(0); i += 5) { 85 | std::cout << i << " " << y(i) << std::endl; 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /examples/03_matrix_vector_product_mixedprec.cpp: 
-------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | // Examples currently use parentheses (e.g., A(i,j)) 19 | // for the array access operator, 20 | // instead of square brackets (e.g., A[i,j]). 21 | // This must be defined before including any mdspan headers. 22 | #define MDSPAN_USE_PAREN_OPERATOR 1 23 | 24 | #include 25 | #include "experimental/__p2630_bits/submdspan.hpp" 26 | #include 27 | #include 28 | #include 29 | 30 | namespace MdSpan = MDSPAN_IMPL_STANDARD_NAMESPACE; 31 | namespace LinearAlgebra = MDSPAN_IMPL_STANDARD_NAMESPACE :: MDSPAN_IMPL_PROPOSED_NAMESPACE :: linalg; 32 | 33 | using MdSpan::mdspan; 34 | using MdSpan::extents; 35 | using MdSpan::full_extent; 36 | using MdSpan::submdspan; 37 | #if defined(__cpp_lib_span) 38 | #include 39 | using std::dynamic_extent; 40 | #else 41 | using MdSpan::dynamic_extent; 42 | #endif 43 | 44 | int main(int argc, char* argv[]) { 45 | std::cout << "Matrix Vector Product MixedPrec" << std::endl; 46 | int M = 40; 47 | { 48 | // Create Data 49 | std::vector A_vec(M * 8 * 4); 50 | std::vector x_vec(M * 4); 51 | std::vector y_vec(M * 8); 52 | 53 | // Create and initialize mdspan 54 | mdspan> A(A_vec.data(), M); 55 | mdspan> x(x_vec.data(), M); 56 | mdspan> y(y_vec.data(), M); 57 | for (int m = 0; m < A.extent(0); ++m) { 58 | 
for (int i = 0; i < A.extent(1); ++i) { 59 | for (int j = 0; j < A.extent(2); ++j) { 60 | A(m,i,j) = 1000.0 * m + 100.0 * i + j; 61 | } 62 | } 63 | } 64 | for (int i = 0; i < x.extent(0); ++i) { 65 | for (int m = 0; m < x.extent(1); ++m) { 66 | x(i,m) = 33.0 * i + 0.33 * m; 67 | } 68 | } 69 | for (int m = 0; m < y.extent(0); ++m) { 70 | for (int i = 0; i < y.extent(1); ++i) { 71 | y(m,i) = 33.0 * m + 0.33 * i; 72 | } 73 | } 74 | 75 | for (int m = 0; m < M; ++m) { 76 | auto A_m = submdspan(A, m, full_extent, full_extent); 77 | auto x_m = submdspan(x, full_extent, m); 78 | auto y_m = submdspan(y, m, full_extent); 79 | // y_m = A * x_m 80 | LinearAlgebra::matrix_vector_product(A_m, x_m, y_m); 81 | } 82 | 83 | for (int i = 0; i < y.extent(0); i += 5) { 84 | std::cout << i << " " << y(i,1) << std::endl; 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | function(linalg_add_example EXENAME) 3 | add_executable(${EXENAME} ${EXENAME}.cpp) 4 | target_link_libraries(${EXENAME} linalg) 5 | endfunction(linalg_add_example) 6 | 7 | linalg_add_example(01_scale) 8 | linalg_add_example(02_matrix_vector_product_basic) 9 | linalg_add_example(03_matrix_vector_product_mixedprec) 10 | 11 | if(LINALG_ENABLE_KOKKOS) 12 | add_subdirectory(kokkos-based) 13 | endif() 14 | -------------------------------------------------------------------------------- /examples/kokkos-based/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | linalg_add_example(add_kokkos) 3 | linalg_add_example(dot_kokkos) 4 | linalg_add_example(dotc_kokkos) 5 | linalg_add_example(idx_abs_max_kokkos) 6 | linalg_add_example(vector_norm2_kokkos) 7 | linalg_add_example(vector_abs_sum_kokkos) 8 | linalg_add_example(vector_sum_of_squares_kokkos) 9 | linalg_add_example(scale_kokkos) 10 | 
linalg_add_example(matrix_vector_product_kokkos) 11 | -------------------------------------------------------------------------------- /examples/kokkos-based/add_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include 19 | #include 20 | 21 | template 22 | void print_elements(const T1 & v, const std::vector & gold) 23 | { 24 | for(std::size_t i=0; i x_view("x",N); 42 | Kokkos::View y_view("y",N); 43 | Kokkos::View z_view("z",N); 44 | 45 | value_type* x_ptr = x_view.data(); 46 | value_type* y_ptr = y_view.data(); 47 | value_type* z_ptr = z_view.data(); 48 | 49 | using dyn_1d_ext_type = std::experimental::extents; 50 | using mdspan_type = std::experimental::mdspan; 51 | mdspan_type x(x_ptr,N); 52 | mdspan_type y(y_ptr,N); 53 | mdspan_type z(z_ptr,N); 54 | 55 | std::vector gold(N); 56 | for(std::size_t i=0; i(10); 59 | z(i) = 0; 60 | gold[i] = x(i) + y(i); 61 | } 62 | 63 | namespace stdla = std::experimental::linalg; 64 | const value_type init_value = 2.0; 65 | 66 | { 67 | // This goes to the base implementation 68 | stdla::add(std::execution::seq, x, y, z); 69 | } 70 | 71 | { 72 | // reset z since it is modified above 73 | for(std::size_t i=0; i(), x,y,z); 77 | print_elements(z, gold); 78 | } 79 | 80 | } 81 | Kokkos::finalize(); 82 | } 83 | 
-------------------------------------------------------------------------------- /examples/kokkos-based/dot_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include 19 | #include 20 | 21 | int main(int argc, char* argv[]) 22 | { 23 | std::cout << "dot example: calling kokkos-kernels" << std::endl; 24 | 25 | std::size_t N = 50; 26 | Kokkos::initialize(argc,argv); 27 | { 28 | using value_type = double; 29 | 30 | Kokkos::View a_view("A",N); 31 | Kokkos::View b_view("B",N); 32 | value_type* a_ptr = a_view.data(); 33 | value_type* b_ptr = b_view.data(); 34 | 35 | using dyn_1d_ext_type = std::experimental::extents; 36 | using mdspan_type = std::experimental::mdspan; 37 | mdspan_type a(a_ptr,N); 38 | mdspan_type b(b_ptr,N); 39 | for(std::size_t i=0; i(), a, b, init_value); 53 | printf("Kokkos result = %lf\n", res_kk); 54 | } 55 | Kokkos::finalize(); 56 | } 57 | -------------------------------------------------------------------------------- /examples/kokkos-based/dotc_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 
4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include 19 | #include 20 | 21 | int main(int argc, char* argv[]) 22 | { 23 | std::cout << "dotc example: calling kokkos-kernels" << std::endl; 24 | 25 | std::size_t N = 10; 26 | Kokkos::initialize(argc,argv); 27 | { 28 | using value_type = std::complex; 29 | using view_t = Kokkos::View; 30 | view_t a_view("A",N); 31 | view_t b_view("B",N); 32 | value_type* a_ptr = a_view.data(); 33 | value_type* b_ptr = b_view.data(); 34 | 35 | using dyn_1d_ext_type = std::experimental::extents; 36 | using mdspan_type = std::experimental::mdspan; 37 | mdspan_type a(a_ptr,N); 38 | mdspan_type b(b_ptr,N); 39 | for(std::size_t i=0; i(i); 42 | const value_type a_i(i_double + 1.0, i_double + 1.0); 43 | const value_type b_i(i_double - 2.0, i_double - 2.0); 44 | a(i) = a_i; 45 | b(i) = b_i; 46 | } 47 | 48 | namespace stdla = std::experimental::linalg; 49 | const value_type init_value(2., 3.); 50 | 51 | // This goes to the base implementation 52 | const auto res_seq = stdla::dotc(std::execution::seq, a, b, init_value); 53 | std::cout << "Seq result = " << res_seq << "\n"; 54 | 55 | // This forwards to KokkosKernels 56 | const auto res_kk = stdla::dotc(KokkosKernelsSTD::kokkos_exec<>(), a, b, init_value); 57 | std::cout << "Kokkos result = " << res_kk << "\n"; 58 | } 59 | Kokkos::finalize(); 60 | } 61 | -------------------------------------------------------------------------------- /examples/kokkos-based/idx_abs_max_kokkos.cpp: 
-------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include 19 | #include 20 | 21 | namespace stdexp = std::experimental; 22 | namespace stdla = stdexp::linalg; 23 | using value_type = double; 24 | 25 | void run_trivial_example() 26 | { 27 | std::array arr; 28 | using extents_type = stdexp::extents; 29 | stdexp::mdspan a(arr.data(),0); 30 | 31 | const auto idx = stdla::vector_idx_abs_max(std::execution::seq, a); 32 | std::cout << "Sequen result = " << idx << '\n'; 33 | 34 | const auto idx_kk = stdla::vector_idx_abs_max(KokkosKernelsSTD::kokkos_exec<>(), a); 35 | std::cout << "Kokkos result = " << idx_kk << '\n'; 36 | } 37 | 38 | void run_nontrivial_example() 39 | { 40 | std::size_t N = 10; 41 | 42 | Kokkos::View a_view("A",N); 43 | value_type* a_ptr = a_view.data(); 44 | 45 | using extents_type = stdexp::extents; 46 | stdexp::mdspan a(a_ptr,N); 47 | a(0) = 0.5; 48 | a(1) = 0.2; 49 | a(2) = 0.1; 50 | a(3) = 0.4; 51 | a(4) = -0.8; 52 | a(5) = -1.7; 53 | a(6) = -0.3; 54 | a(7) = 0.5; 55 | a(8) = -1.7; 56 | a(9) = -0.9; 57 | 58 | // This goes to the base implementation 59 | const auto idx = stdla::vector_idx_abs_max(std::execution::seq, a); 60 | std::cout << "Sequen result = " << idx << '\n'; 61 | 62 | // This forwards to KokkosKernels 
(https://github.com/kokkos/kokkos-kernels 63 | const auto idx_kk = stdla::vector_idx_abs_max(KokkosKernelsSTD::kokkos_exec<>(), a); 64 | std::cout << "Kokkos result = " << idx_kk << '\n'; 65 | } 66 | 67 | int main(int argc, char* argv[]) 68 | { 69 | std::cout << "vector_idx_abs_max example: calling kokkos-kernels" << std::endl; 70 | 71 | Kokkos::initialize(argc,argv); 72 | { 73 | run_trivial_example(); 74 | run_nontrivial_example(); 75 | } 76 | Kokkos::finalize(); 77 | } 78 | -------------------------------------------------------------------------------- /examples/kokkos-based/matrix_vector_product_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include 19 | 20 | #include 21 | 22 | using namespace std::experimental; 23 | 24 | int main(int argc, char* argv[]) { 25 | std::cout << "MatrixVectorProduct" << std::endl; 26 | int N = 10; 27 | int M = 20; 28 | Kokkos::initialize(argc,argv); 29 | { 30 | Kokkos::View x_view("X",M); 31 | Kokkos::View y_view("Y",N); 32 | Kokkos::View A_view("A",N,M); 33 | 34 | { 35 | // example for y = A * x 36 | 37 | Kokkos::deep_copy(x_view,1.0); 38 | Kokkos::deep_copy(A_view,2.0); 39 | 40 | // std::experimental::mdspan a(a_ptr,N); // Requires CTAD 41 | mdspan> x(x_view.data(),M); 42 | mdspan> y(y_view.data(),N); 43 | mdspan> A(A_view.data(),N,M); 44 | 45 | // This forwards to KokkosKernels (https://github.com/kokkos/kokkos-kernels) 46 | linalg::matrix_vector_product(KokkosKernelsSTD::kokkos_exec<>(),A,x,y); 47 | // This forwards to KokkosKernels if LINALG_ENABLE_KOKKOS_DEFAULT is ON 48 | linalg::matrix_vector_product(A,x,y); 49 | linalg::matrix_vector_product(std::execution::par,A,linalg::scaled(2.0,x),y); 50 | // This goes to the base implementation 51 | //linalg::matrix_vector_product(std::execution::seq,A,x,y); 52 | 53 | // note that this prints 80 for each element because of the scaled(2.0, x) above 54 | for(int i=0; i z_view("Z",N); 61 | Kokkos::deep_copy(x_view,1.0); 62 | Kokkos::deep_copy(y_view,2.0); 63 | Kokkos::deep_copy(z_view,0.0); 64 | Kokkos::deep_copy(A_view,1.0); 65 | 66 | mdspan> x(x_view.data(),M); 67 | mdspan> y(y_view.data(),N); 68 | mdspan> z(z_view.data(),N); 69 | mdspan> A(A_view.data(),N,M); 70 | 71 | // 1.
72 | linalg::matrix_vector_product(KokkosKernelsSTD::kokkos_exec<>(),A,x,y,z); 73 | // should print 22 74 | for(int i=0; i(), A, x, linalg::scaled(4.,y), z); 79 | // should print 28 80 | for(int i=0; i(), A, linalg::scaled(2., x), linalg::scaled(4.,y), z); 85 | // should print 48 86 | for(int i=0; i 19 | 20 | #include 21 | 22 | 23 | int main(int argc, char* argv[]) 24 | { 25 | std::cout << "scale example: calling kokkos-kernels" << std::endl; 26 | 27 | std::size_t N = 40; 28 | Kokkos::initialize(argc,argv); 29 | { 30 | Kokkos::View a_view("A",N); 31 | double* a_ptr = a_view.data(); 32 | 33 | // Requires CTAD working, GCC 11.1 works but some others are buggy 34 | // std::experimental::mdspan a(a_ptr,N); 35 | std::experimental::mdspan> a(a_ptr,N); 36 | for(std::size_t i=0; i(),2.0,a); 40 | // This forwards to KokkosKernels if LINALG_ENABLE_KOKKOS_DEFAULT is ON 41 | std::experimental::linalg::scale(std::execution::par,2.0,a); 42 | // This goes to the base implementation 43 | std::experimental::linalg::scale(std::execution::seq,2.0,a); 44 | for(std::size_t i=0; i 19 | #include 20 | 21 | int main(int argc, char* argv[]) 22 | { 23 | std::cout << "vector_abs_sum example: calling kokkos-kernels" << std::endl; 24 | 25 | std::size_t N = 20; 26 | Kokkos::initialize(argc,argv); 27 | { 28 | using value_type = double; 29 | Kokkos::View x_view("x",N); 30 | value_type* x_ptr = x_view.data(); 31 | 32 | using dyn_1d_ext_type = std::experimental::extents; 33 | using mdspan_type = std::experimental::mdspan; 34 | mdspan_type x(x_ptr,N); 35 | for(std::size_t i=0; i(-1); 38 | } 39 | else{ 40 | x(i) = static_cast(i); 41 | } 42 | } 43 | 44 | namespace stdla = std::experimental::linalg; 45 | const value_type init_value(2); 46 | 47 | const auto res = stdla::vector_abs_sum(x, init_value); 48 | printf("Default result = %lf\n", res); 49 | 50 | // FRIZZI: Oct 27: kk currently not impl yet, just placeholder to ensure hook forwards correctly 51 | const auto res_kk =
stdla::vector_abs_sum(KokkosKernelsSTD::kokkos_exec<>(), x, init_value); 52 | (void)res_kk; 53 | //printf("Kokkos result = %lf\n", res_kk); 54 | 55 | } 56 | Kokkos::finalize(); 57 | } 58 | -------------------------------------------------------------------------------- /examples/kokkos-based/vector_norm2_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include 19 | #include 20 | 21 | int main(int argc, char* argv[]) 22 | { 23 | std::cout << "vector_norm2 example: calling kokkos-kernels" << std::endl; 24 | 25 | std::size_t N = 20; 26 | Kokkos::initialize(argc,argv); 27 | { 28 | using value_type = double; 29 | Kokkos::View x_view("x",N); 30 | value_type* x_ptr = x_view.data(); 31 | 32 | using dyn_1d_ext_type = std::experimental::extents; 33 | using mdspan_type = std::experimental::mdspan; 34 | mdspan_type x(x_ptr,N); 35 | for(std::size_t i=0; i(i); 37 | } 38 | 39 | namespace stdla = std::experimental::linalg; 40 | const value_type init_value(2); 41 | 42 | const auto res = stdla::vector_norm2(x, init_value); 43 | printf("Default result = %lf\n", res); 44 | 45 | const auto res_kk = stdla::vector_norm2(KokkosKernelsSTD::kokkos_exec<>(), x, init_value); 46 | printf("Kokkos result = %lf\n", res_kk); 47 | 48 | } 49 | Kokkos::finalize(); 50 | } 51 | 
-------------------------------------------------------------------------------- /examples/kokkos-based/vector_sum_of_squares_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include 19 | #include 20 | 21 | int main(int argc, char* argv[]) 22 | { 23 | std::cout << "vector_sum_of_squares example: calling kokkos-kernels" << std::endl; 24 | 25 | std::size_t N = 20; 26 | Kokkos::initialize(argc,argv); 27 | { 28 | using value_type = double; 29 | Kokkos::View x_view("x",N); 30 | value_type* x_ptr = x_view.data(); 31 | 32 | using dyn_1d_ext_type = std::experimental::extents; 33 | using mdspan_type = std::experimental::mdspan; 34 | mdspan_type x(x_ptr,N); 35 | for(std::size_t i=0; i init_value{1., 1.}; 41 | 42 | const auto res = stdla::vector_sum_of_squares(x, init_value); 43 | std::cout << "Default result: " << res.scaling_factor << " " << res.scaled_sum_of_squares << '\n'; 44 | 45 | // FRIZZI: Oct 27: kk currently not impl yet, just placeholder to ensure hook forwards correctly 46 | const auto res_kk = stdla::vector_sum_of_squares(KokkosKernelsSTD::kokkos_exec<>(), x, init_value); 47 | (void)res_kk; 48 | //std::cout << "Kokkos result: " << res_kk.scaling_factor << " " << res_kk.scaled_sum_of_squares << '\n'; 49 | 50 | } 51 | Kokkos::finalize(); 52 | } 53 | 
-------------------------------------------------------------------------------- /include/experimental/__p1673_bits/abs_if_needed.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_ABS_IF_NEEDED_HPP_ 19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_ABS_IF_NEEDED_HPP_ 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE { 26 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { 27 | inline namespace __p1673_version_0 { 28 | namespace linalg { 29 | namespace impl { 30 | 31 | // E if T is an unsigned integer; 32 | // 33 | // (1.2) otherwise, std::abs(E) if T is an arithmetic type, 34 | // 35 | // (1.3) otherwise, abs(E), if that expression is valid, with overload 36 | // resolution performed in a context that includes the declaration 37 | // template T abs(T) = delete;. If the function selected by 38 | // overload resolution does not return the absolute value of its 39 | // input, the program is ill-formed, no diagnostic required. 40 | 41 | // Inline static variables require C++17. 
42 | constexpr inline auto abs_if_needed = [](auto t) 43 | { 44 | using T = std::remove_const_t>; 45 | if constexpr (std::is_arithmetic_v) { 46 | if constexpr (std::is_unsigned_v) { 47 | return t; 48 | } 49 | else { 50 | return std::abs(t); 51 | } 52 | } 53 | else { 54 | return abs(t); 55 | } 56 | }; 57 | 58 | } // end namespace impl 59 | } // end namespace linalg 60 | } // end inline namespace __p1673_version_0 61 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE 62 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE 63 | 64 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_ABS_IF_NEEDED_HPP_ 65 | -------------------------------------------------------------------------------- /include/experimental/__p1673_bits/conj_if_needed.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_IF_NEEDED_HPP_ 19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_IF_NEEDED_HPP_ 20 | 21 | #include 22 | #include 23 | 24 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE { 25 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { 26 | inline namespace __p1673_version_0 { 27 | namespace linalg { 28 | namespace impl { 29 | 30 | template struct is_complex : std::false_type{}; 31 | 32 | template<> struct is_complex> : std::true_type{}; 33 | template<> struct is_complex> : std::true_type{}; 34 | template<> struct is_complex> : std::true_type{}; 35 | 36 | template inline constexpr bool is_complex_v = is_complex::value; 37 | 38 | template 39 | struct has_conj : std::false_type {}; 40 | 41 | // If I can find unqualified conj via overload resolution, 42 | // then assume that conj(t) returns the conjugate of t. 43 | template 44 | struct has_conj()), void())> : std::true_type {}; 45 | 46 | template 47 | T conj_if_needed_impl(const T& t, std::false_type) 48 | { 49 | return t; 50 | } 51 | 52 | template 53 | auto conj_if_needed_impl(const T& t, std::true_type) 54 | { 55 | if constexpr (std::is_arithmetic_v) { 56 | return t; 57 | } else { 58 | return conj(t); 59 | } 60 | } 61 | 62 | // Inline static variables require C++17. 
63 | constexpr inline auto conj_if_needed = [](const auto& t) 64 | { 65 | using T = std::remove_const_t; 66 | return conj_if_needed_impl(t, has_conj{}); 67 | }; 68 | 69 | } // end namespace impl 70 | } // end namespace linalg 71 | } // end inline namespace __p1673_version_0 72 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE 73 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE 74 | 75 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_IF_NEEDED_HPP_ 76 | -------------------------------------------------------------------------------- /include/experimental/__p1673_bits/conjugate_transposed.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_TRANSPOSED_HPP_ 19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_TRANSPOSED_HPP_ 20 | 21 | #include "conjugated.hpp" 22 | #include "transposed.hpp" 23 | 24 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE { 25 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { 26 | inline namespace __p1673_version_0 { 27 | namespace linalg { 28 | 29 | template 30 | auto conjugate_transposed(mdspan a) 31 | { 32 | return conjugated(transposed(a)); 33 | } 34 | 35 | } // end namespace linalg 36 | } // end inline namespace __p1673_version_0 37 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE 38 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE 39 | 40 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_TRANSPOSED_HPP_ 41 | -------------------------------------------------------------------------------- /include/experimental/__p1673_bits/imag_if_needed.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_IMAG_IF_NEEDED_HPP_ 19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_IMAG_IF_NEEDED_HPP_ 20 | 21 | #include 22 | #include 23 | 24 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE { 25 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { 26 | inline namespace __p1673_version_0 { 27 | namespace linalg { 28 | namespace impl{ 29 | 30 | template 31 | struct has_imag : std::false_type {}; 32 | 33 | // If I can find unqualified imag via overload resolution, 34 | // then assume that imag(t) returns the imag part of t. 35 | template 36 | struct has_imag()), void())> : std::true_type {}; 37 | 38 | template 39 | T imag_if_needed_impl(const T& t, std::false_type) 40 | { 41 | // If imag(t) can't be ADL-found, then assume 42 | // that T represents a noncomplex number type. 43 | return T{}; 44 | } 45 | 46 | template 47 | auto imag_if_needed_impl(const T& t, std::true_type) 48 | { 49 | if constexpr (std::is_arithmetic_v) { 50 | // Overloads for integers have a return type of double. 51 | // We want to preserve the input type T. 52 | return T{}; 53 | } else { 54 | return imag(t); 55 | } 56 | } 57 | 58 | // Inline static variables require C++17. 
59 | constexpr inline auto imag_if_needed = [](const auto& t) 60 | { 61 | using T = std::remove_const_t; 62 | return imag_if_needed_impl(t, has_imag{}); 63 | }; 64 | 65 | } // end namespace impl 66 | } // end namespace linalg 67 | } // end inline namespace __p1673_version_0 68 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE 69 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE 70 | 71 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_IMAG_IF_NEEDED_HPP_ 72 | -------------------------------------------------------------------------------- /include/experimental/__p1673_bits/layout_tags.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_LAYOUT_TAGS_HPP_ 19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_LAYOUT_TAGS_HPP_ 20 | 21 | #include 22 | 23 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE { 24 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { 25 | inline namespace __p1673_version_0 { 26 | namespace linalg { 27 | 28 | // TODO @proposal-bug make sure these can't convert from `{}` 29 | 30 | struct column_major_t { }; 31 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto column_major = column_major_t{}; 32 | struct row_major_t { }; 33 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto row_major = row_major_t{}; 34 | 35 | struct upper_triangle_t { }; 36 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto upper_triangle = upper_triangle_t{}; 37 | struct lower_triangle_t { }; 38 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto lower_triangle = lower_triangle_t{}; 39 | 40 | struct implicit_unit_diagonal_t { }; 41 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto implicit_unit_diagonal = implicit_unit_diagonal_t{}; 42 | struct explicit_diagonal_t { }; 43 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto explicit_diagonal = explicit_diagonal_t{}; 44 | 45 | struct left_side_t { }; 46 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto left_side = left_side_t{}; 47 | struct right_side_t { }; 48 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto right_side = right_side_t{}; 49 | 50 | } // end namespace linalg 51 | } // end inline namespace __p1673_version_0 52 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE 53 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE 54 | 55 | 56 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_LAYOUT_TAGS_HPP_ 57 | -------------------------------------------------------------------------------- /include/experimental/__p1673_bits/layout_triangle.hpp: 
-------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL_BITS_LAYOUT_TRIANGLE_HPP_ 19 | #define LINALG_INCLUDE_EXPERIMENTAL_BITS_LAYOUT_TRIANGLE_HPP_ 20 | 21 | #include "layout_tags.hpp" 22 | 23 | #include 24 | #include 25 | 26 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE { 27 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { 28 | inline namespace __p1673_version_0 { 29 | namespace linalg { 30 | 31 | namespace __triangular_layouts_impl { 32 | 33 | template 34 | struct __lower_triangle_layout_impl; 35 | 36 | // FIXME work-around for #4. 37 | #if 0 38 | 39 | // lower triangular offsets are triangular numbers (n*(n+1)/2) 40 | template < 41 | ptrdiff_t ExtLast, ptrdiff_t... Exts, class BaseMap, class LastTwoMap, 42 | size_t... ExtIdxs, size_t... ExtMinus2Idxs 43 | > 44 | struct __lower_triangle_layout_impl< 45 | extents, 46 | BaseMap, LastTwoMap, 47 | std::integer_sequence, 48 | std::integer_sequence 49 | > { 50 | 51 | private: 52 | 53 | static constexpr auto __rank = sizeof...(Exts) + 2; 54 | 55 | _MDSPAN_NO_UNIQUE_ADDRESS LastTwoMap _trimap; 56 | _MDSPAN_NO_UNIQUE_ADDRESS BaseMap _base_map; 57 | 58 | public: 59 | 60 | 61 | template 62 | MDSPAN_FORCE_INLINE_FUNCTION 63 | constexpr ptrdiff_t operator()(Integral... 
idxs) const noexcept { 64 | auto base_val = _base_map( 65 | [&](size_t N) { 66 | _MDSPAN_FOLD_PLUS_RIGHT(((ExtIdxs == N) ? idx : 0), /* + ... + */ 0) 67 | }(ExtMinus2Idxs)... 68 | ); 69 | auto triang_val = _trimap( 70 | _MDSPAN_FOLD_PLUS_RIGHT(((ExtIdxs == __rank - 2) ? idx : 0), /* + ... + */ 0), 71 | _MDSPAN_FOLD_PLUS_RIGHT(((ExtIdxs == __rank - 1) ? idx : 0), /* + ... + */ 0) 72 | ); 73 | return base_val * triang_val; 74 | } 75 | 76 | }; 77 | 78 | #endif // 0 79 | 80 | } // end namespace __triangular_layouts_impl 81 | 82 | template 83 | class layout_blas_packed; 84 | 85 | } // end namespace linalg 86 | } // end inline namespace __p1673_version_0 87 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE 88 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE 89 | 90 | #endif //LINALG_INCLUDE_EXPERIMENTAL_BITS_LAYOUT_TRIANGLE_HPP_ 91 | -------------------------------------------------------------------------------- /include/experimental/__p1673_bits/linalg_config.h.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #cmakedefine LINALG_ENABLE_ATOMIC_REF 4 | #cmakedefine LINALG_ENABLE_BLAS 5 | #cmakedefine LINALG_ENABLE_CONCEPTS 6 | #cmakedefine LINALG_ENABLE_KOKKOS 7 | #cmakedefine LINALG_ENABLE_KOKKOS_DEFAULT 8 | #cmakedefine LINALG_ENABLE_TBB 9 | #cmakedefine LINALG_FIX_CONJUGATED_FOR_NONCOMPLEX 10 | #cmakedefine LINALG_FIX_RANK_UPDATES 11 | #cmakedefine LINALG_FIX_TRANSPOSED_FOR_PADDED_LAYOUTS 12 | -------------------------------------------------------------------------------- /include/experimental/__p1673_bits/macros.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. 
Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MACROS_HPP_ 19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MACROS_HPP_ 20 | 21 | #include "__p1673_bits/linalg_config.h" 22 | 23 | // Work around a known MSVC issue, that by default 24 | // it always defines __cplusplus as for C++98, 25 | // even if building in a more recent C++ mode. 26 | #ifdef _MSVC_LANG 27 | #define _LINALG_CPLUSPLUS _MSVC_LANG 28 | #else 29 | #define _LINALG_CPLUSPLUS __cplusplus 30 | #endif 31 | 32 | #define _LINALG_CXX_STD_14 201402L 33 | #define _LINALG_CXX_STD_17 201703L 34 | #define _LINALG_CXX_STD_20 202002L 35 | 36 | #define _LINALG_HAS_CXX_14 (_LINALG_CPLUSPLUS >= _LINALG_CXX_STD_14) 37 | #define _LINALG_HAS_CXX_17 (_LINALG_CPLUSPLUS >= _LINALG_CXX_STD_17) 38 | #define _LINALG_HAS_CXX_20 (_LINALG_CPLUSPLUS >= _LINALG_CXX_STD_20) 39 | 40 | static_assert(_LINALG_CPLUSPLUS >= _LINALG_CXX_STD_17, "stdBLAS requires C++17 or later."); 41 | 42 | // A sufficiently recent nvc++ comes with <execution>. 43 | // GCC (even 13.1.0) needs TBB, else std::execution::* won't even compile. 44 | // Other compilers like to define __GNUC__ to claim GCC compatibility, 45 | // even if they aren't GCC (and don't have GCC's issue of needing TBB). 46 | #if defined(__NVCOMPILER) 47 | # define LINALG_HAS_EXECUTION 1 48 | #elif ! defined(__clang__) && ! defined(_MSC_VER) && ! defined(__INTEL_COMPILER) && ! defined(__INTEL_LLVM_COMPILER) && defined(__GNUC__) 49 | # if defined(LINALG_ENABLE_TBB) 50 | # define LINALG_HAS_EXECUTION 1 51 | # endif 52 | #elif !
defined(__apple_build_version__) 53 | # define LINALG_HAS_EXECUTION 1 54 | #endif 55 | 56 | #define P1673_MATRIX_EXTENTS_TEMPLATE_PARAMETERS( MATRIX_NAME ) \ 57 | class SizeType_ ## MATRIX_NAME , \ 58 | ::std::size_t numRows_ ## MATRIX_NAME , \ 59 | ::std::size_t numCols_ ## MATRIX_NAME 60 | 61 | #define P1673_MATRIX_TEMPLATE_PARAMETERS( MATRIX_NAME ) \ 62 | class ElementType_ ## MATRIX_NAME , \ 63 | P1673_MATRIX_EXTENTS_TEMPLATE_PARAMETERS( MATRIX_NAME ) , \ 64 | class Layout_ ## MATRIX_NAME , \ 65 | class Accessor_ ## MATRIX_NAME 66 | 67 | #define P1673_MATRIX_EXTENTS_PARAMETER( MATRIX_NAME ) \ 68 | extents< \ 69 | SizeType_ ## MATRIX_NAME , \ 70 | numRows_ ## MATRIX_NAME , \ 71 | numCols_ ## MATRIX_NAME \ 72 | > 73 | 74 | #define P1673_MATRIX_PARAMETER( MATRIX_NAME ) \ 75 | mdspan< \ 76 | ElementType_ ## MATRIX_NAME , \ 77 | P1673_MATRIX_EXTENTS_PARAMETER( MATRIX_NAME ), \ 78 | Layout_ ## MATRIX_NAME , \ 79 | Accessor_ ## MATRIX_NAME \ 80 | > MATRIX_NAME 81 | 82 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MACROS_HPP_ 83 | -------------------------------------------------------------------------------- /include/experimental/__p1673_bits/maybe_static_size.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MAYBE_STATIC_SIZE_HPP_ 19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MAYBE_STATIC_SIZE_HPP_ 20 | 21 | #include 22 | 23 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE { 24 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { 25 | inline namespace __p1673_version_0 { 26 | namespace linalg { 27 | namespace impl { 28 | // Primary template: `value` is the compile-time constant Value, so no per-object storage is declared. 29 | template <class T, T Value, T DynSentinel> 30 | struct __maybe_static_value { 31 | // Construction and assignment from a T ignore the argument; `value` stays equal to Value. 32 | MDSPAN_INLINE_FUNCTION constexpr 33 | __maybe_static_value(T) noexcept { } 34 | MDSPAN_INLINE_FUNCTION MDSPAN_IMPL_CONSTEXPR_14 35 | __maybe_static_value& operator=(T) noexcept { return *this; } 36 | 37 | MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr 38 | __maybe_static_value() noexcept = default; 39 | MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr 40 | __maybe_static_value(__maybe_static_value const&) noexcept = default; 41 | MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr 42 | __maybe_static_value(__maybe_static_value&&) noexcept = default; 43 | MDSPAN_INLINE_FUNCTION_DEFAULTED MDSPAN_IMPL_CONSTEXPR_14_DEFAULTED 44 | __maybe_static_value& operator=(__maybe_static_value const&) noexcept = default; 45 | MDSPAN_INLINE_FUNCTION_DEFAULTED MDSPAN_IMPL_CONSTEXPR_14_DEFAULTED 46 | __maybe_static_value& operator=(__maybe_static_value&&) noexcept = default; 47 | MDSPAN_INLINE_FUNCTION_DEFAULTED 48 | ~__maybe_static_value() = default; 49 | 50 | static constexpr auto value = Value; 51 | static constexpr auto is_static = true; 52 | static constexpr auto value_static = Value; 53 | }; 54 | // Specialization for the dynamic case (presumably Value equal to DynSentinel): `value` is a run-time data member. 55 | template <class T, T DynSentinel> 56 | struct __maybe_static_value<T, DynSentinel, DynSentinel> { 57 | T value{}; 58 | static constexpr auto is_static = false; 59 | static constexpr auto value_static = DynSentinel; 60 | }; 61 | 62 | template <::std::size_t StaticSize, ::std::size_t Sentinel=dynamic_extent> 63 | using __maybe_static_extent = __maybe_static_value<::std::size_t,
StaticSize, Sentinel>; 64 | 65 | } // end namespace impl 66 | } // end namespace linalg 67 | } // end inline namespace __p1673_version_0 68 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE 69 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE 70 | 71 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MAYBE_STATIC_SIZE_HPP_ 72 | -------------------------------------------------------------------------------- /include/experimental/__p1673_bits/packed_layout.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_PACKED_LAYOUT_HPP_ 19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_PACKED_LAYOUT_HPP_ 20 | 21 | #include 22 | #include "layout_triangle.hpp" 23 | 24 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE { 25 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { 26 | inline namespace __p1673_version_0 { 27 | namespace linalg { 28 | 29 | // TODO declarations need extents-see-returns-below defined 30 | 31 | #if 0 32 | template 38 | constexpr mdspanextents-see-returns-below, 40 | layout_blas_packed< 41 | Triangle, 42 | StorageOrder>, 43 | Accessor> 44 | packed( 45 | const mdspan& m, 46 | typename mdspan::index_type num_rows, 47 | Triangle, 48 | StorageOrder); 49 | #endif // 0 50 | 51 | } // end namespace linalg 52 | } // end inline namespace __p1673_version_0 53 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE 54 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE 55 | 56 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_PACKED_LAYOUT_HPP_ 57 | -------------------------------------------------------------------------------- /include/experimental/__p1673_bits/real_if_needed.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_REAL_IF_NEEDED_HPP_ 19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_REAL_IF_NEEDED_HPP_ 20 | 21 | #include 22 | #include 23 | 24 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE { 25 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { 26 | inline namespace __p1673_version_0 { 27 | namespace linalg { 28 | namespace impl{ 29 | 30 | template 31 | struct has_real : std::false_type {}; 32 | 33 | // If I can find unqualified real via overload resolution, 34 | // then assume that real(t) returns the real part of t. 35 | template 36 | struct has_real()), void())> : std::true_type {}; 37 | 38 | template 39 | T real_if_needed_impl(const T& t, std::false_type) 40 | { 41 | // If real(t) can't be ADL-found, then assume 42 | // that T represents a noncomplex number type. 43 | return t; 44 | } 45 | 46 | template 47 | auto real_if_needed_impl(const T& t, std::true_type) 48 | { 49 | if constexpr (std::is_arithmetic_v) { 50 | // Overloads for integers have a return type of double. 51 | // We want to preserve the input type T. 52 | return t; 53 | } else { 54 | return real(t); 55 | } 56 | } 57 | 58 | // Inline static variables require C++17. 
59 | constexpr inline auto real_if_needed = [](const auto& t) 60 | { 61 | using T = std::remove_const_t; 62 | return real_if_needed_impl(t, has_real{}); 63 | }; 64 | 65 | } // end namespace impl 66 | } // end namespace linalg 67 | } // end inline namespace __p1673_version_0 68 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE 69 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE 70 | 71 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_REAL_IF_NEEDED_HPP_ 72 | -------------------------------------------------------------------------------- /include/experimental/__p1673_bits/scaled.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_SCALED_HPP_ 19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_SCALED_HPP_ 20 | 21 | #include 22 | 23 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE { 24 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { 25 | inline namespace __p1673_version_0 { 26 | namespace linalg { 27 | 28 | template 29 | class scaled_accessor { 30 | public: 31 | using element_type = 32 | std::add_const_t() * std::declval())>; 33 | using reference = std::remove_const_t; 34 | using data_handle_type = typename NestedAccessor::data_handle_type; 35 | using offset_policy = 36 | scaled_accessor; 37 | 38 | constexpr scaled_accessor() = default; 39 | 40 | MDSPAN_TEMPLATE_REQUIRES( 41 | class OtherScalingFactor, 42 | class OtherNestedAccessor, 43 | /* requires */ ( 44 | std::is_constructible_v && 45 | std::is_constructible_v 46 | ) 47 | ) 48 | #if defined(__cpp_conditional_explicit) 49 | explicit(!std::is_convertible_v) 50 | #endif 51 | constexpr scaled_accessor(const scaled_accessor& other) : 52 | scaling_factor_(other.scaling_factor()), 53 | nested_accessor_(other.nested_accessor()) 54 | {} 55 | 56 | constexpr scaled_accessor(const ScalingFactor& s, const NestedAccessor& a) : 57 | scaling_factor_(s), 58 | nested_accessor_(a) 59 | {} 60 | 61 | constexpr reference access(data_handle_type p, ::std::size_t i) const { 62 | return scaling_factor_ * typename NestedAccessor::element_type(nested_accessor_.access(p, i)); 63 | } 64 | 65 | typename offset_policy::data_handle_type 66 | constexpr offset(data_handle_type p, ::std::size_t i) const { 67 | return nested_accessor_.offset(p, i); 68 | } 69 | 70 | constexpr NestedAccessor nested_accessor() const noexcept { 71 | return nested_accessor_; 72 | } 73 | 74 | constexpr ScalingFactor scaling_factor() const noexcept { 75 | return scaling_factor_; 
76 | } 77 | 78 | private: 79 | ScalingFactor scaling_factor_; 80 | NestedAccessor nested_accessor_; 81 | }; 82 | 83 | namespace impl { 84 | 85 | template 87 | using scaled_element_type = 88 | std::add_const_t::element_type>; 89 | 90 | } // namespace impl 91 | 92 | template 97 | mdspan, 98 | Extents, 99 | Layout, 100 | scaled_accessor> 101 | scaled(ScalingFactor scaling_factor, 102 | mdspan x) 103 | { 104 | using acc_type = scaled_accessor; 105 | return {x.data_handle(), x.mapping(), acc_type{scaling_factor, x.accessor()}}; 106 | } 107 | 108 | } // end namespace linalg 109 | } // end inline namespace __p1673_version_0 110 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE 111 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE 112 | 113 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_SCALED_HPP_ 114 | -------------------------------------------------------------------------------- /include/experimental/linalg: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #pragma once 19 | 20 | #include "__p1673_bits/linalg_config.h" 21 | #include "__p1673_bits/macros.hpp" 22 | #include "__p1673_bits/linalg_execpolicy_mapper.hpp" 23 | #include "__p1673_bits/maybe_static_size.hpp" 24 | #include "__p1673_bits/layout_tags.hpp" 25 | #include "__p1673_bits/layout_triangle.hpp" 26 | #include "__p1673_bits/packed_layout.hpp" 27 | #include "__p1673_bits/abs_if_needed.hpp" 28 | #include "__p1673_bits/conj_if_needed.hpp" 29 | #include "__p1673_bits/real_if_needed.hpp" 30 | #include "__p1673_bits/imag_if_needed.hpp" 31 | #include "__p1673_bits/scaled.hpp" 32 | #include "__p1673_bits/conjugated.hpp" 33 | #include "__p1673_bits/transposed.hpp" 34 | #include "__p1673_bits/conjugate_transposed.hpp" 35 | #include "__p1673_bits/blas1_givens.hpp" 36 | #include "__p1673_bits/blas1_linalg_swap.hpp" 37 | #include "__p1673_bits/blas1_matrix_frob_norm.hpp" 38 | #include "__p1673_bits/blas1_matrix_inf_norm.hpp" 39 | #include "__p1673_bits/blas1_matrix_one_norm.hpp" 40 | #include "__p1673_bits/blas1_scale.hpp" 41 | #include "__p1673_bits/blas1_linalg_copy.hpp" 42 | #include "__p1673_bits/blas1_linalg_add.hpp" 43 | #include "__p1673_bits/blas1_dot.hpp" 44 | #include "__p1673_bits/blas1_vector_norm2.hpp" 45 | #include "__p1673_bits/blas1_vector_abs_sum.hpp" 46 | #include "__p1673_bits/blas1_vector_idx_abs_max.hpp" 47 | #include "__p1673_bits/blas1_vector_sum_of_squares.hpp" 48 | #include "__p1673_bits/blas2_matrix_vector_product.hpp" 49 | #include "__p1673_bits/blas2_matrix_vector_solve.hpp" 50 | #include "__p1673_bits/blas2_matrix_rank_1_update.hpp" 51 | #include "__p1673_bits/blas2_matrix_rank_2_update.hpp" 52 | #include "__p1673_bits/blas3_matrix_product.hpp" 53 | #include "__p1673_bits/blas3_matrix_rank_k_update.hpp" 54 | #include "__p1673_bits/blas3_matrix_rank_2k_update.hpp" 55 
| #include "__p1673_bits/blas3_triangular_matrix_matrix_solve.hpp" 56 | #ifdef LINALG_ENABLE_KOKKOS 57 | #include 58 | #endif 59 | -------------------------------------------------------------------------------- /make_single_header.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import re 4 | import sys 5 | import os 6 | from os.path import dirname, join as path_join, abspath, exists 7 | 8 | extra_paths = [path_join(dirname(abspath(__file__)), "include")] 9 | 10 | def find_file(included_name, current_file): 11 | current_dir = dirname(abspath(current_file)) 12 | for idir in [current_dir] + extra_paths: 13 | try_path = path_join(idir, included_name) 14 | if exists(try_path): 15 | return try_path 16 | return None 17 | 18 | def process_file(file_path, out_lines=[], front_matter_lines=[], processed_files=[]): 19 | with open(file_path, "r") as f: 20 | for line in f: 21 | m_inc = re.match(r'#include\s*[<"](.+)[>"]\s*', line) 22 | if m_inc: 23 | inc_name = m_inc.group(1) 24 | inc_path = find_file(inc_name, file_path) 25 | if inc_path not in processed_files: 26 | if inc_path is not None: 27 | processed_files += [inc_path] 28 | process_file(inc_path, out_lines, front_matter_lines, processed_files) 29 | else: 30 | # assume it's a system header; add it to the front matter just to be clean 31 | front_matter_lines += [line] 32 | continue 33 | m_once = re.match(r"#pragma once\s*", line) 34 | # ignore pragma once; we're handling it here 35 | if m_once: 36 | continue 37 | # otherwise, just add the line to the output 38 | if line[-1] != "\n": line = line + "\n" 39 | out_lines += [line] 40 | return "".join(front_matter_lines) + "\n" + "".join(out_lines) 41 | 42 | if __name__ == "__main__": 43 | print(process_file(abspath(sys.argv[1]), [], ["#pragma once\n"], [abspath(sys.argv[1])])) 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | find_package(GTest) 2 | if (NOT GTest_FOUND) 3 | message(STATUS "No installed GTest found, fetching from Github") 4 | include(FetchContent) 5 | FetchContent_Declare( 6 | googletest 7 | GIT_REPOSITORY https://github.com/google/googletest.git 8 | GIT_TAG release-1.11.0 9 | ) 10 | # need to set the variables in CACHE due to CMP0077 11 | set(gtest_disable_pthreads ON CACHE INTERNAL "") 12 | if(MSVC) 13 | set(gtest_force_shared_crt ON CACHE INTERNAL "") 14 | endif() 15 | FetchContent_GetProperties(googletest) 16 | if(NOT googletest_POPULATED) 17 | FetchContent_Populate(googletest) 18 | add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL) 19 | endif() 20 | add_library(GTest::Main ALIAS gtest_main) 21 | add_library(GTest::GTest ALIAS gtest) 22 | endif() 23 | 24 | add_subdirectory(native) 25 | 26 | if(LINALG_ENABLE_KOKKOS) 27 | add_subdirectory(kokkos-based) 28 | endif() 29 | -------------------------------------------------------------------------------- /tests/kokkos-based/add_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | 5 | namespace{ 6 | 7 | template 8 | void add_gold_solution(x_t x, y_t y, z_t z) 9 | { 10 | for (std::size_t i=0; i 16 | void kokkos_blas1_add_test_impl(x_t x, y_t y, z_t z) 17 | { 18 | namespace stdla = std::experimental::linalg; 19 | 20 | using value_type = typename x_t::value_type; 21 | const std::size_t extent = x.extent(0); 22 | 23 | // copy x and y to verify they are not changed after kernel 24 | auto x_preKernel = kokkostesting::create_stdvector_and_copy(x); 25 | auto y_preKernel = kokkostesting::create_stdvector_and_copy(y); 26 | 27 | // compute gold 28 | std::vector gold(extent); 29 | using mdspan_t = mdspan>; 30 | mdspan_t z_gold(gold.data(), extent); 31 | add_gold_solution(x, y, z_gold); 32 | 33 | 
stdla::add(KokkosKernelsSTD::kokkos_exec<>(), x, y, z); 34 | 35 | if constexpr(std::is_same_v){ 36 | for (std::size_t i=0; i){ 44 | for (std::size_t i=0; i>){ 52 | for (std::size_t i=0; i; 76 | using stdc_t = value_type; 77 | if (alignof(value_type) == alignof(kc_t)){ 78 | kokkos_blas1_add_test_impl(x, y, z); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /tests/kokkos-based/copy_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | 5 | namespace{ 6 | 7 | template 8 | void kokkos_blas1_copy_test_impl(x_t x, y_t y) 9 | { 10 | namespace stdla = std::experimental::linalg; 11 | 12 | using value_type = typename x_t::value_type; 13 | const std::size_t extent = x.extent(0); 14 | 15 | // verify that x, y are different before running kernel 16 | for (std::size_t i=0; i(), x, y); 25 | 26 | // after kernel, x should be unchanged, y should be equal to x 27 | if constexpr(std::is_same_v){ 28 | for (std::size_t i=0; i){ 36 | for (std::size_t i=0; i>){ 44 | for (std::size_t i=0; i; 68 | using stdc_t = value_type; 69 | if constexpr(alignof(value_type) == alignof(kc_t)){ 70 | kokkos_blas1_copy_test_impl(x, y); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /tests/kokkos-based/dot_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | 5 | namespace 6 | { 7 | 8 | template 9 | auto dot_gold_solution(x_t x, y_t y, T initValue, bool useInit) 10 | { 11 | 12 | T result = {}; 13 | for (std::size_t i=0; i 22 | void kokkos_blas1_dot_test_impl(x_t x, y_t y, T initValue, bool useInit) 23 | { 24 | namespace stdla = std::experimental::linalg; 25 | 26 | using value_type = typename x_t::value_type; 27 | const std::size_t extent = x.extent(0); 28 | 29 | // copy x and y to verify they 
are not changed after kernel 30 | auto x_preKernel = kokkostesting::create_stdvector_and_copy(x); 31 | auto y_preKernel = kokkostesting::create_stdvector_and_copy(y); 32 | 33 | // compute gold 34 | const T gold = dot_gold_solution(x, y, initValue, useInit); 35 | 36 | T result = {}; 37 | if (useInit){ 38 | result = stdla::dot(KokkosKernelsSTD::kokkos_exec<>(), 39 | x, y, initValue); 40 | }else{ 41 | result = stdla::dot(KokkosKernelsSTD::kokkos_exec<>(), 42 | x, y); 43 | } 44 | 45 | if constexpr(std::is_same_v){ 46 | // cannot use EXPECT_FLOAT_EQ because 47 | // in some cases that fails on the third digit or similar 48 | EXPECT_NEAR(result, gold, 1e-2); 49 | } 50 | 51 | if constexpr(std::is_same_v){ 52 | // similarly to float 53 | EXPECT_NEAR(result, gold, 1e-9); 54 | } 55 | 56 | if constexpr(std::is_same_v>){ 57 | EXPECT_NEAR(result.real(), gold.real(), 1e-9); 58 | EXPECT_NEAR(result.imag(), gold.imag(), 1e-9); 59 | } 60 | 61 | // x,y should not change after kernel 62 | for (std::size_t i=0; i(0), false); 72 | } 73 | 74 | TEST_F(blas1_signed_float_fixture, kokkos_dot_initvalue) 75 | { 76 | kokkos_blas1_dot_test_impl(x, y, static_cast(3), true); 77 | } 78 | 79 | TEST_F(blas1_signed_double_fixture, kokkos_dot_noinitvalue) 80 | { 81 | kokkos_blas1_dot_test_impl(x, y, static_cast(0), false); 82 | } 83 | 84 | TEST_F(blas1_signed_double_fixture, kokkos_dot_initvalue) 85 | { 86 | kokkos_blas1_dot_test_impl(x, y, static_cast(5), true); 87 | } 88 | 89 | TEST_F(blas1_signed_complex_double_fixture, kokkos_dot_noinitvalue) 90 | { 91 | using kc_t = Kokkos::complex; 92 | using stdc_t = value_type; 93 | if constexpr (alignof(value_type) == alignof(kc_t)){ 94 | const value_type init{0., 0.}; 95 | kokkos_blas1_dot_test_impl(x, y, init, false); 96 | } 97 | } 98 | 99 | TEST_F(blas1_signed_complex_double_fixture, kokkos_dot_initvalue) 100 | { 101 | using kc_t = Kokkos::complex; 102 | using stdc_t = value_type; 103 | if constexpr (alignof(value_type) == alignof(kc_t)){ 104 | const value_type 
init{-2., 4.}; 105 | kokkos_blas1_dot_test_impl(x, y, init, true); 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /tests/kokkos-based/dotc_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | 5 | namespace 6 | { 7 | 8 | template 9 | auto dotc_gold_solution(x_t x, y_t y, T initValue, bool useInit) 10 | { 11 | 12 | T result = {}; 13 | if (useInit) result = initValue; 14 | 15 | for (std::size_t i=0; i 23 | void kokkos_blas1_dotc_test_impl(x_t x, y_t y, T initValue, bool useInit) 24 | { 25 | namespace stdla = std::experimental::linalg; 26 | 27 | using value_type = typename x_t::value_type; 28 | const std::size_t extent = x.extent(0); 29 | 30 | // copy x and y to verify they are not changed after kernel 31 | auto x_preKernel = kokkostesting::create_stdvector_and_copy(x); 32 | auto y_preKernel = kokkostesting::create_stdvector_and_copy(y); 33 | 34 | // compute gold 35 | const auto gold = dotc_gold_solution(x, y, initValue, useInit); 36 | 37 | T result = {}; 38 | if (useInit){ 39 | result = stdla::dotc(KokkosKernelsSTD::kokkos_exec<>(), x, y, initValue); 40 | }else{ 41 | result = stdla::dotc(KokkosKernelsSTD::kokkos_exec<>(), x, y); 42 | } 43 | 44 | if constexpr(std::is_same_v>) 45 | { 46 | EXPECT_NEAR(result.real(), gold.real(), 1e-9); 47 | EXPECT_NEAR(result.imag(), gold.imag(), 1e-9); 48 | 49 | for (std::size_t i=0; i; 60 | using stdc_t = value_type; 61 | if constexpr (alignof(value_type) == alignof(kc_t)){ 62 | const value_type init{0., 0.}; 63 | kokkos_blas1_dotc_test_impl(x, y, init, false); 64 | } 65 | } 66 | 67 | TEST_F(blas1_signed_complex_double_fixture, kokkos_dotc_initvalue) 68 | { 69 | using kc_t = Kokkos::complex; 70 | using stdc_t = value_type; 71 | if constexpr (alignof(value_type) == alignof(kc_t)){ 72 | const value_type init{-4., 5.}; 73 | kokkos_blas1_dotc_test_impl(x, y, init, true); 74 | } 
75 | } 76 | -------------------------------------------------------------------------------- /tests/kokkos-based/gtest_main_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | namespace KokkosKernelsSTD { 7 | namespace Impl { 8 | 9 | #if defined(KOKKOS_STDBLAS_ENABLE_TESTS) 10 | void signal_kokkos_impl_called(std::string_view functionName) 11 | { 12 | std::cout << functionName << ": kokkos impl" << std::endl; 13 | } 14 | #endif 15 | 16 | } // namespace Impl 17 | } // namespace KokkosKernelsSTD 18 | 19 | int main(int argc, char *argv[]) 20 | { 21 | ::testing::InitGoogleTest(&argc,argv); 22 | int err = 0; 23 | { 24 | Kokkos::initialize (argc, argv); 25 | err = RUN_ALL_TESTS(); 26 | Kokkos::finalize(); 27 | } 28 | return err; 29 | } 30 | -------------------------------------------------------------------------------- /tests/kokkos-based/hermitian_matrix_rank1_update_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include "gtest_fixtures.hpp" 19 | #include "helpers.hpp" 20 | 21 | namespace{ 22 | 23 | using namespace kokkostesting; 24 | 25 | template 26 | void hermitian_matrix_rank_1_update_gold_solution(const x_t &x, A_t &A, Triangle /* t */) 27 | { 28 | using size_type = std::experimental::extents<>::size_type; 29 | using std::experimental::linalg::impl::conj_if_needed; 30 | constexpr bool low = std::is_same_v; 31 | for (size_type j = 0; j < A.extent(1); ++j) { 32 | const size_type i1 = low ? A.extent(0) : j + 1; 33 | for (size_type i = low ? j : 0; i < i1; ++i) { 34 | A(i,j) += x(i) * conj_if_needed(x(j)); 35 | } 36 | } 37 | } 38 | 39 | template 40 | void test_kokkos_hermitian_matrix_rank1_update_impl(const x_t &x, A_t &A, Triangle t) 41 | { 42 | const auto get_gold = [&](auto A_gold) { 43 | hermitian_matrix_rank_1_update_gold_solution(x, A_gold, t); 44 | }; 45 | const auto compute = [&]() { 46 | std::experimental::linalg::hermitian_matrix_rank_1_update( 47 | KokkosKernelsSTD::kokkos_exec<>(), x, A, t); 48 | }; 49 | const auto tol = tolerance(1e-20, 1e-10f); 50 | test_op_Ax(x, A, tol, get_gold, compute); 51 | } 52 | 53 | } // anonymous namespace 54 | 55 | #define DEFINE_TESTS(blas_val_type) \ 56 | TEST_F(blas2_signed_##blas_val_type##_fixture, \ 57 | kokkos_hermitian_matrix_rank1_update) { \ 58 | using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ 59 | run_checked_tests("kokkos_", "hermitian_matrix_rank1_update", "", \ 60 | #blas_val_type, [&]() { \ 61 | \ 62 | test_kokkos_hermitian_matrix_rank1_update_impl(x_e0, A_sym_e0, \ 63 | std::experimental::linalg::lower_triangle); \ 64 | test_kokkos_hermitian_matrix_rank1_update_impl(x_e0, A_sym_e0, \ 65 | std::experimental::linalg::upper_triangle); \ 66 | \ 67 | }); \ 68 | } 69 | 70 | DEFINE_TESTS(double) 71 | DEFINE_TESTS(float) 
72 | DEFINE_TESTS(complex_double) 73 | -------------------------------------------------------------------------------- /tests/kokkos-based/hermitian_matrix_rank2_update_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include "gtest_fixtures.hpp" 19 | #include "helpers.hpp" 20 | 21 | namespace{ 22 | 23 | using namespace kokkostesting; 24 | 25 | template 26 | void hermitian_matrix_rank_2_update_gold_solution(const x_t &x, const y_t &y, A_t &A, Triangle /* t */) 27 | { 28 | using std::experimental::linalg::impl::conj_if_needed; 29 | using size_type = std::experimental::extents<>::size_type; 30 | constexpr bool low = std::is_same_v; 31 | for (size_type j = 0; j < A.extent(1); ++j) { 32 | const size_type i1 = low ? A.extent(0) : j + 1; 33 | for (size_type i = low ? 
j : 0; i < i1; ++i) { 34 | A(i,j) += x(i) * conj_if_needed(y(j)) + y(i) * conj_if_needed(x(j)); 35 | } 36 | } 37 | } 38 | 39 | template 40 | void test_kokkos_hermitian_matrix_rank2_update_impl(const x_t &x, const y_t &y, A_t &A, Triangle t) 41 | { 42 | const auto get_gold = [&](auto A_gold) { 43 | hermitian_matrix_rank_2_update_gold_solution(x, y, A_gold, t); 44 | }; 45 | const auto compute = [&]() { 46 | std::experimental::linalg::hermitian_matrix_rank_2_update( 47 | KokkosKernelsSTD::kokkos_exec<>(), x, y, A, t); 48 | }; 49 | const auto tol = tolerance(1e-20, 1e-10f); 50 | test_op_Axy(x, y, A, tol, get_gold, compute); 51 | } 52 | 53 | } // anonymous namespace 54 | 55 | #define DEFINE_TESTS(blas_val_type) \ 56 | TEST_F(blas2_signed_##blas_val_type##_fixture, \ 57 | kokkos_hermitian_matrix_rank2_update) { \ 58 | using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ 59 | run_checked_tests("kokkos_", "hermitian_matrix_rank2_update", "", \ 60 | #blas_val_type, [&]() { \ 61 | \ 62 | test_kokkos_hermitian_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0, \ 63 | std::experimental::linalg::lower_triangle); \ 64 | test_kokkos_hermitian_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0, \ 65 | std::experimental::linalg::upper_triangle); \ 66 | \ 67 | }); \ 68 | } 69 | 70 | DEFINE_TESTS(double) 71 | DEFINE_TESTS(float) 72 | DEFINE_TESTS(complex_double) 73 | -------------------------------------------------------------------------------- /tests/kokkos-based/hermitian_matrix_rank_2k_update_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 
10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include "gtest_fixtures.hpp" 19 | #include "helpers.hpp" 20 | 21 | namespace{ 22 | 23 | using namespace kokkostesting; 24 | // Reference loop: C(i,j) += sum_k A(i,k)*conj(B(j,k)) + B(i,k)*conj(A(j,k)), restricted to i >= j when `low` is true and to i <= j otherwise. 25 | template 26 | void hermitian_matrix_rank_2k_update_gold_solution(const A_t &A, const B_t &B, C_t &C, Triangle /* t */) 27 | { 28 | using std::experimental::linalg::impl::conj_if_needed; 29 | using size_type = std::experimental::extents<>::size_type; 30 | constexpr bool low = std::is_same_v; 31 | const auto size = C.extent(0); // rows/columns of C bound i and j; the inner dimension is A.extent(1) 32 | for (size_type j = 0; j < size; ++j) { 33 | const size_type i1 = low ? size : j + 1; 34 | for (size_type i = low ? j : 0; i < i1; ++i) { 35 | for (size_type k = 0; k < A.extent(1); ++k) { 36 | C(i, j) += A(i, k) * conj_if_needed(B(j, k)) + B(i, k) * conj_if_needed(A(j, k)); 37 | } 38 | } 39 | } 40 | } 41 | 42 | template 43 | void test_kokkos_hermitian_matrix_rank2k_update_impl(const A_t &A, const B_t &B, C_t &C, Triangle t) 44 | { 45 | const auto get_gold = [&](auto C_gold) { 46 | hermitian_matrix_rank_2k_update_gold_solution(A, B, C_gold, t); 47 | }; 48 | const auto compute = [&]() { 49 | std::experimental::linalg::hermitian_matrix_rank_2k_update( 50 | KokkosKernelsSTD::kokkos_exec<>(), A, B, C, t); 51 | }; 52 | const auto tol = tolerance(1e-20, 1e-10f); 53 | test_op_CAB(A, B, C, tol, get_gold, compute); 54 | } 55 | 56 | } // anonymous namespace 57 | 58 | #define DEFINE_TESTS(blas_val_type) \ 59 | TEST_F(blas2_signed_##blas_val_type##_fixture, \ 60 | kokkos_hermitian_matrix_rank2k_update) { \ 61 | using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ 62 | run_checked_tests("kokkos_", "hermitian_matrix_rank2k_update", "", \ 63 | #blas_val_type, [&]() { \ 64 | \ 65 | 
test_kokkos_hermitian_matrix_rank2k_update_impl(A_sym_e0, A_sym_e0, A_hem_e0, \ 66 | std::experimental::linalg::lower_triangle); \ 67 | test_kokkos_hermitian_matrix_rank2k_update_impl(A_sym_e0, A_sym_e0, A_hem_e0, \ 68 | std::experimental::linalg::upper_triangle); \ 69 | \ 70 | }); \ 71 | } 72 | 73 | DEFINE_TESTS(double) 74 | DEFINE_TESTS(float) 75 | DEFINE_TESTS(complex_double) 76 | -------------------------------------------------------------------------------- /tests/kokkos-based/hermitian_matrix_rank_k_update_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include "gtest_fixtures.hpp" 19 | #include "helpers.hpp" 20 | 21 | namespace{ 22 | 23 | using namespace kokkostesting; 24 | // Reference ("gold") hermitian rank-k update: C(i, j) += alpha * A(i, k) * conj_if_needed(A(j, k)) for every k; when low is true i runs from j to C_ext - 1, otherwise from 0 to j (low is presumably "Triangle is lower_triangle_t" -- TODO confirm, the is_same_v arguments are not visible here). 25 | template 26 | void hermitian_matrix_rank_k_update_gold_solution(AlphaType alpha, 27 | const A_t &A, C_t &C, Triangle /* t */) 28 | { 29 | using std::experimental::linalg::impl::conj_if_needed; 30 | using size_type = std::experimental::extents<>::size_type; 31 | constexpr bool low = std::is_same_v; 32 | const auto C_ext = C.extent(0); 33 | const auto A_ext1 = A.extent(1); 34 | for (size_type j = 0; j < C_ext; ++j) { 35 | const size_type i1 = low ? C_ext : j + 1; 36 | for (size_type i = low ? 
j : 0; i < i1; ++i) { 37 | for (size_type k = 0; k < A_ext1; ++k) { 38 | C(i, j) += alpha * A(i, k) * conj_if_needed(A(j, k)); 39 | } 40 | } 41 | } 42 | } 43 | // Runs hermitian_matrix_rank_k_update under the Kokkos execution policy with alpha = 2 and hands test_op_CA the gold-solution callable, the kernel callable and a tolerance. 44 | template 45 | void test_kokkos_hermitian_matrix_rank_k_update_impl(const A_t A, C_t C, Triangle t) 46 | { 47 | const auto alpha = static_cast(2.0); // check if alpha is applied 48 | const auto get_gold = [&](auto C_gold) { 49 | hermitian_matrix_rank_k_update_gold_solution(alpha, A, C_gold, t); 50 | }; 51 | const auto compute = [&]() { 52 | std::experimental::linalg::hermitian_matrix_rank_k_update( 53 | KokkosKernelsSTD::kokkos_exec<>(), alpha, A, C, t); 54 | }; 55 | const auto tol = tolerance(1e-20, 1e-10f); 56 | test_op_CA(A, C, tol, get_gold, compute); 57 | } 58 | 59 | } // anonymous namespace 60 | // Defines one gtest case on blas2_signed_<type>_fixture that exercises both lower_triangle and upper_triangle. 61 | #define DEFINE_TESTS(blas_val_type) \ 62 | TEST_F(blas2_signed_##blas_val_type##_fixture, \ 63 | kokkos_hermitian_matrix_rank_k_update) { \ 64 | using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ 65 | run_checked_tests("kokkos_", "hermitian_matrix_rank_k_update", "", \ 66 | #blas_val_type, [&]() { \ 67 | \ 68 | test_kokkos_hermitian_matrix_rank_k_update_impl(A_sym_e0, A_hem_e0, \ 69 | std::experimental::linalg::lower_triangle); \ 70 | test_kokkos_hermitian_matrix_rank_k_update_impl(A_sym_e0, A_hem_e0, \ 71 | std::experimental::linalg::upper_triangle); \ 72 | \ 73 | }); \ 74 | } 75 | 76 | DEFINE_TESTS(double) 77 | DEFINE_TESTS(float) 78 | DEFINE_TESTS(complex_double) 79 | -------------------------------------------------------------------------------- /tests/kokkos-based/idx_abs_max_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | 5 | namespace 6 | { 7 | // Reference implementation: returns the index of the first element whose abs() is largest (strict < keeps the earliest maximum); reads x(0) unconditionally, so x is assumed non-empty. 8 | template 9 | std::experimental::extents<>::size_type 10 | vector_idx_abs_max_gold_solution(x_t x) 11 | { 12 | 13 | using std::abs; 14 | using size_type = std::experimental::extents<>::size_type; 15 | 16 | 
size_type maxInd = 0; 17 | decltype(abs(x(0))) maxVal = abs(x(0)); 18 | for (size_type i = 1; i < x.extent(0); ++i) { 19 | if (maxVal < abs(x(i))) { 20 | maxVal = abs(x(i)); 21 | maxInd = i; 22 | } 23 | } 24 | 25 | return maxInd; 26 | } 27 | // Checks the Kokkos vector_idx_abs_max result (value and type) against the gold solution; x is copied beforehand for the not-changed check. 28 | template 29 | void kokkos_blas1_vector_idx_abs_max_test_impl(x_t x) 30 | { 31 | 32 | namespace stdla = std::experimental::linalg; 33 | 34 | // copy x to verify it is not changed after kernel 35 | auto x_preKernel = kokkostesting::create_stdvector_and_copy(x); 36 | 37 | const auto gold = vector_idx_abs_max_gold_solution(x); 38 | const auto result = stdla::vector_idx_abs_max(KokkosKernelsSTD::kokkos_exec<>(), x); 39 | EXPECT_TRUE(gold == result); 40 | static_assert(std::is_same_v, 41 | "test:vector_idx_abs_max: gold and result types not same"); 42 | 43 | // x should not change after kernel 44 | const std::size_t extent = x.extent(0); 45 | for (std::size_t i=0; i; 66 | using stdc_t = value_type; 67 | if constexpr(alignof(value_type) == alignof(kc_t)){ 68 | kokkos_blas1_vector_idx_abs_max_test_impl(x); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /tests/kokkos-based/matrix_rank1_update_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include "gtest_fixtures.hpp" 19 | #include "helpers.hpp" 20 | 21 | namespace{ 22 | 23 | using namespace kokkostesting; 24 | // Reference ("gold") rank-1 update: A(i, j) += x(i) * y(j) for every element of A. 25 | template 26 | void matrix_rank_1_update_gold_solution(const x_t &x, const y_t &y, A_t &A) 27 | { 28 | using size_type = std::experimental::extents<>::size_type; 29 | for (size_type i = 0; i < A.extent(0); ++i) { 30 | for (size_type j = 0; j < A.extent(1); ++j) { 31 | A(i, j) += x(i) * y(j); 32 | } 33 | } 34 | } 35 | // Compares the Kokkos matrix_rank_1_update against the gold solution through test_op_Axy. 36 | template 37 | void test_kokkos_matrix_rank1_update_impl(const x_t &x, const y_t &y, A_t &A) 38 | { 39 | const auto get_gold = [&](auto A_gold) { 40 | matrix_rank_1_update_gold_solution(x, y, A_gold); 41 | }; 42 | const auto compute = [&]() { 43 | std::experimental::linalg::matrix_rank_1_update( 44 | KokkosKernelsSTD::kokkos_exec<>(), x, y, A); 45 | }; 46 | const auto tol = tolerance(1e-20, 1e-10f); 47 | test_op_Axy(x, y, A, tol, get_gold, compute); 48 | } 49 | // Compares the Kokkos matrix_rank_1_update_c against the gold solution evaluated with conjugated(y). 50 | template 51 | void test_kokkos_matrix_rank1_update_conj_impl(const x_t &x, const y_t &y, A_t &A) 52 | { 53 | const auto get_gold = [&](auto A_gold) { 54 | matrix_rank_1_update_gold_solution(x, 55 | std::experimental::linalg::conjugated(y), A_gold); 56 | }; 57 | const auto compute = [&]() { 58 | std::experimental::linalg::matrix_rank_1_update_c( 59 | KokkosKernelsSTD::kokkos_exec<>(), x, y, A); 60 | }; 61 | const auto tol = tolerance(1e-20, 1e-10f); 62 | test_op_Axy(x, y, A, tol, get_gold, compute); 63 | } 64 | 65 | } // anonymous namespace 66 | // Defines the plain and the conjugated rank-1 update gtest cases for one fixture value type. 67 | #define DEFINE_TESTS(blas_val_type) \ 68 | TEST_F(blas2_signed_##blas_val_type##_fixture, kokkos_matrix_rank1_update) { \ 69 | using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ 70 | run_checked_tests("kokkos_", "matrix_rank1_update", "", \ 71 | #blas_val_type, [&]() { \ 72 | \ 73 | 
test_kokkos_matrix_rank1_update_impl(x_e0, x_e1, A_e0e1); \ 74 | \ 75 | }); \ 76 | } \ 77 | TEST_F(blas2_signed_##blas_val_type##_fixture, \ 78 | kokkos_matrix_rank1_update_conjugated) { \ 79 | using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ 80 | run_checked_tests("kokkos_", "matrix_rank1_update", "_conjugated", \ 81 | #blas_val_type, [&]() { \ 82 | \ 83 | test_kokkos_matrix_rank1_update_conj_impl(x_e0, x_e1, A_e0e1); \ 84 | \ 85 | }); \ 86 | } 87 | 88 | DEFINE_TESTS(double) 89 | DEFINE_TESTS(float) 90 | DEFINE_TESTS(complex_double) 91 | -------------------------------------------------------------------------------- /tests/kokkos-based/overwriting_matrix_vector_product.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | 5 | namespace 6 | { 7 | 8 | template 9 | void gemv_gold_solution(A_t A, x_t x, y_t y) 10 | { 11 | for (std::size_t i=0; i 20 | void kokkos_blas_overwriting_gemv_impl(A_t A, x_t x, y_t y) 21 | { 22 | namespace stdla = std::experimental::linalg; 23 | 24 | using value_type = typename A_t::value_type; 25 | const std::size_t extent0 = A.extent(0); 26 | const std::size_t extent1 = A.extent(1); 27 | 28 | // copy operands before running the kernel 29 | auto A_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(A); 30 | auto x_preKernel = kokkostesting::create_stdvector_and_copy(x); 31 | auto y_preKernel = kokkostesting::create_stdvector_and_copy(y); 32 | 33 | // compute the gold (reference) y with gemv_gold_solution 34 | std::vector gold(y.extent(0)); 35 | using mdspan_t = mdspan>; 36 | mdspan_t y_gold(gold.data(), y.extent(0)); 37 | gemv_gold_solution(A, x, y_gold); 38 | 39 | stdla::matrix_vector_product(KokkosKernelsSTD::kokkos_exec<>(), A, x, y); 40 | 41 | // after kernel, A,x should be unchanged, y should be equal to y_gold. 
42 | // note that for A we need to visit all elements rowwise 43 | // since that is how the preKernel values were stored above (create_stdvector_and_copy_rowwise) 44 | 45 | if constexpr(std::is_same_v){ 46 | // check x 47 | for (std::size_t j=0; j){ 62 | // check x 63 | for (std::size_t j=0; j>){ 78 | // check x 79 | for (std::size_t j=0; j; 113 | using stdc_t = value_type; 114 | if constexpr (alignof(value_type) == alignof(kc_t)){ 115 | kokkos_blas_overwriting_gemv_impl(A_e0e1, x_e1, x_e0); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /tests/kokkos-based/scale_rank1_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | 5 | namespace 6 | { 7 | 8 | template 9 | void scale_gold_solution(x_t x, FactorT factor) 10 | { 11 | FactorT result = {}; 12 | for (std::size_t i=0; i 18 | void kokkos_blas1_scale_test_impl(x_t x, FactorT factor) 19 | { 20 | namespace stdla = std::experimental::linalg; 21 | 22 | using value_type = typename x_t::value_type; 23 | const std::size_t extent = x.extent(0); 24 | 25 | // compute gold 26 | std::vector gold(extent); 27 | using mdspan_t = mdspan>; 28 | mdspan_t x_gold(gold.data(), extent); 29 | for (std::size_t i=0; i(), factor, x); 35 | 36 | if constexpr(std::is_same_v){ 37 | for (std::size_t i=0; i){ 43 | for (std::size_t i=0; i>){ 49 | for (std::size_t i=0; i(2)); 61 | } 62 | 63 | TEST_F(blas1_signed_double_fixture, kokkos_scale) 64 | { 65 | kokkos_blas1_scale_test_impl(x, static_cast(2)); 66 | } 67 | // The complex cases below only run the test body when value_type and the Kokkos::complex alias kc_t have the same alignment (if constexpr guard). 68 | TEST_F(blas1_signed_complex_double_fixture, kokkos_scale_complex_factor) 69 | { 70 | using kc_t = Kokkos::complex; 71 | using stdc_t = value_type; 72 | if constexpr(alignof(value_type) == alignof(kc_t)){ 73 | const value_type factor{2., 0.}; 74 | kokkos_blas1_scale_test_impl(x, factor); 75 | } 76 | } 77 | 78 | TEST_F(blas1_signed_complex_double_fixture, kokkos_scale_double_factor) 79 | { 80 | using kc_t = 
Kokkos::complex; 81 | using stdc_t = value_type; 82 | if constexpr(alignof(value_type) == alignof(kc_t)){ 83 | kokkos_blas1_scale_test_impl(x, 2.); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /tests/kokkos-based/scale_rank2_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | 5 | namespace 6 | { 7 | 8 | template 9 | void scale_gold_solution(A_t A, FactorT factor) 10 | { 11 | FactorT result = {}; 12 | for (std::size_t i=0; i 20 | void kokkos_blas_scale_test_impl(A_t A, FactorT factor) 21 | { 22 | namespace stdla = std::experimental::linalg; 23 | 24 | using value_type = typename A_t::value_type; 25 | const std::size_t extent0 = A.extent(0); 26 | const std::size_t extent1 = A.extent(1); 27 | 28 | // compute gold 29 | std::vector gold(extent0*extent1); 30 | using mdspan_t = mdspan>; 31 | mdspan_t A_gold(gold.data(), extent0, extent1); 32 | for (std::size_t i=0; i(), factor, A); 40 | 41 | if constexpr(std::is_same_v){ 42 | for (std::size_t i=0; i){ 50 | for (std::size_t i=0; i>){ 58 | for (std::size_t i=0; i(2)); 72 | } 73 | 74 | TEST_F(blas2_signed_double_fixture, kokkos_scale) 75 | { 76 | kokkos_blas_scale_test_impl(A_e0e1, static_cast(2)); 77 | } 78 | // The complex cases below only run the test body when value_type and the Kokkos::complex alias kc_t have the same alignment (if constexpr guard). 79 | TEST_F(blas2_signed_complex_double_fixture, kokkos_scale_complex_factor) 80 | { 81 | using kc_t = Kokkos::complex; 82 | using stdc_t = value_type; 83 | if constexpr (alignof(value_type) == alignof(kc_t)){ 84 | const value_type factor{2., 0.}; 85 | kokkos_blas_scale_test_impl(A_e0e1, factor); 86 | } 87 | } 88 | 89 | TEST_F(blas2_signed_complex_double_fixture, kokkos_scale_double_factor) 90 | { 91 | using kc_t = Kokkos::complex; 92 | using stdc_t = value_type; 93 | if constexpr (alignof(value_type) == alignof(kc_t)){ 94 | kokkos_blas_scale_test_impl(A_e0e1, 2.); 95 | } 96 | } 97 | 
-------------------------------------------------------------------------------- /tests/kokkos-based/swap_elements_rank1_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | 5 | namespace{ 6 | 7 | template 8 | void kokkos_blas_swap_test_rank1_impl(x_t x, y_t y) 9 | { 10 | namespace stdla = std::experimental::linalg; 11 | 12 | using value_type = typename x_t::value_type; 13 | const std::size_t extent = x.extent(0); 14 | 15 | // verify that x, y are different before running kernel 16 | for (std::size_t i=0; i(), x, y); 25 | 26 | // after kernel, x should be unchanged, y should be equal to x -- NOTE(review): that wording describes a copy; in this swap test x and y would be expected to hold each other's pre-kernel values -- confirm against the checks below 27 | if constexpr(std::is_same_v){ 28 | for (std::size_t i=0; i){ 35 | for (std::size_t i=0; i>){ 42 | for (std::size_t i=0; i; 66 | using stdc_t = value_type; 67 | if constexpr(alignof(value_type) == alignof(kc_t)){ 68 | kokkos_blas_swap_test_rank1_impl(x, y); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /tests/kokkos-based/swap_elements_rank2_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | 5 | namespace{ 6 | 7 | template 8 | void kokkos_blas_swap_test_rank2_impl(A_t A, B_t B) 9 | { 10 | namespace stdla = std::experimental::linalg; 11 | 12 | using value_type = typename A_t::value_type; 13 | const std::size_t extent0 = A.extent(0); 14 | const std::size_t extent1 = A.extent(1); 15 | 16 | // verify that A, B are different before running kernel 17 | for (std::size_t i=0; i(), A, B); 28 | 29 | // after kernel, A should be unchanged, B should be equal to A -- NOTE(review): that wording describes a copy; in this swap test A and B would be expected to hold each other's pre-kernel values -- confirm against the checks below 30 | // note that we need to visit all elements rowwise since that is 31 | // how the preKernel values were stored above 32 | if constexpr(std::is_same_v){ 33 | std::size_t count=0; 34 | for (std::size_t i=0; i){ 43 | std::size_t count=0; 44 | for (std::size_t 
i=0; i>){ 53 | std::size_t count=0; 54 | for (std::size_t i=0; i; 80 | using stdc_t = value_type; 81 | if constexpr(alignof(value_type) == alignof(kc_t)){ 82 | kokkos_blas_swap_test_rank2_impl(A_e0e1, B_e0e1); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /tests/kokkos-based/symmetric_matrix_rank1_update_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include "gtest_fixtures.hpp" 19 | #include "helpers.hpp" 20 | 21 | namespace{ 22 | 23 | using namespace kokkostesting; 24 | // Reference ("gold") symmetric rank-1 update: A(i, j) += x(i) * x(j), with (i, j) restricted to one triangle selected by low (i from j to extent(0) - 1 when low, otherwise from 0 to j). 25 | template 26 | void symmetric_matrix_rank_1_update_gold_solution(const x_t &x, A_t &A, Triangle /* t */) 27 | { 28 | using size_type = std::experimental::extents<>::size_type; 29 | constexpr bool low = std::is_same_v; 30 | for (size_type j = 0; j < A.extent(1); ++j) { 31 | const size_type i1 = low ? A.extent(0) : j + 1; 32 | for (size_type i = low ? 
j : 0; i < i1; ++i) { 33 | A(i,j) += x(i) * x(j); 34 | } 35 | } 36 | } 37 | // Compares the Kokkos symmetric_matrix_rank_1_update against the gold solution through test_op_Ax. 38 | template 39 | void test_kokkos_symmetric_matrix_rank1_update_impl(const x_t &x, A_t &A, Triangle t) 40 | { 41 | const auto get_gold = [&](auto A_gold) { 42 | symmetric_matrix_rank_1_update_gold_solution(x, A_gold, t); 43 | }; 44 | const auto compute = [&]() { 45 | std::experimental::linalg::symmetric_matrix_rank_1_update( 46 | KokkosKernelsSTD::kokkos_exec<>(), x, A, t); 47 | }; 48 | const auto tol = tolerance(1e-20, 1e-10f); 49 | test_op_Ax(x, A, tol, get_gold, compute); 50 | } 51 | 52 | } // anonymous namespace 53 | // Defines one gtest case per fixture value type, run for lower_triangle and for upper_triangle. 54 | #define DEFINE_TESTS(blas_val_type) \ 55 | TEST_F(blas2_signed_##blas_val_type##_fixture, \ 56 | kokkos_symmetric_matrix_rank1_update) { \ 57 | using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ 58 | run_checked_tests("kokkos_", "symmetric_matrix_rank1_update", "", \ 59 | #blas_val_type, [&]() { \ 60 | \ 61 | test_kokkos_symmetric_matrix_rank1_update_impl(x_e0, A_sym_e0, \ 62 | std::experimental::linalg::lower_triangle); \ 63 | test_kokkos_symmetric_matrix_rank1_update_impl(x_e0, A_sym_e0, \ 64 | std::experimental::linalg::upper_triangle); \ 65 | \ 66 | }); \ 67 | } 68 | 69 | DEFINE_TESTS(double) 70 | DEFINE_TESTS(float) 71 | DEFINE_TESTS(complex_double) 72 | -------------------------------------------------------------------------------- /tests/kokkos-based/symmetric_matrix_rank2_update_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include "gtest_fixtures.hpp" 19 | #include "helpers.hpp" 20 | 21 | namespace{ 22 | 23 | using namespace kokkostesting; 24 | // Reference ("gold") symmetric rank-2 update: A(i, j) += x(i) * y(j) + y(i) * x(j), with (i, j) restricted to one triangle selected by low (i from j to extent(0) - 1 when low, otherwise from 0 to j). 25 | template 26 | void symmetric_matrix_rank_2_update_gold_solution(const x_t &x, const y_t &y, A_t &A, Triangle /* t */) 27 | { 28 | using size_type = std::experimental::extents<>::size_type; 29 | constexpr bool low = std::is_same_v; 30 | for (size_type j = 0; j < A.extent(1); ++j) { 31 | const size_type i1 = low ? A.extent(0) : j + 1; 32 | for (size_type i = low ? j : 0; i < i1; ++i) { 33 | A(i,j) += x(i) * y(j) + y(i) * x(j); 34 | } 35 | } 36 | } 37 | // Compares the Kokkos symmetric_matrix_rank_2_update against the gold solution through test_op_Axy. 38 | template 39 | void test_kokkos_symmetric_matrix_rank2_update_impl(const x_t &x, const y_t &y, A_t &A, Triangle t) 40 | { 41 | const auto get_gold = [&](auto A_gold) { 42 | symmetric_matrix_rank_2_update_gold_solution(x, y, A_gold, t); 43 | }; 44 | const auto compute = [&]() { 45 | std::experimental::linalg::symmetric_matrix_rank_2_update( 46 | KokkosKernelsSTD::kokkos_exec<>(), x, y, A, t); 47 | }; 48 | const auto tol = tolerance(1e-20, 1e-10f); 49 | test_op_Axy(x, y, A, tol, get_gold, compute); 50 | } 51 | 52 | } // anonymous namespace 53 | // Defines one gtest case per fixture value type, run for lower_triangle and for upper_triangle. 54 | #define DEFINE_TESTS(blas_val_type) \ 55 | TEST_F(blas2_signed_##blas_val_type##_fixture, \ 56 | kokkos_symmetric_matrix_rank2_update) { \ 57 | using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ 58 | run_checked_tests("kokkos_", "symmetric_matrix_rank2_update", "", \ 59 | #blas_val_type, [&]() { \ 60 | \ 61 | test_kokkos_symmetric_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0, \ 62 | std::experimental::linalg::lower_triangle); \ 63 | test_kokkos_symmetric_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0, \ 64 | std::experimental::linalg::upper_triangle); \ 65 | \ 66 | }); \ 67 | } 68 
| 69 | DEFINE_TESTS(double) 70 | DEFINE_TESTS(float) 71 | DEFINE_TESTS(complex_double) 72 | -------------------------------------------------------------------------------- /tests/kokkos-based/symmetric_matrix_rank_2k_update_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include "gtest_fixtures.hpp" 19 | #include "helpers.hpp" 20 | 21 | namespace{ 22 | 23 | using namespace kokkostesting; 24 | // Reference ("gold") symmetric rank-2k update: C(i, j) += A(i, k) * B(j, k) + B(i, k) * A(j, k) over one triangle selected by low. NOTE(review): A.extent(1) bounds i, j and k alike, so A, B and C are assumed square with the same extent -- confirm against the fixture. 25 | template 26 | void symmetric_matrix_rank_2k_update_gold_solution(const A_t &A, const B_t &B, C_t &C, Triangle /* t */) 27 | { 28 | using size_type = std::experimental::extents<>::size_type; 29 | constexpr bool low = std::is_same_v; 30 | const auto size = A.extent(1); 31 | for (size_type j = 0; j < size; ++j) { 32 | const size_type i1 = low ? size : j + 1; 33 | for (size_type i = low ? 
j : 0; i < i1; ++i) { 34 | for (size_type k = 0; k < size; ++k) { 35 | C(i, j) += A(i, k) * B(j, k) + B(i, k) * A(j, k); 36 | } 37 | } 38 | } 39 | } 40 | // Compares the Kokkos symmetric_matrix_rank_2k_update against the gold solution through test_op_CAB. 41 | template 42 | void test_kokkos_symmetric_matrix_rank2k_update_impl(const A_t &A, const B_t &B, C_t &C, Triangle t) 43 | { 44 | const auto get_gold = [&](auto C_gold) { 45 | symmetric_matrix_rank_2k_update_gold_solution(A, B, C_gold, t); 46 | }; 47 | const auto compute = [&]() { 48 | std::experimental::linalg::symmetric_matrix_rank_2k_update( 49 | KokkosKernelsSTD::kokkos_exec<>(), A, B, C, t); 50 | }; 51 | const auto tol = tolerance(1e-20, 1e-10f); 52 | test_op_CAB(A, B, C, tol, get_gold, compute); 53 | } 54 | 55 | } // anonymous namespace 56 | // Defines one gtest case per fixture value type, run for lower_triangle and for upper_triangle; A_sym_e0 is passed as both A and B. 57 | #define DEFINE_TESTS(blas_val_type) \ 58 | TEST_F(blas2_signed_##blas_val_type##_fixture, \ 59 | kokkos_symmetric_matrix_rank2k_update) { \ 60 | using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ 61 | run_checked_tests("kokkos_", "symmetric_matrix_rank2k_update", "", \ 62 | #blas_val_type, [&]() { \ 63 | \ 64 | test_kokkos_symmetric_matrix_rank2k_update_impl(A_sym_e0, A_sym_e0, A_hem_e0, \ 65 | std::experimental::linalg::lower_triangle); \ 66 | test_kokkos_symmetric_matrix_rank2k_update_impl(A_sym_e0, A_sym_e0, A_hem_e0, \ 67 | std::experimental::linalg::upper_triangle); \ 68 | \ 69 | }); \ 70 | } 71 | 72 | DEFINE_TESTS(double) 73 | DEFINE_TESTS(float) 74 | DEFINE_TESTS(complex_double) 75 | -------------------------------------------------------------------------------- /tests/kokkos-based/symmetric_matrix_rank_k_update_kokkos.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. 
Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include "gtest_fixtures.hpp" 19 | #include "helpers.hpp" 20 | 21 | namespace{ 22 | 23 | using namespace kokkostesting; 24 | // Reference ("gold") symmetric rank-k update: C(i, j) += alpha * A(i, k) * A(j, k) for every k, with (i, j) restricted to one triangle selected by low (i from j to C_ext - 1 when low, otherwise from 0 to j). 25 | template 26 | void symmetric_matrix_rank_k_update_gold_solution(AlphaType alpha, 27 | const A_t &A, C_t &C, Triangle /* t */) 28 | { 29 | using size_type = std::experimental::extents<>::size_type; 30 | constexpr bool low = std::is_same_v; 31 | const auto C_ext = C.extent(0); 32 | const auto A_ext1 = A.extent(1); 33 | for (size_type j = 0; j < C_ext; ++j) { 34 | const size_type i1 = low ? C_ext : j + 1; 35 | for (size_type i = low ? j : 0; i < i1; ++i) { 36 | for (size_type k = 0; k < A_ext1; ++k) { 37 | C(i, j) += alpha * A(i, k) * A(j, k); 38 | } 39 | } 40 | } 41 | } 42 | // Runs symmetric_matrix_rank_k_update under the Kokkos execution policy with alpha = 2 and hands test_op_CA the gold-solution callable, the kernel callable and a tolerance. 43 | template 44 | void test_kokkos_symmetric_matrix_rank_k_update_impl(const A_t A, C_t C, Triangle t) 45 | { 46 | const auto alpha = static_cast(2.0); // check if alpha is applied 47 | const auto get_gold = [&](auto C_gold) { 48 | symmetric_matrix_rank_k_update_gold_solution(alpha, A, C_gold, t); 49 | }; 50 | const auto compute = [&]() { 51 | std::experimental::linalg::symmetric_matrix_rank_k_update( 52 | KokkosKernelsSTD::kokkos_exec<>(), alpha, A, C, t); 53 | }; 54 | const auto tol = tolerance(1e-20, 1e-10f); 55 | test_op_CA(A, C, tol, get_gold, compute); 56 | } 57 | 58 | } // anonymous namespace 59 | // Defines one gtest case per fixture value type, run for lower_triangle and for upper_triangle. 60 | #define DEFINE_TESTS(blas_val_type) \ 61 | TEST_F(blas2_signed_##blas_val_type##_fixture, \ 62 | kokkos_symmetric_matrix_rank_k_update) { \ 63 | using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ 64 | run_checked_tests("kokkos_", "symmetric_matrix_rank_k_update", "", 
\ 65 | #blas_val_type, [&]() { \ 66 | \ 67 | test_kokkos_symmetric_matrix_rank_k_update_impl(A_sym_e0, A_hem_e0, \ 68 | std::experimental::linalg::lower_triangle); \ 69 | test_kokkos_symmetric_matrix_rank_k_update_impl(A_sym_e0, A_hem_e0, \ 70 | std::experimental::linalg::upper_triangle); \ 71 | \ 72 | }); \ 73 | } 74 | 75 | DEFINE_TESTS(double) 76 | DEFINE_TESTS(float) 77 | DEFINE_TESTS(complex_double) 78 | -------------------------------------------------------------------------------- /tests/kokkos-based/test.cmake: -------------------------------------------------------------------------------- 1 | include(FindUnixCommands) 2 | 3 | # run test executable 4 | execute_process( 5 | COMMAND ${EXE_NAME} 6 | RESULT_VARIABLE RES_A 7 | OUTPUT_FILE ${LOG_FILE}) 8 | 9 | # first check that numerically the test passes 10 | if(RES_A) 11 | message(FATAL_ERROR "numerical test failed") 12 | else() 13 | message("numerical test succeeded") 14 | endif() 15 | 16 | # just checking the numerics is not enough 17 | # because we need to ensure that this result 18 | # comes from the kokkos impl 19 | # so we check that the proper string is found 20 | # which signals that the correct Kokkos impl was found/called 21 | set(CMD "grep -R '${TEST_STRING_FIND}' ${LOG_FILE} > /dev/null") 22 | execute_process(COMMAND ${BASH} -c ${CMD} RESULT_VARIABLE RES_B) 23 | if(RES_B) 24 | message(FATAL_ERROR 25 | "test failed: ${ALGO_NAME} did not call the correct Kokkos impl") 26 | else() 27 | message("${ALGO_NAME} called the correct Kokkos impl") 28 | endif() 29 | -------------------------------------------------------------------------------- /tests/kokkos-based/triangular_matrix_vector_solve.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 
7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #include "gtest_fixtures.hpp" 19 | #include "helpers.hpp" 20 | 21 | namespace{ 22 | 23 | using namespace kokkostesting; 24 | 25 | template 29 | void triangular_matrix_vector_solve_gold_solution(A_t A, Triangle t, DiagonalStorage d, x_t x) 30 | { 31 | using size_type = typename std::experimental::extents<>::size_type; 32 | constexpr bool lower_triangle = std::is_same_v< 33 | Triangle, std::experimental::linalg::lower_triangle_t>; 34 | constexpr bool explicit_diagonal = std::is_same_v< 35 | DiagonalStorage, std::experimental::linalg::explicit_diagonal_t>; 36 | 37 | const size_type ext0 = A.extent(0); 38 | const size_type ext1 = A.extent(1); 39 | 40 | for (size_type ii = 0; ii < ext0; ++ii) { 41 | const size_type i = lower_triangle ? ii : ext0 - 1 - ii; 42 | const size_type j0 = lower_triangle ? 0 : i + 1; 43 | const size_type j1 = lower_triangle ? 
i : ext1; 44 | for (size_type j = j0; j < j1; ++j) { 45 | x(i) -= A(i, j) * x(j); 46 | } 47 | if constexpr (explicit_diagonal) { 48 | x(i) /= A(i, i); 49 | } 50 | } 51 | } 52 | 53 | template 58 | void test_triangular_matrix_vector_solve_impl(A_t A, b_t b, x_t x0, Triangle t, DiagonalStorage d) 59 | { 60 | // copy x to leave original fixture intact 61 | auto x_data = create_stdvector_and_copy(x0); 62 | auto x = make_mdspan(x_data); 63 | 64 | const auto get_gold = [&](auto x_gold) { 65 | std::experimental::linalg::copy(b, x_gold); 66 | triangular_matrix_vector_solve_gold_solution(A, t, d, x_gold); 67 | }; 68 | const auto compute = [&]() { 69 | std::experimental::linalg::triangular_matrix_vector_solve( 70 | KokkosKernelsSTD::kokkos_exec<>(), A, t, d, b, x); 71 | }; 72 | const auto tol = tolerance(1e-12, 1e-4f); 73 | test_op_xAb(A, b, x, tol, get_gold, compute); 74 | } 75 | 76 | } // anonymous namespace 77 | 78 | #define DEFINE_TESTS(blas_val_type) \ 79 | TEST_F(blas2_signed_##blas_val_type##_fixture, \ 80 | kokkos_triangular_matrix_vector_solve) { \ 81 | using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ 82 | run_checked_tests("kokkos_", "triangular_matrix_vector_solve", "", \ 83 | #blas_val_type, [&]() { \ 84 | \ 85 | test_triangular_matrix_vector_solve_impl(A_sym_e0, x_e0, x_e0, \ 86 | std::experimental::linalg::lower_triangle, \ 87 | std::experimental::linalg::implicit_unit_diagonal); \ 88 | test_triangular_matrix_vector_solve_impl(A_sym_e0, x_e0, x_e0, \ 89 | std::experimental::linalg::upper_triangle, \ 90 | std::experimental::linalg::explicit_diagonal); \ 91 | \ 92 | }); \ 93 | } 94 | 95 | DEFINE_TESTS(double) 96 | DEFINE_TESTS(float) 97 | DEFINE_TESTS(complex_double) 98 | -------------------------------------------------------------------------------- /tests/kokkos-based/vector_abs_sum_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include 
"helpers.hpp" 4 | 5 | namespace{ 6 | 7 | template 8 | auto vector_abs_sum_gold_solution(x_t x, 9 | T initValue, 10 | bool useInit) 11 | { 12 | using std::abs; 13 | 14 | T result = {}; 15 | for (std::size_t i=0; i 24 | void kokkos_blas1_vector_abs_sum_test_impl(x_t x, 25 | T initValue, 26 | bool useInit) 27 | { 28 | 29 | namespace stdla = std::experimental::linalg; 30 | 31 | using value_type = typename x_t::value_type; 32 | const std::size_t extent = x.extent(0); 33 | 34 | // copy x to verify it is not changed after kernel 35 | auto x_preKernel = kokkostesting::create_stdvector_and_copy(x); 36 | 37 | // compute gold 38 | const T gold = vector_abs_sum_gold_solution(x, initValue, useInit); 39 | 40 | T result = {}; 41 | if (useInit){ 42 | result = stdla::vector_abs_sum(KokkosKernelsSTD::kokkos_exec<>(), 43 | x, initValue); 44 | }else{ 45 | result = stdla::vector_abs_sum(KokkosKernelsSTD::kokkos_exec<>(), 46 | x); 47 | } 48 | 49 | if constexpr(std::is_same_v){ 50 | // cannot use EXPECT_FLOAT_EQ because 51 | // in some cases that fails on third digit or similr 52 | EXPECT_NEAR(result, gold, 1e-2); 53 | } 54 | 55 | if constexpr(std::is_same_v){ 56 | // similarly to float 57 | EXPECT_NEAR(result, gold, 1e-9); 58 | } 59 | 60 | if constexpr(std::is_same_v>){ 61 | EXPECT_NEAR(result, gold, 1e-9); 62 | } 63 | 64 | // x,y should not change after kernel 65 | for (std::size_t i=0; i(0), false); 76 | } 77 | 78 | TEST_F(blas1_signed_float_fixture, kokkos_vector_abs_sum_initvalue) 79 | { 80 | kokkos_blas1_vector_abs_sum_test_impl(x, static_cast(3), true); 81 | } 82 | 83 | TEST_F(blas1_signed_double_fixture, kokkos_vector_abs_sum_noinitvalue) 84 | { 85 | kokkos_blas1_vector_abs_sum_test_impl(x, static_cast(0), false); 86 | } 87 | 88 | TEST_F(blas1_signed_double_fixture, kokkos_vector_abs_sum_initvalue) 89 | { 90 | kokkos_blas1_vector_abs_sum_test_impl(x, static_cast(5), true); 91 | } 92 | 93 | TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_abs_sum_noinitvalue) 94 | { 95 | 
using kc_t = Kokkos::complex; 96 | using stdc_t = value_type; 97 | if constexpr(alignof(value_type) == alignof(kc_t)){ 98 | // for complex values, abs returns magnitude 99 | const double init = 0.; 100 | kokkos_blas1_vector_abs_sum_test_impl(x, init, false); 101 | } 102 | } 103 | 104 | TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_abs_sum_initvalue) 105 | { 106 | using kc_t = Kokkos::complex; 107 | using stdc_t = value_type; 108 | if constexpr(alignof(value_type) == alignof(kc_t)){ 109 | // for complex values, abs returns magnitude 110 | const double init = -2.; 111 | kokkos_blas1_vector_abs_sum_test_impl(x, init, true); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /tests/kokkos-based/vector_norm2_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | #include 5 | 6 | namespace 7 | { 8 | /* Reference value for the tests below: the square root of the accumulated squares (std::norm(x(i)) in one branch, x(i)*x(i) in the other), with initValue added under the root when useInit is true. */ 9 | template 10 | T vector_two_norm_gold_solution(x_t x, T initValue, bool useInit) 11 | { 12 | using std::abs; 13 | using value_type = typename x_t::value_type; 14 | 15 | T result = {}; 16 | for (std::size_t i=0; i>){ 18 | result += std::norm(x(i)); 19 | } 20 | else{ 21 | result += x(i) * x(i); 22 | } 23 | } 24 | 25 | using std::sqrt; 26 | if (useInit){ 27 | return sqrt(initValue + result); 28 | } 29 | else{ 30 | return sqrt(result); 31 | } 32 | } 33 | 34 | template 35 | void kokkos_blas1_vector_two_norm_test_impl(x_t x, T initValue, bool useInit) 36 | { 37 | namespace stdla = std::experimental::linalg; 38 | 39 | using value_type = typename x_t::value_type; 40 | const std::size_t extent = x.extent(0); 41 | 42 | // copy x to verify it is not changed after kernel 43 | auto x_preKernel = kokkostesting::create_stdvector_and_copy(x); 44 | 45 | const T gold = vector_two_norm_gold_solution(x, initValue, useInit); 46 | 47 | T result = {}; 48 | if (useInit){ 49 | result = 
stdla::vector_two_norm(KokkosKernelsSTD::kokkos_exec<>(), 50 | x, initValue); 51 | }else{ 52 | result = stdla::vector_two_norm(KokkosKernelsSTD::kokkos_exec<>(), 53 | x); 54 | } 55 | 56 | if constexpr(std::is_same_v){ 57 | EXPECT_NEAR(result, gold, 1e-2); 58 | } 59 | 60 | if constexpr(std::is_same_v){ 61 | EXPECT_NEAR(result, gold, 1e-9); 62 | } 63 | 64 | if constexpr(std::is_same_v>){ 65 | EXPECT_NEAR(result, gold, 1e-9); 66 | } 67 | 68 | // x should not change after kernel 69 | for (std::size_t i=0; i(0), false); 79 | } 80 | 81 | TEST_F(blas1_signed_float_fixture, kokkos_vector_two_norm_initvalue) 82 | { 83 | kokkos_blas1_vector_two_norm_test_impl(x, static_cast(3), true); 84 | } 85 | 86 | TEST_F(blas1_signed_double_fixture, kokkos_vector_two_norm_noinitvalue) 87 | { 88 | kokkos_blas1_vector_two_norm_test_impl(x, static_cast(0), false); 89 | } 90 | 91 | TEST_F(blas1_signed_double_fixture, kokkos_vector_two_norm_initvalue) 92 | { 93 | kokkos_blas1_vector_two_norm_test_impl(x, static_cast(5), true); 94 | } 95 | 96 | TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_two_norm_noinitvalue) 97 | { 98 | namespace stdla = std::experimental::linalg; 99 | using kc_t = Kokkos::complex; 100 | using stdc_t = value_type; 101 | if constexpr (alignof(value_type) == alignof(kc_t)){ 102 | kokkos_blas1_vector_two_norm_test_impl(x, static_cast(0), false); 103 | } 104 | } 105 | 106 | TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_two_norm_initvalue) 107 | { 108 | namespace stdla = std::experimental::linalg; 109 | using kc_t = Kokkos::complex; 110 | using stdc_t = value_type; 111 | if constexpr (alignof(value_type) == alignof(kc_t)){ 112 | kokkos_blas1_vector_two_norm_test_impl(x, static_cast(5), true); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /tests/kokkos-based/vector_sum_of_squares_kokkos.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 
"gtest_fixtures.hpp" 3 | #include "helpers.hpp" 4 | #include 5 | 6 | namespace 7 | { 8 | 9 | template 10 | std::experimental::linalg::sum_of_squares_result 11 | vector_sum_of_squares_gold_solution(x_t x, 12 | std::experimental::linalg::sum_of_squares_result init) 13 | { 14 | using std::abs; 15 | 16 | T scale = init.scaling_factor; 17 | for (std::size_t i = 0; i < x.extent(0); ++i) { 18 | scale = std::max(scale, abs(x(i))); 19 | } 20 | 21 | T ssq = (init.scaling_factor*init.scaling_factor*init.scaled_sum_of_squares)/(scale*scale); 22 | T s=0.; 23 | for (std::size_t i = 0; i < x.extent(0); ++i) { 24 | const auto absxi = abs(x(i)); 25 | const auto quotient = absxi/scale; 26 | ssq = ssq + quotient * quotient; 27 | s += absxi*absxi; 28 | } 29 | 30 | std::experimental::linalg::sum_of_squares_result result; 31 | result.scaled_sum_of_squares = ssq; 32 | result.scaling_factor = scale; 33 | 34 | // verify that things are consistent according to definition 35 | // scaled_sum_of_squares: is a value such that 36 | // scaling_factor^2 * scaled_sum_of_squares equals the 37 | // sum of squares of abs(x[i]) plus init.scaling_factor^2 * init.scaled_sum_of_squares. 
38 | // 39 | const auto lhs = scale*scale*ssq; 40 | const auto rhs = s+init.scaling_factor*init.scaling_factor*init.scaled_sum_of_squares; 41 | std::cout << "Gold check : " << lhs << " " << rhs << std::endl; 42 | if constexpr(std::is_same_v){ 43 | EXPECT_NEAR(lhs, rhs, 1e-2); 44 | } 45 | if constexpr(std::is_same_v){ 46 | EXPECT_NEAR(lhs, rhs, 1e-9); 47 | } 48 | 49 | return result; 50 | } 51 | /* Runs stdla::vector_sum_of_squares with the Kokkos execution policy and compares both fields of the result (scaled_sum_of_squares and scaling_factor) with the gold solution via EXPECT_NEAR. */ 52 | template 53 | void kokkos_blas1_vector_sum_of_squares_test_impl(x_t x, 54 | std::experimental::linalg::sum_of_squares_result initValue) 55 | { 56 | namespace stdla = std::experimental::linalg; 57 | 58 | using value_type = typename x_t::value_type; 59 | const std::size_t extent = x.extent(0); 60 | 61 | // copy x to verify it is not changed after kernel 62 | auto x_preKernel = kokkostesting::create_stdvector_and_copy(x); 63 | 64 | const auto gold = vector_sum_of_squares_gold_solution(x, initValue); 65 | auto result = stdla::vector_sum_of_squares(KokkosKernelsSTD::kokkos_exec<>(), 66 | x, initValue); 67 | 68 | if constexpr(std::is_same_v) 69 | { 70 | EXPECT_NEAR(result.scaled_sum_of_squares, gold.scaled_sum_of_squares, 1e-3); 71 | EXPECT_NEAR(result.scaling_factor, gold.scaling_factor, 1e-3); 72 | } 73 | 74 | if constexpr(std::is_same_v) 75 | { 76 | EXPECT_NEAR(result.scaled_sum_of_squares, gold.scaled_sum_of_squares, 1e-9); 77 | EXPECT_NEAR(result.scaling_factor, gold.scaling_factor, 1e-9); 78 | } 79 | 80 | // x should not change after kernel 81 | for (std::size_t i=0; i init_value{2.5f, 1.2f}; 92 | kokkos_blas1_vector_sum_of_squares_test_impl(x, init_value); 93 | } 94 | 95 | TEST_F(blas1_signed_double_fixture, kokkos_vector_sum_of_squares) 96 | { 97 | namespace stdla = std::experimental::linalg; 98 | stdla::sum_of_squares_result init_value{3.0, 1.2}; 99 | kokkos_blas1_vector_sum_of_squares_test_impl(x, init_value); 100 | } 101 | 102 | TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_sum_of_squares) 103 | { 104 | namespace stdla = std::experimental::linalg; 105 
| using kc_t = Kokkos::complex; 106 | using stdc_t = value_type; 107 | if constexpr (alignof(value_type) == alignof(kc_t)){ 108 | stdla::sum_of_squares_result init_value{2.5, 1.2}; 109 | kokkos_blas1_vector_sum_of_squares_test_impl(x, init_value); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /tests/native/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | macro(linalg_add_test name) 3 | add_executable(${name} ${name}.cpp) 4 | if(BLAS_FOUND) 5 | target_link_libraries(${name} linalg GTest::GTest GTest::Main ${BLAS_LIBRARIES}) 6 | else() 7 | # BLAS_LIBRARIES is literally "FALSE" if the BLAS was not found. 8 | # Linking against that causes linker errors involving "FALSE.lib". 9 | # Thus, we exclude BLAS_LIBRARIES completely if the BLAS was not found. 10 | target_link_libraries(${name} linalg GTest::GTest GTest::Main) 11 | endif() 12 | add_test(${name} ${name}) 13 | endmacro() 14 | 15 | linalg_add_test(abs_if_needed) 16 | linalg_add_test(abs_sum) 17 | linalg_add_test(add) 18 | linalg_add_test(conj_if_needed) 19 | linalg_add_test(conjugate_transposed) 20 | linalg_add_test(conjugated) 21 | linalg_add_test(copy) 22 | linalg_add_test(dot) 23 | linalg_add_test(gemm) 24 | linalg_add_test(gemv) 25 | linalg_add_test(gemv_no_ambig) 26 | linalg_add_test(ger) 27 | linalg_add_test(gerc) 28 | linalg_add_test(givens) 29 | linalg_add_test(hemm) 30 | linalg_add_test(her) 31 | linalg_add_test(her2) 32 | linalg_add_test(herk) 33 | linalg_add_test(her2k) 34 | linalg_add_test(idx_abs_max) 35 | linalg_add_test(imag_if_needed) 36 | linalg_add_test(matrix_inf_norm) 37 | linalg_add_test(matrix_one_norm) 38 | linalg_add_test(mixed_accessors) 39 | linalg_add_test(norm2) 40 | linalg_add_test(proxy_refs) 41 | linalg_add_test(real_if_needed) 42 | linalg_add_test(scale) 43 | linalg_add_test(scaled) 44 | linalg_add_test(swap) 45 | linalg_add_test(symm) 46 | linalg_add_test(syr) 47 | 
linalg_add_test(syr2) 48 | linalg_add_test(syrk) 49 | linalg_add_test(syr2k) 50 | linalg_add_test(transposed) 51 | linalg_add_test(trmm) 52 | linalg_add_test(trmv) 53 | linalg_add_test(trsm) 54 | -------------------------------------------------------------------------------- /tests/native/add.cpp: -------------------------------------------------------------------------------- 1 | #include "./gtest_fixtures.hpp" 2 | 3 | namespace { 4 | using LinearAlgebra::add; 5 | 6 | TEST(BLAS1_add, vector_double) 7 | { 8 | using scalar_t = double; 9 | using vector_t = mdspan>; 10 | 11 | constexpr std::size_t vectorSize(5); 12 | constexpr std::size_t storageSize = std::size_t(3) * vectorSize; 13 | std::vector storage(storageSize); 14 | 15 | vector_t x(storage.data(), vectorSize); 16 | vector_t y(storage.data() + vectorSize, vectorSize); 17 | vector_t z(storage.data() + 2*vectorSize, vectorSize); 18 | 19 | for (std::size_t k = 0; k < vectorSize; ++k) { 20 | const scalar_t x_k = scalar_t (k) + 1.0; 21 | const scalar_t y_k = scalar_t (k) + 2.0; 22 | x(k) = x_k; 23 | y(k) = y_k; 24 | z(k) = 0.0; 25 | } 26 | 27 | add(x, y, z); 28 | for (std::size_t k = 0; k < vectorSize; ++k) { 29 | const scalar_t x_k = scalar_t (k) + 1.0; 30 | const scalar_t y_k = scalar_t (k) + 2.0; 31 | // Make sure the function didn't modify the input. 
32 | EXPECT_EQ( x(k), x_k ); 33 | EXPECT_EQ( y(k), y_k ); 34 | EXPECT_EQ( z(k), x_k + y_k ); // check the output 35 | } 36 | } 37 | 38 | TEST(BLAS1_add, vector_complex_double) 39 | { 40 | using real_t = double; 41 | using scalar_t = std::complex; 42 | using vector_t = mdspan>; 43 | 44 | constexpr std::size_t vectorSize(5); 45 | constexpr std::size_t storageSize = std::size_t(3) * vectorSize; 46 | std::vector storage(storageSize); 47 | 48 | vector_t x(storage.data(), vectorSize); 49 | vector_t y(storage.data() + vectorSize, vectorSize); 50 | vector_t z(storage.data() + 2*vectorSize, vectorSize); 51 | 52 | for (std::size_t k = 0; k < vectorSize; ++k) { 53 | const scalar_t x_k(real_t(k) + 4.0, -real_t(k) - 1.0); 54 | const scalar_t y_k(real_t(k) + 5.0, -real_t(k) - 2.0); 55 | x(k) = x_k; 56 | y(k) = y_k; 57 | z(k) = scalar_t(0.0, 0.0); 58 | } 59 | 60 | add(x, y, z); 61 | for (std::size_t k = 0; k < vectorSize; ++k) { 62 | const scalar_t x_k(real_t(k) + 4.0, -real_t(k) - 1.0); 63 | const scalar_t y_k(real_t(k) + 5.0, -real_t(k) - 2.0); 64 | // Make sure the function didn't modify the input. 
65 | EXPECT_EQ( x(k), x_k ); 66 | EXPECT_EQ( y(k), y_k ); 67 | EXPECT_EQ( z(k), x_k + y_k ); // check the output 68 | } 69 | } 70 | 71 | TEST(BLAS1_add, matrix_double) 72 | { 73 | using scalar_t = double; 74 | constexpr std::size_t numRows(5); 75 | constexpr std::size_t numCols(6); 76 | constexpr std::size_t matrixSize = numRows * numCols; 77 | std::array A_storage; 78 | std::array B_storage; 79 | std::array C_storage; 80 | 81 | using matrix_t = mdspan>; 82 | matrix_t A(A_storage.data()); 83 | matrix_t B(B_storage.data()); 84 | matrix_t C(C_storage.data()); 85 | 86 | for (std::size_t c = 0; c < numCols; ++c) { 87 | for (std::size_t r = 0; r < numRows; ++r) { 88 | const scalar_t A_rc = scalar_t(c) + scalar_t(numCols) * scalar_t(r); 89 | const scalar_t B_rc = scalar_t(2.0) * A_rc; 90 | A(r,c) = A_rc; 91 | B(r,c) = B_rc; 92 | C(r,c) = scalar_t{}; 93 | } 94 | } 95 | add(A, B, C); 96 | for (std::size_t c = 0; c < numCols; ++c) { 97 | for (std::size_t r = 0; r < numRows; ++r) { 98 | const scalar_t A_rc = scalar_t(c) + scalar_t(numCols) * scalar_t(r); 99 | const scalar_t B_rc = scalar_t(2.0) * A_rc; 100 | // Make sure the function didn't modify the input. 
101 | EXPECT_EQ( A(r,c), A_rc ); 102 | EXPECT_EQ( B(r,c), B_rc ); 103 | EXPECT_EQ( C(r,c), A_rc + B_rc ); // check the output 104 | } 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /tests/native/conj_if_needed.cpp: -------------------------------------------------------------------------------- 1 | #include "./my_numbers.hpp" 2 | 3 | namespace { 4 | template 5 | void test_real_conj_if_needed() 6 | { 7 | using LinearAlgebra::impl::conj_if_needed; 8 | 9 | Real z(2.0); 10 | const Real z_conj_expected(2.0); 11 | 12 | auto z_conj = conj_if_needed(z); 13 | static_assert(std::is_same_v); 14 | EXPECT_EQ(z_conj, z_conj_expected); 15 | } 16 | 17 | template 18 | void test_any_complex_conj_if_needed() 19 | { 20 | using LinearAlgebra::impl::conj_if_needed; 21 | 22 | Complex z(2.0, -3.0); 23 | Complex z_orig(2.0, -3.0); 24 | const Complex z_conj_expected(2.0, 3.0); 25 | 26 | auto z_conj = conj_if_needed(z); 27 | static_assert(std::is_same_v); 28 | EXPECT_EQ(z_conj, z_conj_expected); 29 | EXPECT_EQ(z, z_orig); // conj didn't change its input 30 | } 31 | 32 | template 33 | void test_std_complex_conj_if_needed() 34 | { 35 | test_any_complex_conj_if_needed>(); 36 | } 37 | 38 | void test_MyComplex_conj_if_needed() 39 | { 40 | test_any_complex_conj_if_needed(); 41 | } 42 | 43 | TEST(test_numbers, conj_if_needed) 44 | { 45 | test_std_complex_conj_if_needed(); 46 | test_std_complex_conj_if_needed(); 47 | test_std_complex_conj_if_needed(); 48 | 49 | test_MyComplex_conj_if_needed(); 50 | 51 | test_real_conj_if_needed(); 52 | test_real_conj_if_needed(); 53 | test_real_conj_if_needed(); 54 | 55 | test_real_conj_if_needed(); 56 | test_real_conj_if_needed(); 57 | test_real_conj_if_needed(); 58 | test_real_conj_if_needed(); 59 | } 60 | } // end anonymous namespace 61 | -------------------------------------------------------------------------------- /tests/native/conjugate_transposed.cpp: 
-------------------------------------------------------------------------------- 1 | #include "./gtest_fixtures.hpp" 2 | 3 | namespace { 4 | using LinearAlgebra::conjugate_transposed; 5 | /* Fills A (extents passed at construction) and B (matrix_static_t, built from a pointer only) with known complex values, then checks that entry (j,i) of conjugate_transposed(M) equals conj(M(i,j)) and that A and B still hold their original values. */ 6 | TEST(conjugate_transposed, mdspan_complex_double) 7 | { 8 | using std::conj; 9 | using real_t = double; 10 | using scalar_t = std::complex; 11 | using matrix_dynamic_t = 12 | mdspan>; 13 | constexpr std::size_t dim = 5; 14 | using matrix_static_t = 15 | mdspan>; 16 | 17 | constexpr std::size_t storageSize = std::size_t(dim*dim); 18 | std::vector A_storage (storageSize); 19 | std::vector B_storage (storageSize); 20 | 21 | matrix_dynamic_t A (A_storage.data (), dim, dim); 22 | matrix_static_t B (B_storage.data ()); 23 | /* Each entry gets a value that depends on both indices; B holds the negation of A. */ 24 | for (std::size_t i = 0; i < dim; ++i) { 25 | for (std::size_t j = 0; j < dim; ++j) { 26 | const real_t i_val_re (real_t(i) + 1.0); 27 | const scalar_t i_val (i_val_re, i_val_re); 28 | const real_t j_val_re = real_t(j) + 1.0; 29 | const scalar_t j_val (j_val_re, j_val_re); 30 | const scalar_t val = i_val + real_t(dim) * j_val; 31 | 32 | A(i,j) = val; 33 | B(i,j) = -val; 34 | } 35 | } 36 | /* Apply the function under test; the results are indexed as (j,i) below. */ 37 | auto A_h = conjugate_transposed (A); 38 | auto B_h = conjugate_transposed (B); 39 | 40 | for (std::size_t i = 0; i < dim; ++i) { 41 | for (std::size_t j = 0; j < dim; ++j) { 42 | const real_t i_val_re (real_t(i) + 1.0); 43 | const scalar_t i_val (i_val_re, i_val_re); 44 | const real_t j_val_re = real_t(j) + 1.0; 45 | const scalar_t j_val (j_val_re, j_val_re); 46 | const scalar_t val = i_val + real_t(dim) * j_val; 47 | /* The inputs must still hold the values written above. */ 48 | EXPECT_EQ( A(i,j), val ); 49 | EXPECT_EQ( B(i,j), -val ); 50 | /* Entry (j,i) of each result is the conjugate of the value stored at (i,j). */ 51 | EXPECT_EQ( scalar_t(A_h(j,i)), conj(val) ); 52 | EXPECT_EQ( scalar_t(B_h(j,i)), -conj(val) ); 53 | /* Same check, expressed against the current contents of A and B. */ 54 | EXPECT_EQ( scalar_t(A_h(j,i)), conj(A(i,j)) ); 55 | EXPECT_EQ( scalar_t(B_h(j,i)), conj(B(i,j)) ); 56 | } 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /tests/native/gemv_no_ambig.cpp: 
-------------------------------------------------------------------------------- 1 | #include "./gtest_fixtures.hpp" 2 | #include 3 | 4 | #ifdef LINALG_HAS_EXECUTION 5 | # include 6 | #endif 7 | 8 | namespace { 9 | 10 | using LinearAlgebra::matrix_vector_product; 11 | using LinearAlgebra::scaled; 12 | /* Exercises overload resolution of matrix_vector_product: the calls below only have to compile and run; the test makes no assertions about the computed values. */ 13 | TEST(gemv, no_ambiguity) 14 | { 15 | int N = 40, M = 20; 16 | { 17 | // Create Data 18 | std::vector A_vec(N*M); 19 | std::vector x_vec(M); 20 | std::vector y_vec(N); 21 | 22 | mdspan> A(A_vec.data(), N, M); 23 | mdspan> x(x_vec.data(), M); 24 | mdspan> y(y_vec.data(), N); 25 | for (int i = 0; i < A.extent(0); ++i) { 26 | for (int j = 0; j < A.extent(1); ++j) { 27 | A(i,j) = 100.0 * i + j; 28 | } 29 | } 30 | for(int i = 0; i < x.extent(0); ++i) { 31 | x(i) = 1.0 * i; 32 | } 33 | for(int i = 0; i < y.extent(0); ++i) { 34 | y(i) = -1.0 * i; 35 | } 36 | 37 | matrix_vector_product(A, x, y); 38 | // The following is an ambiguous call unless the implementation 39 | // correctly constrains ExecutionPolicy (the spec would imply 40 | // std::is_execution_policy_v, though implementations might define 41 | // their own custom "execution policies" that cannot satisfy this). 
42 | matrix_vector_product( 43 | scaled(2.0, A), x, 44 | scaled(0.5, y), y); 45 | 46 | #ifdef LINALG_HAS_EXECUTION 47 | matrix_vector_product(std::execution::par, 48 | scaled(2.0, A), x, 49 | scaled(0.5, y), y); 50 | #endif 51 | } 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /tests/native/idx_abs_max.cpp: -------------------------------------------------------------------------------- 1 | #include "./gtest_fixtures.hpp" 2 | 3 | namespace { 4 | 5 | using LinearAlgebra::vector_idx_abs_max; 6 | 7 | TEST_F(unsigned_double_vector, vector_idx_abs_max) 8 | { 9 | constexpr size_t expected(9); 10 | EXPECT_EQ(expected, vector_idx_abs_max(v)); 11 | } 12 | 13 | TEST_F(signed_double_vector, vector_idx_abs_max) 14 | { 15 | constexpr size_t expected(9); 16 | EXPECT_EQ(expected, vector_idx_abs_max(v)); 17 | } 18 | 19 | TEST_F(signed_complex_vector, vector_idx_abs_max) 20 | { 21 | constexpr size_t expected(3); 22 | EXPECT_EQ(expected, vector_idx_abs_max(v)); 23 | } 24 | 25 | TEST(BLAS1_vector_idx_abs_max, trivial_case) 26 | { 27 | constexpr auto expected = std::numeric_limits::max(); 28 | 29 | std::array arr; 30 | using extents_type = extents; 31 | mdspan a(arr.data(),0); 32 | EXPECT_EQ(expected, vector_idx_abs_max(a)); 33 | 34 | using extents_type2 = extents; 35 | mdspan b(arr.data()); 36 | EXPECT_EQ(expected, vector_idx_abs_max(b)); 37 | } 38 | 39 | } // end anonymous namespace 40 | -------------------------------------------------------------------------------- /tests/native/imag_if_needed.cpp: -------------------------------------------------------------------------------- 1 | #include "./my_numbers.hpp" 2 | 3 | namespace { 4 | template 5 | void test_imag_if_needed_complex() 6 | { 7 | using LinearAlgebra::impl::imag_if_needed; 8 | std::complex z{R(3.0), R(4.0)}; 9 | auto z_imag = imag_if_needed(z); 10 | EXPECT_EQ(z_imag, R(4.0)); 11 | static_assert(std::is_same_v); 12 | } 13 | template 14 | void 
test_imag_if_needed_floating_point() 15 | { 16 | using LinearAlgebra::impl::imag_if_needed; 17 | T x = 9.0; 18 | auto x_imag = imag_if_needed(x); 19 | EXPECT_EQ(x_imag, T(0.0)); 20 | static_assert(std::is_same_v); 21 | } 22 | template 23 | void test_imag_if_needed_integral() 24 | { 25 | using LinearAlgebra::impl::imag_if_needed; 26 | T x = 3; 27 | auto x_imag = imag_if_needed(x); 28 | EXPECT_EQ(x_imag, T(0)); 29 | static_assert(std::is_same_v); 30 | } 31 | 32 | TEST(test_numbers, imag_if_needed) 33 | { 34 | test_imag_if_needed_complex(); 35 | test_imag_if_needed_complex(); 36 | test_imag_if_needed_complex(); 37 | 38 | test_imag_if_needed_floating_point(); 39 | test_imag_if_needed_floating_point(); 40 | test_imag_if_needed_floating_point(); 41 | 42 | test_imag_if_needed_integral(); 43 | test_imag_if_needed_integral(); 44 | test_imag_if_needed_integral(); 45 | test_imag_if_needed_integral(); 46 | test_imag_if_needed_integral(); 47 | test_imag_if_needed_integral(); 48 | test_imag_if_needed_integral(); 49 | test_imag_if_needed_integral(); 50 | 51 | { 52 | using LinearAlgebra::impl::imag_if_needed; 53 | TestLinearAlgebra::MyComplex z{3.0, 4.0}; 54 | auto z_imag = imag_if_needed(z); 55 | EXPECT_EQ(z_imag, 4.0); 56 | static_assert(std::is_same_v); 57 | } 58 | { 59 | using LinearAlgebra::impl::imag_if_needed; 60 | TestLinearAlgebra::MyReal x{3.0}; 61 | auto x_imag = imag_if_needed(x); 62 | EXPECT_EQ(x_imag, TestLinearAlgebra::MyReal{}); 63 | static_assert(std::is_same_v); 64 | } 65 | } 66 | } // end anonymous namespace 67 | -------------------------------------------------------------------------------- /tests/native/my_numbers.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LINALG_TESTS_NATIVE_MY_NUMBERS_HPP 2 | #define LINALG_TESTS_NATIVE_MY_NUMBERS_HPP 3 | 4 | #include "./gtest_fixtures.hpp" 5 | 6 | namespace TestLinearAlgebra { 7 | 8 | class MyReal { 9 | public: 10 | MyReal() = default; 11 | explicit MyReal(double 
value) : value_(value) {} 12 | double value() const { return value_; } 13 | 14 | friend MyReal abs(MyReal x) { return MyReal{std::abs(x.value())}; } 15 | 16 | friend bool operator==(MyReal x, MyReal y) { 17 | return x.value() == y.value(); 18 | } 19 | 20 | private: 21 | double value_ = 0.0; 22 | }; 23 | 24 | class MyComplex { 25 | private: 26 | double real_ = 0.0; 27 | double imag_ = 0.0; 28 | 29 | public: 30 | MyComplex() = default; 31 | MyComplex(double re, double im) : real_(re), imag_(im) {} 32 | 33 | friend double real(MyComplex z) { 34 | return z.real_; 35 | } 36 | 37 | friend double imag(MyComplex z) { 38 | return z.imag_; 39 | } 40 | 41 | friend double abs(MyComplex z) { 42 | return std::sqrt(z.real_ * z.real_ + z.imag_ * z.imag_); 43 | } 44 | 45 | friend MyComplex conj(MyComplex z) { 46 | return {z.real_, -z.imag_}; 47 | } 48 | 49 | std::complex value() const { 50 | return {real_, imag_}; 51 | } 52 | 53 | friend bool operator==(MyComplex x, MyComplex y) { 54 | return x.value() == y.value(); 55 | } 56 | }; 57 | 58 | } // namespace TestLinearAlgebra 59 | 60 | #endif // LINALG_TESTS_NATIVE_MY_NUMBERS_HPP 61 | -------------------------------------------------------------------------------- /tests/native/real_if_needed.cpp: -------------------------------------------------------------------------------- 1 | #include "./my_numbers.hpp" 2 | 3 | namespace { 4 | template 5 | void test_real_if_needed_complex() 6 | { 7 | using LinearAlgebra::impl::real_if_needed; 8 | std::complex z{R(3.0), R(4.0)}; 9 | auto z_imag = real_if_needed(z); 10 | EXPECT_EQ(z_imag, R(3.0)); 11 | static_assert(std::is_same_v); 12 | } 13 | template 14 | void test_real_if_needed_floating_point() 15 | { 16 | using LinearAlgebra::impl::real_if_needed; 17 | T x = 9.0; 18 | auto x_imag = real_if_needed(x); 19 | EXPECT_EQ(x_imag, T(9.0)); 20 | static_assert(std::is_same_v); 21 | } 22 | template 23 | void test_real_if_needed_integral() 24 | { 25 | using LinearAlgebra::impl::real_if_needed; 26 | T x = 
3; 27 | auto x_imag = real_if_needed(x); 28 | EXPECT_EQ(x_imag, T(3)); 29 | static_assert(std::is_same_v); 30 | } 31 | 32 | TEST(test_numbers, real_if_needed) 33 | { 34 | test_real_if_needed_complex(); 35 | test_real_if_needed_complex(); 36 | test_real_if_needed_complex(); 37 | 38 | test_real_if_needed_floating_point(); 39 | test_real_if_needed_floating_point(); 40 | test_real_if_needed_floating_point(); 41 | 42 | test_real_if_needed_integral(); 43 | test_real_if_needed_integral(); 44 | test_real_if_needed_integral(); 45 | test_real_if_needed_integral(); 46 | test_real_if_needed_integral(); 47 | test_real_if_needed_integral(); 48 | test_real_if_needed_integral(); 49 | test_real_if_needed_integral(); 50 | 51 | { 52 | using LinearAlgebra::impl::real_if_needed; 53 | TestLinearAlgebra::MyComplex z{ 3.0, 4.0 }; 54 | auto z_imag = real_if_needed(z); 55 | EXPECT_EQ(z_imag, 3.0); 56 | static_assert(std::is_same_v); 57 | } 58 | { 59 | using LinearAlgebra::impl::real_if_needed; 60 | TestLinearAlgebra::MyReal x{ 3.0 }; 61 | auto x_real = real_if_needed(x); 62 | EXPECT_EQ(x_real, TestLinearAlgebra::MyReal{ 3.0 }); 63 | static_assert(std::is_same_v); 64 | } 65 | } 66 | } // end anonymous namespace 67 | -------------------------------------------------------------------------------- /tests/native/scale.cpp: -------------------------------------------------------------------------------- 1 | #include "./gtest_fixtures.hpp" 2 | 3 | namespace { 4 | using LinearAlgebra::scale; 5 | 6 | TEST(BLAS1_scale, mdspan_double) 7 | { 8 | using scalar_t = double; 9 | using vector_t = mdspan>; 10 | 11 | constexpr std::size_t vectorSize(5); 12 | constexpr std::size_t storageSize = vectorSize; 13 | std::vector storage(storageSize); 14 | 15 | vector_t x(storage.data(), vectorSize); 16 | 17 | { 18 | for (std::size_t k = 0; k < vectorSize; ++k) { 19 | const scalar_t x_k = scalar_t (k) + 1.0; 20 | x(k) = x_k; 21 | } 22 | const scalar_t scaleFactor = 5.0; 23 | scale(scaleFactor, x); 24 | for 
(std::size_t k = 0; k < vectorSize; ++k) { 25 | const scalar_t x_k = scalar_t (k) + 1.0; 26 | EXPECT_EQ( x(k), scaleFactor * x_k ); 27 | } 28 | } 29 | { 30 | for (std::size_t k = 0; k < vectorSize; ++k) { 31 | const scalar_t x_k = scalar_t (k) + 1.0; 32 | x(k) = x_k; 33 | } 34 | const float scaleFactor = 5.0; 35 | scale(scaleFactor, x); 36 | for (std::size_t k = 0; k < vectorSize; ++k) { 37 | const scalar_t x_k = scalar_t (k) + 1.0; 38 | EXPECT_EQ( x(k), scaleFactor * x_k ); 39 | } 40 | } 41 | } 42 | 43 | TEST(BLAS1_scale, mdspan_complex_double) 44 | { 45 | using real_t = double; 46 | using scalar_t = std::complex; 47 | using vector_t = mdspan>; 48 | 49 | constexpr std::size_t vectorSize(5); 50 | constexpr std::size_t storageSize = vectorSize; 51 | std::vector storage(storageSize); 52 | 53 | vector_t x(storage.data(), vectorSize); 54 | 55 | { 56 | for (std::size_t k = 0; k < vectorSize; ++k) { 57 | const scalar_t x_k(real_t(k) + 4.0, -real_t(k) - 1.0); 58 | x(k) = x_k; 59 | } 60 | const real_t scaleFactor = 5.0; 61 | scale(scaleFactor, x); 62 | for (std::size_t k = 0; k < vectorSize; ++k) { 63 | const scalar_t x_k(real_t(k) + 4.0, -real_t(k) - 1.0); 64 | EXPECT_EQ( x(k), scaleFactor * x_k ); 65 | } 66 | } 67 | { 68 | for (std::size_t k = 0; k < vectorSize; ++k) { 69 | const scalar_t x_k(real_t(k) + 4.0, -real_t(k) - 1.0); 70 | x(k) = x_k; 71 | } 72 | const scalar_t scaleFactor (5.0, -1.0); 73 | scale(scaleFactor, x); 74 | for (std::size_t k = 0; k < vectorSize; ++k) { 75 | const scalar_t x_k(real_t(k) + 4.0, -real_t(k) - 1.0); 76 | EXPECT_EQ( x(k), scaleFactor * x_k ); 77 | } 78 | } 79 | } 80 | } 81 | 82 | // int main() { 83 | // std::cout << "hello world" << std::endl; 84 | // } 85 | -------------------------------------------------------------------------------- /tests/native/swap.cpp: -------------------------------------------------------------------------------- 1 | #include "./gtest_fixtures.hpp" 2 | 3 | namespace { 4 | using LinearAlgebra::swap_elements; 5 | 
/* Swaps two real vectors and checks that each one ends up holding the values originally written to the other. */ 6 | TEST(BLAS1_swap, mdspan_double) 7 | { 8 | using scalar_t = double; 9 | using vector_t = mdspan>; 10 | 11 | constexpr std::size_t vectorSize(5); 12 | constexpr std::size_t storageSize = std::size_t(2) * vectorSize; 13 | std::vector storage(storageSize); 14 | /* x views the first vectorSize entries of storage, y the next vectorSize entries. */ 15 | vector_t x(storage.data(), vectorSize); 16 | vector_t y(storage.data() + vectorSize, vectorSize); 17 | 18 | for (std::size_t k = 0; k < vectorSize; ++k) { 19 | const scalar_t x_k = scalar_t (k) + 1.0; 20 | const scalar_t y_k = scalar_t (k) + 2.0; 21 | x(k) = x_k; 22 | y(k) = y_k; 23 | } 24 | /* After the swap, x must hold the old y values and y the old x values. */ 25 | swap_elements(x, y); 26 | for (std::size_t k = 0; k < vectorSize; ++k) { 27 | const scalar_t x_k = scalar_t (k) + 1.0; 28 | const scalar_t y_k = scalar_t (k) + 2.0; 29 | EXPECT_EQ( x(k), y_k ); 30 | EXPECT_EQ( y(k), x_k ); 31 | } 32 | } 33 | /* Same check as above with complex-valued entries. */ 34 | TEST(BLAS1_swap, mdspan_complex_double) 35 | { 36 | using real_t = double; 37 | using scalar_t = std::complex; 38 | using vector_t = mdspan>; 39 | 40 | constexpr std::size_t vectorSize(5); 41 | constexpr std::size_t storageSize = std::size_t(2) * vectorSize; 42 | std::vector storage(storageSize); 43 | /* x views the first vectorSize entries of storage, y the next vectorSize entries. */ 44 | vector_t x(storage.data(), vectorSize); 45 | vector_t y(storage.data() + vectorSize, vectorSize); 46 | 47 | for (std::size_t k = 0; k < vectorSize; ++k) { 48 | const scalar_t x_k(real_t(k) + 4.0, -real_t(k) - 1.0); 49 | const scalar_t y_k(real_t(k) + 5.0, -real_t(k) - 2.0); 50 | x(k) = x_k; 51 | y(k) = y_k; 52 | } 53 | /* After the swap, x must hold the old y values and y the old x values. */ 54 | swap_elements(x, y); 55 | for (std::size_t k = 0; k < vectorSize; ++k) { 56 | const scalar_t x_k(real_t(k) + 4.0, -real_t(k) - 1.0); 57 | const scalar_t y_k(real_t(k) + 5.0, -real_t(k) - 2.0); 58 | EXPECT_EQ( x(k), y_k ); 59 | EXPECT_EQ( y(k), x_k ); 60 | } 61 | } 62 | } 63 | 64 | // int main() { 65 | // std::cout << "hello world" << std::endl; 66 | // } 67 | -------------------------------------------------------------------------------- /tests/native/trsm.cpp: -------------------------------------------------------------------------------- 1 | 
#include "./gtest_fixtures.hpp" 2 | #include 3 | 4 | namespace { 5 | constexpr std::size_t num_rows_A = 3; 6 | constexpr std::size_t num_cols_A = 3; 7 | constexpr double storage_A[] = 8 | {8., 0., 0., 9 | 2., 8., 0., 10 | 1., 2., 8.}; 11 | constexpr std::size_t num_rows_B = 4; 12 | constexpr std::size_t num_cols_B = 3; 13 | constexpr double storage_B[] = 14 | {1., 2., 3., 15 | 4., 5., 6., 16 | 7., 8., 9., 17 | 10., 11., 12.}; 18 | constexpr double storage_B_times_A[] = 19 | {15., 22., 24., 20 | 48., 52., 48., 21 | 81., 82., 72., 22 | 114., 112., 96.}; 23 | constexpr double storage_B_times_inv_A[] = 24 | {0.0390625, 0.15625, 0.375, 25 | 0.296875 , 0.4375 , 0.75, 26 | 0.5546875, 0.71875, 1.125, 27 | 0.8125 , 1. , 1.5}; 28 | 29 | template 30 | void fill_from_layout_right_storage( 31 | mdspan, Layout> out, 32 | const double* const in_storage, 33 | const std::size_t num_rows, 34 | const std::size_t num_cols) 35 | { 36 | mdspan, layout_right> in(in_storage, num_rows, num_cols); 37 | for(std::size_t i = 0; i < num_rows; ++i) { 38 | for(std::size_t j = 0; j < num_cols; ++j) { 39 | out(i,j) = in(i,j); 40 | } 41 | } 42 | } 43 | 44 | // Regression test for https://github.com/kokkos/stdBLAS/issues/244 . 45 | // It will fail if the j loop (mentioned in the bug) counts up instead of down. 
46 | template 47 | void test_tsrm_lower_triangular_right_side() 48 | { 49 | std::vector vec_A(num_rows_A * num_cols_A); 50 | std::vector vec_B(num_rows_B * num_cols_B); 51 | const std::size_t num_rows_X = num_rows_B; 52 | const std::size_t num_cols_X = num_cols_B; 53 | std::vector vec_X(num_rows_X * num_cols_X); 54 | 55 | mdspan, Layout> A(vec_A.data(), num_rows_A, num_cols_A); 56 | mdspan, Layout> B_nonconst(vec_B.data(), num_rows_B, num_cols_B); 57 | mdspan, Layout> X(vec_X.data(), num_rows_X, num_cols_X); 58 | 59 | fill_from_layout_right_storage(A, storage_A, num_rows_A, num_cols_A); 60 | fill_from_layout_right_storage(B_nonconst, storage_B, num_rows_B, num_cols_B); 61 | mdspan, Layout> B = B_nonconst; 62 | 63 | using LinearAlgebra::explicit_diagonal; 64 | using LinearAlgebra::lower_triangle; 65 | using LinearAlgebra::right_side; 66 | using LinearAlgebra::triangular_matrix_matrix_solve; 67 | triangular_matrix_matrix_solve(A, lower_triangle, explicit_diagonal, right_side, B, X); 68 | 69 | mdspan, layout_right> 70 | B_times_inv_A(storage_B_times_inv_A, num_rows_B, num_cols_A); 71 | 72 | for(IndexType r = 0; r < IndexType(num_rows_B); ++r) { 73 | for(IndexType c = 0; c < IndexType(num_cols_A); ++c) { 74 | // We chose the values in A and B so that triangular 75 | // solve could compute them without rounding error. 
76 | EXPECT_EQ( X(r,c), B_times_inv_A(r,c) ); 77 | } 78 | } 79 | } 80 | 81 | TEST(BLAS3_trsm, double_size_t_layout_right ) 82 | { 83 | test_tsrm_lower_triangular_right_side< ::std::size_t, layout_right >(); 84 | } 85 | 86 | TEST(BLAS3_trsm, double_int_layout_left ) 87 | { 88 | test_tsrm_lower_triangular_right_side< int, layout_left >(); 89 | } 90 | 91 | } // end anonymous namespace 92 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_add_kk.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_ADD_HPP_ 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_ADD_HPP_ 4 | 5 | #include "signal_kokkos_impl_called.hpp" 6 | #include "static_extent_match.hpp" 7 | 8 | namespace KokkosKernelsSTD { 9 | 10 | namespace add_impl{ 11 | 12 | template 13 | void repeat_impl(F&& f, std::integer_sequence){ 14 | ( f(std::integral_constant{}), ... ); 15 | } 16 | 17 | template 18 | void repeat(F&& f){ 19 | repeat_impl(f, std::make_integer_sequence{}); 20 | } 21 | 22 | } // namespace add_impl 23 | 24 | // keeping this in mind: https://github.com/kokkos/stdBLAS/issues/122 25 | 26 | template::size_type ... ext_x, 29 | class Layout_x, 30 | class ElementType_y, 31 | std::experimental::extents<>::size_type ... ext_y, 32 | class Layout_y, 33 | class ElementType_z, 34 | std::experimental::extents<>::size_type ... 
ext_z, 35 | class Layout_z> 36 | requires (sizeof...(ext_x) == sizeof...(ext_y) && sizeof...(ext_x) == sizeof...(ext_z)) 37 | void add(kokkos_exec, 38 | std::experimental::mdspan< 39 | ElementType_x, 40 | std::experimental::extents, 41 | Layout_x, 42 | std::experimental::default_accessor 43 | > x, 44 | std::experimental::mdspan< 45 | ElementType_y, 46 | std::experimental::extents, 47 | Layout_y, 48 | std::experimental::default_accessor 49 | > y, 50 | std::experimental::mdspan< 51 | ElementType_z, 52 | std::experimental::extents, 53 | Layout_z, 54 | std::experimental::default_accessor 55 | > z) 56 | { 57 | static_assert(z.rank() <= 2); 58 | 59 | // P1673 preconditions 60 | add_impl::repeat 61 | ([=](int r){ 62 | if ( x.extent(r) != y.extent(r) ){ 63 | throw std::runtime_error("KokkosBlas: add: x.extent(r) != y.extent(r) for r=" 64 | + std::to_string(r)); 65 | } 66 | if ( y.extent(r) != z.extent(r) ){ 67 | throw std::runtime_error("KokkosBlas: add: y.extent(r) != z.extent(r) for r = " 68 | + std::to_string(r)); 69 | } 70 | }); 71 | 72 | // P1673 mandates 73 | add_impl::repeat 74 | ([=](int r){ 75 | Impl::static_extent_match(x.static_extent(r), z.static_extent(r)); 76 | Impl::static_extent_match(y.static_extent(r), z.static_extent(r)); 77 | Impl::static_extent_match(x.static_extent(r), y.static_extent(r)); 78 | }); 79 | 80 | Impl::signal_kokkos_impl_called("add"); 81 | 82 | auto x_view = Impl::mdspan_to_view(x); 83 | auto y_view = Impl::mdspan_to_view(y); 84 | auto z_view = Impl::mdspan_to_view(z); 85 | 86 | const auto alpha = static_cast(1); 87 | const auto beta = static_cast(1); 88 | const auto zero = static_cast(0); 89 | 90 | KokkosBlas::update(alpha, x_view, beta, y_view, zero, z_view); 91 | } 92 | 93 | } 94 | #endif 95 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_copy_kk.hpp: 
-------------------------------------------------------------------------------- 1 | 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_COPY_HPP_ 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_COPY_HPP_ 4 | 5 | #include "signal_kokkos_impl_called.hpp" 6 | 7 | namespace KokkosKernelsSTD { 8 | 9 | template::size_type ... ext_x, 12 | class Layout_x, 13 | class ElementType_y, 14 | std::experimental::extents<>::size_type ... ext_y, 15 | class Layout_y> 16 | requires ( (sizeof...(ext_x) == sizeof...(ext_y)) && (sizeof...(ext_x) <=2) ) 17 | void copy(kokkos_exec /*kexe*/, 18 | std::experimental::mdspan< 19 | ElementType_x, 20 | std::experimental::extents, 21 | Layout_x, 22 | std::experimental::default_accessor 23 | > x, 24 | std::experimental::mdspan< 25 | ElementType_y, 26 | std::experimental::extents, 27 | Layout_y, 28 | std::experimental::default_accessor 29 | > y) 30 | { 31 | Impl::signal_kokkos_impl_called("copy"); 32 | 33 | auto x_view = Impl::mdspan_to_view(x); 34 | auto y_view = Impl::mdspan_to_view(y); 35 | auto ex = ExeSpace(); 36 | 37 | if constexpr(std::is_same_v) { 38 | Kokkos::deep_copy(ex, y_view, x_view); 39 | } else { 40 | 41 | if constexpr(x.rank()==1){ 42 | Kokkos::parallel_for(Kokkos::RangePolicy(ex, 0, x_view.extent(0)), 43 | KOKKOS_LAMBDA (const std::size_t i){ 44 | y_view(i) = x_view(i); 45 | }); 46 | } 47 | 48 | else{ 49 | Kokkos::parallel_for(Kokkos::RangePolicy(ex, 0, x_view.extent(0)), 50 | KOKKOS_LAMBDA (const std::size_t i){ 51 | for (std::size_t j=0; j::size_type ext_x, 15 | class Layout_x, 16 | class ElementType_y, 17 | std::experimental::extents<>::size_type ext_y, 18 | class Layout_y, 19 | class Scalar> 20 | Scalar dot(kokkos_exec /*kexe*/, 21 | std::experimental::mdspan< 22 | ElementType_x, 23 | std::experimental::extents, 24 | Layout_x, 25 | std::experimental::default_accessor 26 | > x, 27 | std::experimental::mdspan< 28 | ElementType_y, 29 | 
std::experimental::extents, 30 | Layout_y, 31 | std::experimental::default_accessor 32 | > y, 33 | Scalar init) 34 | { 35 | // P1673 preconditions 36 | if ( x.extent(0) != y.extent(0) ){ 37 | throw std::runtime_error("KokkosBlas: dot: x.extent(0) != y.extent(0)"); 38 | } 39 | 40 | // P1673 mandates 41 | static_assert(Impl::static_extent_match(x.static_extent(0), y.static_extent(0))); 42 | 43 | Impl::signal_kokkos_impl_called("dot"); 44 | 45 | auto x_view = Impl::mdspan_to_view(x); 46 | auto y_view = Impl::mdspan_to_view(y); 47 | 48 | // This overload is for the default_accessor (see the args above). 49 | // We cannot use KokkosBlas::dot here because it would automatically 50 | // conjugate x for the complex case. 51 | // Since here we have the default accessors, we DO NOT want to conjugate x, 52 | // we just need to compute sum(x*y), even for the complex case. 53 | 54 | // Note that here we cannot use Scalar as accumulation type 55 | // because in the complex case, Scalar == std::complex type but the 56 | // value_type of x_view, y_view is Kokkos::complex, so we need to be careful. 
57 | using result_type = decltype(x_view(0)*y_view(0)); 58 | result_type result = {}; 59 | Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, x_view.extent(0)), 60 | KOKKOS_LAMBDA (const std::size_t i, result_type & update){ 61 | update += x_view(i)*y_view(i); 62 | }, result); 63 | 64 | // fence not needed because reducing into result 65 | 66 | // this is needed so that it works when Scalar is std::complex 67 | return Scalar(result) + init; 68 | } 69 | 70 | template::size_type ext_x, 73 | class Layout_x, 74 | class ElementType_y, 75 | std::experimental::extents<>::size_type ext_y, 76 | class Layout_y, 77 | class Scalar> 78 | Scalar dot(kokkos_exec, 79 | std::experimental::mdspan< 80 | ElementType_x, 81 | std::experimental::extents, 82 | Layout_x, 83 | std::experimental::linalg::conjugated_accessor< 84 | std::experimental::default_accessor, ElementType_x 85 | > 86 | > x, 87 | std::experimental::mdspan< 88 | ElementType_y, 89 | std::experimental::extents, 90 | Layout_y, 91 | std::experimental::default_accessor 92 | > y, 93 | Scalar init) 94 | { 95 | // P1673 preconditions 96 | if ( x.extent(0) != y.extent(0) ){ 97 | throw std::runtime_error("KokkosBlas: dot: x.extent(0) != y.extent(0)"); 98 | } 99 | 100 | // P1673 mandates 101 | static_assert(Impl::static_extent_match(x.static_extent(0), y.static_extent(0))); 102 | 103 | Impl::signal_kokkos_impl_called("dot"); 104 | 105 | auto x_view = Impl::mdspan_to_view(x); 106 | auto y_view = Impl::mdspan_to_view(y); 107 | 108 | // this overload is for x with conjugated (with nested default) accessor 109 | // so can call KokkosBlas::dot because it automatically conjugates x 110 | // and it is what we want. 
111 | return Scalar(KokkosBlas::dot(x_view, y_view)) + init; 112 | } 113 | 114 | } 115 | #endif 116 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_idx_abs_max_kk.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_IDX_ABS_MAX_HPP_ 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_IDX_ABS_MAX_HPP_ 4 | 5 | #include 6 | #include "signal_kokkos_impl_called.hpp" 7 | 8 | namespace KokkosKernelsSTD { 9 | 10 | // keeping this in mind: https://github.com/kokkos/stdBLAS/issues/122 11 | 12 | template::size_type ext0, 15 | class Layout> 16 | std::experimental::extents<>::size_type 17 | vector_idx_abs_max(kokkos_exec /*kexe*/, 18 | std::experimental::mdspan< 19 | ElementType, 20 | std::experimental::extents, 21 | Layout, 22 | std::experimental::default_accessor> v) 23 | { 24 | Impl::signal_kokkos_impl_called("vector_idx_abs_max"); 25 | 26 | auto v_view = Impl::mdspan_to_view(v); 27 | 28 | // note that -1 here, this is related to: 29 | // https://github.com/kokkos/stdBLAS/issues/114 30 | return KokkosBlas::iamax(v_view) - 1; 31 | } 32 | 33 | } 34 | #endif 35 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_matrix_frob_norm_kk.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_FROB_NORM_HPP_ 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_FROB_NORM_HPP_ 4 | 5 | #include "signal_kokkos_impl_called.hpp" 6 | 7 | namespace KokkosKernelsSTD { 8 | 9 | template< 10 | class ExeSpace, 11 | class ElementType, 12 | 
std::experimental::extents<>::size_type numRows, 13 | std::experimental::extents<>::size_type numCols, 14 | class Layout, 15 | class Scalar> 16 | Scalar matrix_frob_norm(kokkos_exec kexe, 17 | std::experimental::mdspan< 18 | ElementType, 19 | std::experimental::extents, 20 | Layout, 21 | std::experimental::default_accessor> A, 22 | Scalar init) 23 | { 24 | 25 | Impl::signal_kokkos_impl_called("matrix_frob_norm"); 26 | 27 | // corner cases 28 | constexpr std::size_t zero = 0; 29 | constexpr std::size_t one = 1; 30 | if (A.extent(0) == zero || A.extent(1) == zero) { 31 | return init; 32 | } 33 | else if(A.extent(0) == one && A.extent(1) == one) { 34 | using std::abs; 35 | return init + abs(A(0, 0)); 36 | } 37 | // NOTE(review): the 1x1 shortcut above returns init + |A(0,0)|, while the general path below returns sqrt(init + sum of squares); these differ when init != 0 - confirm which one is intended 38 | auto A_view = Impl::mdspan_to_view(A); 39 | 40 | // here we use an impl similar to the scaled_sum_of_squares 41 | // but we do not call that directly because it would require 42 | // flattening the matrix whereas this impl works for any layout 43 | 44 | using arithm_traits = Kokkos::Details::ArithTraits; 45 | 46 | std::experimental::linalg::sum_of_squares_result ssqr; 47 | ssqr.scaling_factor = {}; 48 | ssqr.scaled_sum_of_squares = {}; 49 | 50 | Kokkos::Max max_reducer(ssqr.scaling_factor); 51 | Kokkos::parallel_reduce( Kokkos::RangePolicy(ExeSpace(), 0, A_view.extent(0)*A_view.extent(1)), 52 | KOKKOS_LAMBDA (const std::size_t k, Scalar & lmax){ 53 | const auto i = k / A_view.extent(1); 54 | const auto j = k % A_view.extent(1); 55 | const auto val = arithm_traits::abs(A_view(i,j)); 56 | max_reducer.join(lmax, val); 57 | }, 58 | max_reducer); 59 | // no fence needed since reducing into scalar 60 | // NOTE(review): if every element of A is zero, ssqr.scaling_factor is presumably 0 here and the division below becomes 0/0 - confirm whether a zero guard is needed 61 | Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, A_view.extent(0)*A_view.extent(1)), 62 | KOKKOS_LAMBDA (const std::size_t k, Scalar & update){ 63 | const auto i = k / A_view.extent(1); 64 | const auto j = k % A_view.extent(1); 65 | const auto tmp = arithm_traits::abs(A_view(i,j))/ssqr.scaling_factor; 66 | update += tmp*tmp; 67 | }, 
ssqr.scaled_sum_of_squares); 68 | // no fence needed since reducing into scalar 69 | 70 | return std::sqrt(init + ssqr.scaling_factor * ssqr.scaling_factor * ssqr.scaled_sum_of_squares); 71 | } 72 | 73 | } // end namespace KokkosKernelsSTD 74 | #endif 75 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_matrix_inf_norm_kk.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_INF_NORM_HPP_ 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_INF_NORM_HPP_ 4 | 5 | #include "signal_kokkos_impl_called.hpp" 6 | 7 | namespace KokkosKernelsSTD { 8 | 9 | template< 10 | class ExeSpace, 11 | class ElementType, 12 | std::experimental::extents<>::size_type numRows, 13 | std::experimental::extents<>::size_type numCols, 14 | class Layout, 15 | class Scalar> 16 | Scalar matrix_inf_norm(kokkos_exec /*kexe*/, 17 | std::experimental::mdspan< 18 | ElementType, 19 | std::experimental::extents, 20 | Layout, 21 | std::experimental::default_accessor> A, 22 | Scalar init) 23 | { 24 | 25 | Impl::signal_kokkos_impl_called("matrix_inf_norm"); 26 | 27 | if (A.extent(0) == 0 || A.extent(1) == 0){ 28 | return init; 29 | } 30 | 31 | auto A_view = Impl::mdspan_to_view(A); 32 | 33 | Scalar result = {}; 34 | Kokkos::Max reducer(result); 35 | Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, A_view.extent(0)), 36 | KOKKOS_LAMBDA (const std::size_t i, Scalar & update) 37 | { 38 | using ats = Kokkos::Details::ArithTraits; 39 | Scalar mysum = ats::abs(A_view(i,0)); 40 | for (std::size_t j=1; j::size_type numRows, 13 | std::experimental::extents<>::size_type numCols, 14 | class Layout, 15 | class Scalar> 16 | Scalar matrix_one_norm(kokkos_exec /*kexe*/, 17 | std::experimental::mdspan< 18 | ElementType, 19 | 
std::experimental::extents, 20 | Layout, 21 | std::experimental::default_accessor> A, 22 | Scalar init) 23 | { 24 | 25 | Impl::signal_kokkos_impl_called("matrix_one_norm"); 26 | 27 | if (A.extent(1) == 0){ 28 | return init; 29 | } 30 | 31 | auto A_view = Impl::mdspan_to_view(A); 32 | 33 | Scalar result = {}; 34 | Kokkos::Max reducer(result); 35 | Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, A_view.extent(1)), 36 | KOKKOS_LAMBDA (const std::size_t j, Scalar & update) 37 | { 38 | using ats = Kokkos::Details::ArithTraits; 39 | Scalar mysum = ats::abs(A_view(0,j)); 40 | for (std::size_t i=1; i::size_type ... ext, 17 | class Layout> 18 | requires (sizeof...(ext) <= 2) 19 | void scale(kokkos_exec /*kexe*/, 20 | const Scalar alpha, 21 | std::experimental::mdspan< 22 | ElementType, 23 | std::experimental::extents, 24 | Layout, 25 | std::experimental::default_accessor 26 | > obj) 27 | { 28 | 29 | Impl::signal_kokkos_impl_called("scale"); 30 | auto obj_view = Impl::mdspan_to_view(obj); 31 | KokkosBlas::scal(obj_view, alpha, obj_view); 32 | } 33 | 34 | } 35 | #endif 36 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_swap_elements_kk.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_SWAP_HPP_ 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_SWAP_HPP_ 4 | 5 | #include 6 | #include "signal_kokkos_impl_called.hpp" 7 | #include "static_extent_match.hpp" 8 | 9 | namespace KokkosKernelsSTD { 10 | 11 | namespace swap_impl{ 12 | 13 | // this is here until we can use kokkos 3.6 which has swap avail 14 | template 15 | requires(std::is_move_assignable::value && std::is_move_constructible::value) 16 | KOKKOS_INLINE_FUNCTION void _my_tmp_swap(T& a, T& b) noexcept 17 | { 18 | T tmp = std::move(a); 19 | a = 
std::move(b); 20 | b = std::move(tmp); 21 | } 22 | 23 | template 24 | void repeat_impl(F&& f, std::integer_sequence){ 25 | ( f(std::integral_constant{}), ... ); 26 | } 27 | 28 | template 29 | void repeat(F&& f){ 30 | repeat_impl(f, std::make_integer_sequence{}); 31 | } 32 | 33 | } // end namespace swap_impl 34 | 35 | // 36 | // for now, specialize for default_accessor 37 | // https://github.com/kokkos/stdBLAS/issues/122 38 | // 39 | template::size_type ... ext_x, 42 | class Layout_x, 43 | class ElementType_y, 44 | std::experimental::extents<>::size_type ... ext_y, 45 | class Layout_y> 46 | requires (sizeof...(ext_x) == sizeof...(ext_y)) 47 | void swap_elements(kokkos_exec /*kexe*/, 48 | std::experimental::mdspan< 49 | ElementType_x, 50 | std::experimental::extents, 51 | Layout_x, 52 | std::experimental::default_accessor 53 | > x, 54 | std::experimental::mdspan< 55 | ElementType_y, 56 | std::experimental::extents, 57 | Layout_y, 58 | std::experimental::default_accessor 59 | > y) 60 | { 61 | // matching rank already checked via requires above 62 | static_assert(x.rank() <= 2); 63 | 64 | // P1673 preconditions 65 | swap_impl::repeat 66 | ([=](int r){ 67 | if ( x.extent(r) != y.extent(r) ){ 68 | throw std::runtime_error("KokkosBlas: swap_elements: x.extent(r) != y.extent(r) for r=" 69 | + std::to_string(r)); 70 | } 71 | }); 72 | 73 | // P1673 mandates 74 | swap_impl::repeat 75 | ([=](int r){ 76 | Impl::static_extent_match(x.static_extent(r), y.static_extent(r)); 77 | }); 78 | 79 | Impl::signal_kokkos_impl_called("swap_elements"); 80 | 81 | auto x_view = Impl::mdspan_to_view(x); 82 | auto y_view = Impl::mdspan_to_view(y); 83 | 84 | auto ex = ExeSpace(); 85 | if constexpr(x.rank()==1){ 86 | Kokkos::parallel_for(Kokkos::RangePolicy(ex, 0, x_view.extent(0)), 87 | KOKKOS_LAMBDA (std::size_t i){ 88 | swap_impl::_my_tmp_swap(x_view(i), y_view(i)); 89 | }); 90 | } 91 | 92 | else{ 93 | Kokkos::parallel_for(Kokkos::RangePolicy(ex, 0, x_view.extent(0)), 94 | KOKKOS_LAMBDA 
(std::size_t i){ 95 | for (std::size_t j=0; j::size_type ext0, 14 | class Layout, 15 | class Scalar> 16 | Scalar vector_abs_sum(kokkos_exec /*kexe*/, 17 | std::experimental::mdspan< 18 | ElementType, 19 | std::experimental::extents, 20 | Layout, 21 | std::experimental::default_accessor 22 | > x, 23 | Scalar init) 24 | { 25 | 26 | Impl::signal_kokkos_impl_called("vector_abs_sum"); 27 | 28 | auto x_view = Impl::mdspan_to_view(x); 29 | using arithm_traits = Kokkos::Details::ArithTraits; 30 | Scalar result = {}; 31 | Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, x_view.extent(0)), 32 | KOKKOS_LAMBDA (const std::size_t i, Scalar & update) { 33 | update += arithm_traits::abs(x_view(i)); 34 | }, result); 35 | // fence not needed because reducing into result 36 | 37 | return result + init; 38 | } 39 | 40 | } 41 | #endif 42 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_vector_norm2_kk.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_VEC_NORM2_HPP_ 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_VEC_NORM2_HPP_ 4 | 5 | #include "signal_kokkos_impl_called.hpp" 6 | 7 | namespace KokkosKernelsSTD { 8 | 9 | template::size_type ext, 12 | class Layout, 13 | class Scalar> 14 | Scalar vector_norm2(kokkos_exec /*kexe*/, 15 | std::experimental::mdspan< 16 | ElementType, 17 | std::experimental::extents, 18 | Layout, 19 | std::experimental::default_accessor> x, 20 | Scalar init) 21 | { 22 | 23 | Impl::signal_kokkos_impl_called("vector_norm2"); 24 | 25 | // for the code in stdBLAS/examples/kokkos-based, 26 | // when using float, the nrm2 does not work, giving: 27 | // Kokkos result = -36893488147419103232.000000 28 | // return KokkosBlas::nrm2(Impl::mdspan_to_view(x)) + init; 29 | 30 | // the following works 
31 | using IPT = Kokkos::Details::InnerProductSpaceTraits; 32 | auto x_view = Impl::mdspan_to_view(x); 33 | Scalar result = {}; 34 | Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, x_view.extent(0)), 35 | KOKKOS_LAMBDA (const std::size_t i, Scalar & update) { 36 | const typename IPT::mag_type tmp = IPT::norm(x_view(i)); 37 | update += tmp*tmp; 38 | }, result); 39 | 40 | // fence not needed because reducing into result 41 | 42 | return Kokkos::Details::ArithTraits::sqrt(result + init); 43 | } 44 | 45 | } 46 | #endif 47 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_vector_sum_of_squares_kk.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_VEC_SUM_OF_SQUARES_HPP_ 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_VEC_SUM_OF_SQUARES_HPP_ 4 | 5 | #include "signal_kokkos_impl_called.hpp" 6 | 7 | namespace KokkosKernelsSTD { 8 | 9 | template::size_type ext0, 12 | class Layout, 13 | class Scalar> 14 | std::experimental::linalg::sum_of_squares_result 15 | vector_sum_of_squares(kokkos_exec /*kexe*/, 16 | std::experimental::mdspan< 17 | ElementType, 18 | std::experimental::extents, 19 | Layout, 20 | std::experimental::default_accessor> x, 21 | std::experimental::linalg::sum_of_squares_result init) 22 | { 23 | 24 | Impl::signal_kokkos_impl_called("vector_sum_of_squares"); 25 | 26 | auto x_view = Impl::mdspan_to_view(x); 27 | std::experimental::linalg::sum_of_squares_result result; 28 | 29 | using arithm_traits = Kokkos::Details::ArithTraits; 30 | 31 | Scalar scaling_factor = {}; 32 | Kokkos::Max max_reducer(scaling_factor); 33 | Kokkos::parallel_reduce( Kokkos::RangePolicy(ExecSpace(), 0, x_view.extent(0)), 34 | KOKKOS_LAMBDA (const std::size_t i, Scalar & lmax){ 35 | const auto val = 
arithm_traits::abs(x_view(i)); 36 | max_reducer.join(lmax, val); 37 | }, 38 | max_reducer); 39 | // no fence needed since reducing into scalar 40 | result.scaling_factor = std::max(scaling_factor, init.scaling_factor); 41 | 42 | Scalar ssq = {}; 43 | Kokkos::parallel_reduce(Kokkos::RangePolicy(ExecSpace(), 0, x_view.extent(0)), 44 | KOKKOS_LAMBDA (const std::size_t i, Scalar & update){ 45 | const auto tmp = arithm_traits::abs(x_view(i))/result.scaling_factor; 46 | update += tmp*tmp; 47 | }, ssq); 48 | // no fence needed since reducing into scalar 49 | // fold init in, rescaled by the combined scaling factor that ssq above was normalized with (not the local max of x alone) 50 | result.scaled_sum_of_squares = ssq 51 | + (init.scaling_factor*init.scaling_factor*init.scaled_sum_of_squares)/(result.scaling_factor*result.scaling_factor); 52 | 53 | return result; 54 | } 55 | 56 | } 57 | #endif 58 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/exec_policy_wrapper_kk.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __LINALG_KOKKOSKERNELS_EXEC_POLICY_WRAPPER_KK_HPP_ 2 | #define __LINALG_KOKKOSKERNELS_EXEC_POLICY_WRAPPER_KK_HPP_ 3 | #include 4 | #include 5 | namespace KokkosKernelsSTD { 6 | 7 | template 8 | struct kokkos_exec { 9 | }; 10 | 11 | template 12 | auto execpolicy_mapper(kokkos_exec) { return kokkos_exec(); } 13 | } // namespace KokkosKernelsSTD 14 | // The non-template overloads below are defined in a header, so they are inline to avoid duplicate definitions across translation units. 15 | // Remap standard execution policies to Kokkos 16 | #ifdef LINALG_ENABLE_KOKKOS_DEFAULT 17 | namespace std { 18 | namespace experimental { 19 | inline namespace __p1673_version_0 { 20 | namespace linalg { 21 | inline auto execpolicy_mapper(std::experimental::linalg::impl::default_exec_t) { return KokkosKernelsSTD::kokkos_exec<>(); } 22 | inline auto execpolicy_mapper(std::execution::parallel_policy) { return KokkosKernelsSTD::kokkos_exec<>(); } 23 | inline auto execpolicy_mapper(std::execution::parallel_unsequenced_policy) { return KokkosKernelsSTD::kokkos_exec<>(); } 24 | } 25 | } 26 | } 27 | } 28 | #endif 29 | 
#endif 30 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/kokkos_conjugate.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_CONJUGATE_IF_NEEDED_HPP_ 19 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_CONJUGATE_IF_NEEDED_HPP_ 20 | 21 | #include "experimental/__p1673_bits/conj_if_needed.hpp" 22 | 23 | namespace std { 24 | namespace experimental { 25 | namespace linalg { 26 | namespace impl{ 27 | 28 | // conj_if_needed doesn't use an is_complex trait. 29 | // Instead, it checks whether conj(x) (namespace-unqualified) is a valid expression, 30 | // calls that if so, else assumes that x represents a real number and returns x. 31 | // Thus, we don't actually need to do anything here. 
32 | 33 | } // end namespace impl 34 | } // end namespace linalg 35 | } // end namespace experimental 36 | } // end namespace std 37 | 38 | #endif //LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_CONJUGATE_IF_NEEDED_HPP_ 39 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/parallel_matrix.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_PARALLEL_MATRIX_HPP_ 19 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_PARALLEL_MATRIX_HPP_ 20 | 21 | namespace KokkosKernelsSTD { 22 | namespace Impl { 23 | 24 | // manages parallel execution of independent action 25 | // called like action(i, j) for each matrix element A(i, j) 26 | template 27 | class ParallelMatrixVisitor { 28 | public: 29 | KOKKOS_INLINE_FUNCTION ParallelMatrixVisitor(ExecSpace &&exec_in, MatrixType A_in): 30 | exec(exec_in), A(A_in), ext0(A.extent(0)), ext1(A.extent(1)) 31 | {} 32 | 33 | template 34 | KOKKOS_INLINE_FUNCTION 35 | void for_each_matrix_element(ActionType action) { 36 | if (ext0 > ext1) { // parallel rows 37 | Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, ext0), 38 | KOKKOS_LAMBDA(const auto i) { 39 | using idx_type = std::remove_const_t; 40 | for (idx_type j = 0; j < ext1; ++j) { 41 | action(i, j); 42 | } 43 | }); 44 | } else { // parallel columns 45 | Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, ext1), 46 | KOKKOS_LAMBDA(const auto j) { 47 | using idx_type = std::remove_const_t; 48 | for (idx_type i = 0; i < ext0; ++i) { 49 | action(i, j); 50 | } 51 | }); 52 | } 53 | exec.fence(); 54 | } 55 | 56 | template 57 | void for_each_triangle_matrix_element(std::experimental::linalg::upper_triangle_t t, ActionType action) { 58 | Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, ext1), 59 | KOKKOS_LAMBDA(const auto j) { 60 | using idx_type = std::remove_const_t; 61 | for (idx_type i = 0; i <= j; ++i) { 62 | action(i, j); 63 | } 64 | }); 65 | exec.fence(); 66 | } 67 | 68 | template 69 | void for_each_triangle_matrix_element(std::experimental::linalg::lower_triangle_t t, ActionType action) { 70 | 
for_each_triangle_matrix_element(std::experimental::linalg::upper_triangle, 71 | [action](const auto i, const auto j) { 72 | action(j, i); 73 | }); 74 | } 75 | 76 | private: 77 | ExecSpace exec; 78 | MatrixType A; 79 | size_t ext0; 80 | size_t ext1; 81 | }; 82 | 83 | } // namespace Impl 84 | } // namespace KokkosKernelsSTD 85 | #endif 86 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/signal_kokkos_impl_called.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// ************************************************************************
//@HEADER

#ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_UTILS_HPP_
#define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_UTILS_HPP_

#include <string_view>

namespace KokkosKernelsSTD {
namespace Impl {

// Hook that receives the name of a function as a string_view.
//
// With KOKKOS_STDBLAS_ENABLE_TESTS defined, the hook is only declared here and
// its definition has to be supplied by another translation unit. Otherwise it
// is an empty function that ignores its argument.
#if defined(KOKKOS_STDBLAS_ENABLE_TESTS)
extern void signal_kokkos_impl_called(std::string_view functionName);
#else
// `inline` is required: this definition lives in a header, and without it
// every translation unit that includes the header would emit its own external
// definition of the same function.
inline void signal_kokkos_impl_called(std::string_view /* functionName */) {}
#endif

} // namespace Impl
} // namespace KokkosKernelsSTD
#endif

// --------------------------------------------------------------------------------
// /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/static_extent_match.hpp:
// --------------------------------------------------------------------------------
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_STATICEXTMATCH_HPP_ 19 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_STATICEXTMATCH_HPP_ 20 | 21 | namespace KokkosKernelsSTD { 22 | namespace Impl { 23 | 24 | template 25 | constexpr bool static_extent_match(size_type extent1, size_type extent2) 26 | { 27 | return extent1 == std::experimental::dynamic_extent || 28 | extent2 == std::experimental::dynamic_extent || 29 | extent1 == extent2; 30 | } 31 | 32 | } // namespace Impl 33 | } // namespace KokkosKernelsSTD 34 | #endif 35 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/triangle.hpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // ************************************************************************ 3 | // 4 | // Kokkos v. 4.0 5 | // Copyright (2022) National Technology & Engineering 6 | // Solutions of Sandia, LLC (NTESS). 7 | // 8 | // Under the terms of Contract DE-NA0003525 with NTESS, 9 | // the U.S. Government retains certain rights in this software. 10 | // 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 12 | // See https://kokkos.org/LICENSE for license information. 
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 | // 15 | // ************************************************************************ 16 | //@HEADER 17 | 18 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_TRIANGLE_UTILS_HPP_ 19 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_TRIANGLE_UTILS_HPP_ 20 | 21 | namespace KokkosKernelsSTD { 22 | namespace Impl { 23 | 24 | // Note: phrase it simply and the same as in specification ("has unique layout") 25 | template ::size_type numRows, 27 | std::experimental::extents<>::size_type numCols> 28 | constexpr bool is_unique_layout_v = Layout::template mapping< 29 | std::experimental::extents >::is_always_unique(); 30 | 31 | template 32 | struct is_layout_blas_packed: public std::false_type {}; 33 | 34 | template 35 | struct is_layout_blas_packed< 36 | std::experimental::linalg::layout_blas_packed>: 37 | public std::true_type {}; 38 | 39 | template 40 | constexpr bool is_layout_blas_packed_v = is_layout_blas_packed::value; 41 | 42 | // Note: will only signal failure for layout_blas_packed with different triangle 43 | template 44 | struct triangle_layout_match: public std::true_type {}; 45 | 46 | template 47 | struct triangle_layout_match< 48 | std::experimental::linalg::layout_blas_packed, 49 | Triangle2> 50 | { 51 | static constexpr bool value = std::is_same_v; 52 | }; 53 | 54 | template 55 | constexpr bool triangle_layout_match_v = triangle_layout_match::value; 56 | 57 | } // namespace Impl 58 | } // namespace KokkosKernelsSTD 59 | #endif 60 | -------------------------------------------------------------------------------- /tpl-implementations/include/experimental/linalg_kokkoskernels: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include "__p1673_bits/kokkos-kernels/mdspan_to_view_mapper_kk.hpp" 5 | #include 
"__p1673_bits/kokkos-kernels/kokkos_conjugate.hpp" 6 | 7 | // blas1 (according to P1673) 8 | #include "__p1673_bits/kokkos-kernels/blas1_dot_kk.hpp" 9 | #include "__p1673_bits/kokkos-kernels/blas1_add_kk.hpp" 10 | #include "__p1673_bits/kokkos-kernels/blas1_scale_kk.hpp" 11 | #include "__p1673_bits/kokkos-kernels/blas1_idx_abs_max_kk.hpp" 12 | #include "__p1673_bits/kokkos-kernels/blas1_vector_norm2_kk.hpp" 13 | #include "__p1673_bits/kokkos-kernels/blas1_vector_abs_sum_kk.hpp" 14 | #include "__p1673_bits/kokkos-kernels/blas1_vector_sum_of_squares_kk.hpp" 15 | #include "__p1673_bits/kokkos-kernels/blas1_matrix_frob_norm_kk.hpp" 16 | #include "__p1673_bits/kokkos-kernels/blas1_matrix_inf_norm_kk.hpp" 17 | #include "__p1673_bits/kokkos-kernels/blas1_matrix_one_norm_kk.hpp" 18 | #include "__p1673_bits/kokkos-kernels/blas1_swap_elements_kk.hpp" 19 | #include "__p1673_bits/kokkos-kernels/blas1_copy_kk.hpp" 20 | 21 | // blas2 (according to P1673) 22 | #include "__p1673_bits/kokkos-kernels/blas2_matrix_rank_1_update.hpp" 23 | #include "__p1673_bits/kokkos-kernels/blas2_matrix_rank_2_update.hpp" 24 | #include "__p1673_bits/kokkos-kernels/blas2_gemv_kk.hpp" 25 | #include "__p1673_bits/kokkos-kernels/blas2_symv_kk.hpp" 26 | #include "__p1673_bits/kokkos-kernels/blas2_hemv_kk.hpp" 27 | #include "__p1673_bits/kokkos-kernels/blas2_triangular_mat_vec_product.hpp" 28 | 29 | // blas3 (according to P1673) 30 | #include "__p1673_bits/kokkos-kernels/blas3_overwriting_gemm_kk.hpp" 31 | #include "__p1673_bits/kokkos-kernels/blas3_matrix_rank_k_update.hpp" 32 | #include "__p1673_bits/kokkos-kernels/blas3_matrix_rank_2k_update.hpp" 33 | #include "__p1673_bits/kokkos-kernels/blas3_matrix_product_kk.hpp" 34 | #include "__p1673_bits/kokkos-kernels/blas3_triangular_matrix_matrix_solve.hpp" 35 | --------------------------------------------------------------------------------