├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── cmake.yml
    │   └── macos.yml
├── CMakeLists.txt
├── LICENSE
├── LICENSE_FILE_HEADER
├── README.md
├── cmake
    └── LinAlgConfig.cmake.in
├── compilation_tests
    ├── CMakeLists.txt
    └── ctest_common.hpp
├── do-configure
├── examples
    ├── 01_scale.cpp
    ├── 02_matrix_vector_product_basic.cpp
    ├── 03_matrix_vector_product_mixedprec.cpp
    ├── CMakeLists.txt
    └── kokkos-based
    │   ├── CMakeLists.txt
    │   ├── add_kokkos.cpp
    │   ├── dot_kokkos.cpp
    │   ├── dotc_kokkos.cpp
    │   ├── idx_abs_max_kokkos.cpp
    │   ├── matrix_vector_product_kokkos.cpp
    │   ├── scale_kokkos.cpp
    │   ├── vector_abs_sum_kokkos.cpp
    │   ├── vector_norm2_kokkos.cpp
    │   └── vector_sum_of_squares_kokkos.cpp
├── include
    └── experimental
    │   ├── __p1673_bits
    │       ├── abs_if_needed.hpp
    │       ├── blas1_dot.hpp
    │       ├── blas1_givens.hpp
    │       ├── blas1_linalg_add.hpp
    │       ├── blas1_linalg_copy.hpp
    │       ├── blas1_linalg_swap.hpp
    │       ├── blas1_matrix_frob_norm.hpp
    │       ├── blas1_matrix_inf_norm.hpp
    │       ├── blas1_matrix_one_norm.hpp
    │       ├── blas1_scale.hpp
    │       ├── blas1_vector_abs_sum.hpp
    │       ├── blas1_vector_idx_abs_max.hpp
    │       ├── blas1_vector_norm2.hpp
    │       ├── blas1_vector_sum_of_squares.hpp
    │       ├── blas2_matrix_rank_1_update.hpp
    │       ├── blas2_matrix_rank_2_update.hpp
    │       ├── blas2_matrix_vector_product.hpp
    │       ├── blas2_matrix_vector_solve.hpp
    │       ├── blas3_matrix_product.hpp
    │       ├── blas3_matrix_rank_2k_update.hpp
    │       ├── blas3_matrix_rank_k_update.hpp
    │       ├── blas3_triangular_matrix_matrix_solve.hpp
    │       ├── conj_if_needed.hpp
    │       ├── conjugate_transposed.hpp
    │       ├── conjugated.hpp
    │       ├── imag_if_needed.hpp
    │       ├── layout_tags.hpp
    │       ├── layout_triangle.hpp
    │       ├── linalg_config.h.in
    │       ├── linalg_execpolicy_mapper.hpp
    │       ├── macros.hpp
    │       ├── maybe_static_size.hpp
    │       ├── packed_layout.hpp
    │       ├── proxy_reference.hpp
    │       ├── real_if_needed.hpp
    │       ├── scaled.hpp
    │       └── transposed.hpp
    │   └── linalg
├── lewg-presentation.md
├── make_single_header.py
├── tests
    ├── CMakeLists.txt
    ├── kokkos-based
    │   ├── CMakeLists.txt
    │   ├── add_kokkos.cpp
    │   ├── copy_kokkos.cpp
    │   ├── dot_kokkos.cpp
    │   ├── dotc_kokkos.cpp
    │   ├── gemm_C_AB.cpp
    │   ├── gemm_C_ABT.cpp
    │   ├── gemm_C_ATB.cpp
    │   ├── gtest_fixtures.hpp
    │   ├── gtest_main_kokkos.cpp
    │   ├── helpers.hpp
    │   ├── hermitian_matrix_left_product_kokkos.cpp
    │   ├── hermitian_matrix_rank1_update_kokkos.cpp
    │   ├── hermitian_matrix_rank2_update_kokkos.cpp
    │   ├── hermitian_matrix_rank_2k_update_kokkos.cpp
    │   ├── hermitian_matrix_rank_k_update_kokkos.cpp
    │   ├── hermitian_matrix_right_product_kokkos.cpp
    │   ├── idx_abs_max_kokkos.cpp
    │   ├── matrix_frob_norm_kokkos.cpp
    │   ├── matrix_inf_norm_kokkos.cpp
    │   ├── matrix_one_norm_kokkos.cpp
    │   ├── matrix_rank1_update_kokkos.cpp
    │   ├── mdspan_to_view.cpp
    │   ├── overwriting_hermitian_matrix_vector_product.cpp
    │   ├── overwriting_matrix_vector_product.cpp
    │   ├── overwriting_symmetric_matrix_vector_product.cpp
    │   ├── overwriting_triangular_matrix_vector_product.cpp
    │   ├── scale_rank1_kokkos.cpp
    │   ├── scale_rank2_kokkos.cpp
    │   ├── swap_elements_rank1_kokkos.cpp
    │   ├── swap_elements_rank2_kokkos.cpp
    │   ├── symmetric_matrix_left_product_kokkos.cpp
    │   ├── symmetric_matrix_rank1_update_kokkos.cpp
    │   ├── symmetric_matrix_rank2_update_kokkos.cpp
    │   ├── symmetric_matrix_rank_2k_update_kokkos.cpp
    │   ├── symmetric_matrix_rank_k_update_kokkos.cpp
    │   ├── symmetric_matrix_right_product_kokkos.cpp
    │   ├── test.cmake
    │   ├── triangular_matrix_left_product_kokkos.cpp
    │   ├── triangular_matrix_matrix_left_solve.cpp
    │   ├── triangular_matrix_matrix_right_solve.cpp
    │   ├── triangular_matrix_right_product_kokkos.cpp
    │   ├── triangular_matrix_vector_solve.cpp
    │   ├── updating_hermitian_matrix_vector_product.cpp
    │   ├── updating_matrix_vector_product.cpp
    │   ├── updating_symmetric_matrix_vector_product.cpp
    │   ├── updating_triangular_matrix_vector_product.cpp
    │   ├── vector_abs_sum_kokkos.cpp
    │   ├── vector_norm2_kokkos.cpp
    │   └── vector_sum_of_squares_kokkos.cpp
    └── native
    │   ├── CMakeLists.txt
    │   ├── abs_if_needed.cpp
    │   ├── abs_sum.cpp
    │   ├── add.cpp
    │   ├── conj_if_needed.cpp
    │   ├── conjugate_transposed.cpp
    │   ├── conjugated.cpp
    │   ├── copy.cpp
    │   ├── dot.cpp
    │   ├── gemm.cpp
    │   ├── gemv.cpp
    │   ├── gemv_no_ambig.cpp
    │   ├── ger.cpp
    │   ├── gerc.cpp
    │   ├── givens.cpp
    │   ├── gtest_fixtures.hpp
    │   ├── hemm.cpp
    │   ├── her.cpp
    │   ├── her2.cpp
    │   ├── her2k.cpp
    │   ├── herk.cpp
    │   ├── idx_abs_max.cpp
    │   ├── imag_if_needed.cpp
    │   ├── matrix_inf_norm.cpp
    │   ├── matrix_one_norm.cpp
    │   ├── mixed_accessors.cpp
    │   ├── my_numbers.hpp
    │   ├── norm2.cpp
    │   ├── proxy_refs.cpp
    │   ├── real_if_needed.cpp
    │   ├── scale.cpp
    │   ├── scaled.cpp
    │   ├── swap.cpp
    │   ├── symm.cpp
    │   ├── syr.cpp
    │   ├── syr2.cpp
    │   ├── syr2k.cpp
    │   ├── syrk.cpp
    │   ├── transposed.cpp
    │   ├── trmm.cpp
    │   ├── trmv.cpp
    │   └── trsm.cpp
└── tpl-implementations
    └── include
        └── experimental
            ├── __p1673_bits
                └── kokkos-kernels
                │   ├── blas1_add_kk.hpp
                │   ├── blas1_copy_kk.hpp
                │   ├── blas1_dot_kk.hpp
                │   ├── blas1_idx_abs_max_kk.hpp
                │   ├── blas1_matrix_frob_norm_kk.hpp
                │   ├── blas1_matrix_inf_norm_kk.hpp
                │   ├── blas1_matrix_one_norm_kk.hpp
                │   ├── blas1_scale_kk.hpp
                │   ├── blas1_swap_elements_kk.hpp
                │   ├── blas1_vector_abs_sum_kk.hpp
                │   ├── blas1_vector_norm2_kk.hpp
                │   ├── blas1_vector_sum_of_squares_kk.hpp
                │   ├── blas2_gemv_kk.hpp
                │   ├── blas2_hemv_kk.hpp
                │   ├── blas2_matrix_rank_1_update.hpp
                │   ├── blas2_matrix_rank_2_update.hpp
                │   ├── blas2_symv_kk.hpp
                │   ├── blas2_triangular_mat_vec_product.hpp
                │   ├── blas2_triangular_matrix_vector_solve.hpp
                │   ├── blas3_matrix_product_kk.hpp
                │   ├── blas3_matrix_rank_2k_update.hpp
                │   ├── blas3_matrix_rank_k_update.hpp
                │   ├── blas3_overwriting_gemm_kk.hpp
                │   ├── blas3_triangular_matrix_matrix_solve.hpp
                │   ├── exec_policy_wrapper_kk.hpp
                │   ├── kokkos_conjugate.hpp
                │   ├── mdspan_to_view_mapper_kk.hpp
                │   ├── parallel_matrix.hpp
                │   ├── signal_kokkos_impl_called.hpp
                │   ├── static_extent_match.hpp
                │   └── triangle.hpp
            └── linalg_kokkoskernels


/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: "github-actions"
4 |     directory: /
5 |     schedule:
6 |       interval: "weekly"
7 | 


--------------------------------------------------------------------------------
/.github/workflows/cmake.yml:
--------------------------------------------------------------------------------
  1 | name: CMake
  2 | 
  3 | on: [push, pull_request]
  4 | 
  5 | env:
  6 |   # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  7 |   BUILD_TYPE: RelWithDebInfo
  8 | 
  9 | jobs:
 10 |   build-mdspan:
 11 |     runs-on: ubuntu-latest
 12 |     strategy:
 13 |       fail-fast: false
 14 |       matrix:
 15 |         include:
 16 |         - compiler_driver: g++
 17 |           compiler_prefix: /usr/bin
 18 |     steps:
 19 |     - name: Create Build Environment
 20 |       run: cmake -E make_directory mdspan-build
 21 |       
 22 |     - name: Check Out
 23 |       uses: actions/checkout@v4
 24 |       with:
 25 |         repository: kokkos/mdspan
 26 |         path: mdspan-src
 27 |       
 28 |     - name: Configure CMake
 29 |       working-directory: mdspan-build
 30 |       run: cmake $GITHUB_WORKSPACE/mdspan-src -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/mdspan-install
 31 |       
 32 |     - name: Build
 33 |       working-directory: mdspan-build
 34 |       run: make
 35 |       
 36 |     - name: Install
 37 |       working-directory: mdspan-build
 38 |       run: make install
 39 |       
 40 |     - name: Upload
 41 |       uses: actions/upload-artifact@v4
 42 |       with:
 43 |         name: mdspan
 44 |         path: mdspan-install
 45 | 
 46 |   build-stdblas:
 47 |     runs-on: ubuntu-latest
 48 |     needs: build-mdspan
 49 |     
 50 |     steps:
 51 |     - name: Download mdspan
 52 |       uses: actions/download-artifact@v4
 53 |       with:
 54 |         name: mdspan
 55 |         path: mdspan-install 
 56 |         
 57 |     - name: Create Build Environment
 58 |       run: cmake -E make_directory stdblas-build
 59 |         
 60 |     - name: Check Out
 61 |       uses: actions/checkout@v4
 62 |       with:
 63 |         path: stdblas-src
 64 |         
 65 |     - name: Configure CMake
 66 |       shell: bash
 67 |       working-directory: stdblas-build
 68 |       run: cmake $GITHUB_WORKSPACE/stdblas-src -Dmdspan_DIR=$GITHUB_WORKSPACE/mdspan-install/lib/cmake/mdspan -DLINALG_ENABLE_TESTS=On -DLINALG_ENABLE_EXAMPLES=On -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/stdblas-install
 69 | 
 70 |     - name: Build
 71 |       working-directory: stdblas-build
 72 |       shell: bash
 73 |       run: make
 74 |       
 75 |     - name: Tar files
 76 |       shell: bash
 77 |       run: tar -cvf stdblas.tar *
 78 | 
 79 |     - name: Upload workspace
 80 |       uses: actions/upload-artifact@v4
 81 |       with:
 82 |         name: stdblas
 83 |         path: stdblas.tar
 84 |         
 85 |   test-stdBLAS:
 86 |     runs-on: ubuntu-latest
 87 |     needs: build-stdblas
 88 |     
 89 |     steps:
 90 |     
 91 |     - name: Download workspace
 92 |       uses: actions/download-artifact@v4
 93 |       with:
 94 |         name: stdblas
 95 |         path: .
 96 |         
 97 |     - name: Untar files
 98 |       shell: bash
 99 |       run: tar -xvf stdblas.tar
100 |         
101 |     - name: Test
102 |       working-directory: stdblas-build
103 |       shell: bash
104 |       run: ctest --output-on-failure
105 |         
106 |     - name: Install
107 |       working-directory: stdblas-build
108 |       shell: bash
109 |       run: make install
110 | 


--------------------------------------------------------------------------------
/.github/workflows/macos.yml:
--------------------------------------------------------------------------------
 1 | name: MacOS
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | env:
 6 |   # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
 7 |   BUILD_TYPE: RelWithDebInfo
 8 | 
 9 | jobs:
10 |   osx-ci:
11 |     runs-on: [macos-latest]
12 | 
13 |     steps:
14 |     - name: Check Out mdspan
15 |       uses: actions/checkout@v4
16 |       with:
17 |         repository: kokkos/mdspan
18 |         path: mdspan-src
19 | 
20 |     - name: create directories
21 |       run: cmake -E make_directory mdspan-build stdblas-build
22 | 
23 |     - name: Configure mdspan
24 |       working-directory: mdspan-build
25 |       run: cmake -S $GITHUB_WORKSPACE/mdspan-src -B $GITHUB_WORKSPACE/mdspan-build -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/mdspan-install
26 | 
27 |     - name: Build mdspan
28 |       working-directory: mdspan-build
29 |       run: cmake --build $GITHUB_WORKSPACE/mdspan-build -j 3
30 | 
31 |     - name: Install mdspan
32 |       working-directory: mdspan-build
33 |       run: cmake --install $GITHUB_WORKSPACE/mdspan-build
34 | 
35 |     - name: Check Out
36 |       uses: actions/checkout@v4
37 |       with:
38 |         path: stdblas-src
39 |   
40 |     - name: Configure stdblas
41 |       shell: bash
42 |       working-directory: stdblas-build
43 |       run: cmake -S $GITHUB_WORKSPACE/stdblas-src -B $GITHUB_WORKSPACE/stdblas-build -Dmdspan_ROOT=$GITHUB_WORKSPACE/mdspan-install/ -DLINALG_ENABLE_TESTS=On -DLINALG_ENABLE_EXAMPLES=On -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/stdblas-install
44 | 
45 |     - name: Build stdblas
46 |       working-directory: stdblas-build
47 |       shell: bash
48 |       run: cmake --build $GITHUB_WORKSPACE/stdblas-build -j 3
49 | 
50 |     - name: Test stdblas
51 |       working-directory: stdblas-build
52 |       shell: bash
53 |       run: ctest --output-on-failure
54 |         
55 |     - name: Install stdblas
56 |       working-directory: stdblas-build
57 |       shell: bash
58 |       run: cmake --install $GITHUB_WORKSPACE/stdblas-build
59 | 


--------------------------------------------------------------------------------
/LICENSE_FILE_HEADER:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | //@HEADER
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # P1673 reference implementation
 2 | 
 3 | This is a reference implementation of P1673,
 4 | "A free function linear algebra interface based on the BLAS."
 5 | You can find the latest submitted revision of P1673
 6 | [at this URL](https://wg21.link/p1673).
 7 | 
 8 | ## Requirements
 9 | 
10 |   - CMake >= 3.17 (earlier versions may work, but are not tested)
11 |   - C++ build environment that supports C++17 or greater
12 | 
13 | ## Tested compilers
14 | 
15 | We run GitHub Actions automated tests on every push and pull request.
16 | Automated tests use the "ubuntu-latest" and "macos-latest" runners,
17 | which presumably default to a fairly new GCC and Apple Clang, respectively.
18 | Other compilers, including MSVC 2019, have been tested in the past.
19 | 
20 | ## Brief build instructions
21 | 
22 | 1. Download and install googletest (GTest)
23 |    - https://github.com/google/googletest
24 | 2. Download and install mdspan:
25 |    - https://github.com/kokkos/mdspan
26 | 3. Run CMake, pointing it to your googletest and mdspan install locations
27 |    - If you want to build tests, set LINALG_ENABLE_TESTS=ON
28 |    - If you want to build examples, set LINALG_ENABLE_EXAMPLES=ON
29 |    - If you have a BLAS installation, set LINALG_ENABLE_BLAS=ON.
30 |      BLAS support is currently experimental.
31 |    - If you have a TBB (Threading Building Blocks) installation
32 |      and want to use TBB, set LINALG_ENABLE_TBB=ON (and optionally
33 |      set TBB_DIR to the lib/cmake/TBB subdirectory of your TBB installation,
34 |      or wherever the TBBConfig.cmake file happens to live).
35 |      TBB support is currently experimental.
36 | 4. Build and install as usual
37 | 5. If you enabled tests, use "ctest" to run them
38 | 
39 | ## More detailed MSVC build instructions
40 | 
41 | Be sure to build mdspan and googletest in the Release configuration before installing.
42 | 
43 | The following CMake options are known to work:
44 | 
45 | - mdspan_DIR=${MDSPAN_INSTALL_DIR}\lib\cmake\mdspan
46 |   (where MDSPAN_INSTALL_DIR is the path to your mdspan installation)
47 | - GTEST_INCLUDE_DIR=${GTEST_INSTALL_DIR}\include
48 |   (where GTEST_INSTALL_DIR is the path to your googletest installation)
49 | - GTEST_LIBRARY=${GTEST_INSTALL_DIR}\lib\gtest.lib
50 | - GTEST_MAIN_LIBRARY=${GTEST_INSTALL_DIR}\lib\gtest_main.lib
51 | 
52 | When building tests, for all CMAKE_CXX_FLAGS_* options,
53 | you might need to change "/MD" to "/MT", depending on how googletest was built.
54 | 


--------------------------------------------------------------------------------
/cmake/LinAlgConfig.cmake.in:
--------------------------------------------------------------------------------
1 | @PACKAGE_INIT@
2 | 
3 | include("${CMAKE_CURRENT_LIST_DIR}/linalgTargets.cmake")
4 | 


--------------------------------------------------------------------------------
/compilation_tests/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # add_compilation_test(<name>): build executable <name> from <name>.cpp and link it against linalg.
2 | macro(add_compilation_test name)
3 |   add_executable(${name} ${name}.cpp)
4 |   target_link_libraries(${name} linalg)
5 | endmacro()
6 | 
7 | #add_compilation_test(ctest_thingy)
8 | 
9 | 


--------------------------------------------------------------------------------
/compilation_tests/ctest_common.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #pragma once
19 | 
20 | #include <mdspan/mdspan.hpp>
21 | 
22 | #include <type_traits>
23 | 
24 | #define MDSPAN_STATIC_TEST(...) \
25 |   static_assert(__VA_ARGS__, "MDSpan compile time test failed at "  __FILE__ ":" MDSPAN_PP_STRINGIFY(__LINE__))
26 | 
27 | // All tests need a main so that they'll link; defining it here gives
28 | // every test that includes this header an (empty) entry point.
29 | int main() { }
30 | 


--------------------------------------------------------------------------------
/do-configure:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Requirements:
 4 | #
 5 | #   - CMake >= 3.17 (see README.md; earlier versions may work, but are not tested)
 6 | #   - C++17 or greater compiler (see README.md; mdspan by itself requires C++14)
 7 | 
 8 | # Build instructions
 9 | #
10 | # 1. Download and install googletest
11 | #    - https://github.com/google/googletest
12 | #    - (master appears to work)
13 | # 2. Download and install mdspan:
14 | #    - https://github.com/kokkos/mdspan
15 | 
16 | # Set this to the path of your stdBLAS source directory.
17 | SRC_DIR="$HOME/Documents/Code/CPP/src/stdBLAS"
18 | 
19 | # Set this to the root directory of the place where you
20 | # installed googletest and mdspan.
21 | INSTALL_ROOT="$HOME/Documents/Code/CPP/install"
22 | 
23 | cmake \
24 | -D CMAKE_INSTALL_PREFIX="${INSTALL_ROOT}/stdBLAS" \
25 | -D CMAKE_PREFIX_PATH="${INSTALL_ROOT}/mdspan;${INSTALL_ROOT}/googletest" \
26 | -D LINALG_ENABLE_TESTS:BOOL=ON \
27 | -D LINALG_ENABLE_EXAMPLES:BOOL=ON \
28 | "${SRC_DIR}"
29 | 
30 | 


--------------------------------------------------------------------------------
/examples/01_scale.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | // Examples currently use parentheses (e.g., A(i,j))
19 | // for the array access operator,
20 | // instead of square brackets (e.g., A[i,j]).
21 | // This must be defined before including any mdspan headers.
22 | #define MDSPAN_USE_PAREN_OPERATOR 1
23 | 
24 | #include <mdspan/mdspan.hpp>
25 | #include "experimental/__p2630_bits/submdspan.hpp"
26 | #include <experimental/linalg>
27 | #include <iostream>
28 | #include <vector>
29 | 
30 | #ifdef LINALG_HAS_EXECUTION
31 | #  include <execution>
32 | #endif
33 | 
34 | namespace MdSpan = MDSPAN_IMPL_STANDARD_NAMESPACE;
35 | namespace LinearAlgebra = MDSPAN_IMPL_STANDARD_NAMESPACE :: MDSPAN_IMPL_PROPOSED_NAMESPACE :: linalg;
36 | 
37 | using MdSpan::mdspan;
38 | using MdSpan::extents;
39 | #if defined(__cpp_lib_span)
40 | #include <span>
41 |   using std::dynamic_extent;
42 | #else
43 |   using MdSpan::dynamic_extent;
44 | #endif
45 | 
46 | int main(int argc, char* argv[]) {
47 |   std::cout << "Scale" << std::endl;
48 |   int N = 40;
49 |   {
50 |     // Backing storage for the vector x (N doubles)
51 |     std::vector<double> x_vec(N);
52 | 
53 |     // Create a rank-1 mdspan view of x_vec and fill it with x(i) = i.
54 |     //
55 |     // With CTAD working we could do the following.
56 |     // GCC 11.1 works but some other compilers are buggy.
57 |     //
58 |     // mdspan x(x_vec.data(), N);
59 |     mdspan<double, extents<int, dynamic_extent>> x(x_vec.data(), N);
60 |     for (int i = 0; i < x.extent(0); ++i) {
61 |       x(i) = i;
62 |     }
63 | 
64 |     // Call linalg::scale to compute x = 2.0 * x (overload without an execution policy).
65 |     LinearAlgebra::scale(2.0, x);
66 | #ifdef LINALG_HAS_EXECUTION
67 |     LinearAlgebra::scale(std::execution::par, 2.0, x);  // scale a second time, passing a parallel execution policy
68 | #else
69 |     LinearAlgebra::scale(2.0, x);  // scale a second time, without a policy
70 | #endif
71 | 
72 |     for (int i = 0; i < x.extent(0); i += 5) {  // print every 5th entry; x has been scaled by 2.0 twice at this point
73 |       std::cout << i << " " << x(i) << std::endl;
74 |     }
75 |   }
76 | }
77 | 


--------------------------------------------------------------------------------
/examples/02_matrix_vector_product_basic.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | // Examples currently use parentheses (e.g., A(i,j))
19 | // for the array access operator,
20 | // instead of square brackets (e.g., A[i,j]).
21 | // This must be defined before including any mdspan headers.
22 | #define MDSPAN_USE_PAREN_OPERATOR 1
23 | 
24 | #include <mdspan/mdspan.hpp>
25 | #include <experimental/linalg>
26 | #include <iostream>
27 | #include <vector>
28 | 
29 | #ifdef LINALG_HAS_EXECUTION
30 | #  include <execution>
31 | #endif
32 | 
33 | namespace MdSpan = MDSPAN_IMPL_STANDARD_NAMESPACE;
34 | namespace LinearAlgebra = MDSPAN_IMPL_STANDARD_NAMESPACE :: MDSPAN_IMPL_PROPOSED_NAMESPACE :: linalg;
35 | 
36 | using MdSpan::mdspan;
37 | using MdSpan::extents;
38 | #if defined(__cpp_lib_span)
39 | #include <span>
40 |   using std::dynamic_extent;
41 | #else
42 |   using MdSpan::dynamic_extent;
43 | #endif
44 | 
45 | int main(int argc, char* argv[]) {
46 |   std::cout << "Matrix Vector Product Basic" << std::endl;
47 |   int N = 40, M = 20;
48 |   {
49 |     // Backing storage: A has N*M entries, x has M entries, y has N entries.
50 |     std::vector<double> A_vec(N*M);
51 |     std::vector<double> x_vec(M);
52 |     std::vector<double> y_vec(N);
53 | 
54 |     // Wrap the storage in mdspan views (A is N x M) and initialize them.
55 |     // Would look simple with CTAD, GCC 11.1 works but some others are buggy
56 |     mdspan<double, extents<int, dynamic_extent,dynamic_extent>> A(A_vec.data(), N, M);
57 |     mdspan<double, extents<int, dynamic_extent>> x(x_vec.data(), M);
58 |     mdspan<double, extents<int, dynamic_extent>> y(y_vec.data(), N);
59 |     for (int i = 0; i < A.extent(0); ++i) {
60 |       for (int j = 0; j < A.extent(1); ++j) {
61 |         A(i,j) = 100.0 * i + j;
62 |       }
63 |     }
64 |     for (int i = 0; i < x.extent(0); ++i) {
65 |       x(i) = 1.0 * i;
66 |     }
67 |     for (int i = 0; i < y.extent(0); ++i) {
68 |       y(i) = -1.0 * i;
69 |     }
70 | 
71 |     // y = A * x
72 |     LinearAlgebra::matrix_vector_product(A, x, y);
73 | 
74 |     // y = 0.5 * y + 2 * A * x (with a parallel execution policy when LINALG_HAS_EXECUTION is defined)
75 | #ifdef LINALG_HAS_EXECUTION
76 |     LinearAlgebra::matrix_vector_product(std::execution::par,
77 |       LinearAlgebra::scaled(2.0, A), x,
78 |       LinearAlgebra::scaled(0.5, y), y);
79 | #else
80 |     LinearAlgebra::matrix_vector_product(
81 |       LinearAlgebra::scaled(2.0, A), x,
82 |       LinearAlgebra::scaled(0.5, y), y);
83 | #endif
84 |     for (int i = 0; i < y.extent(0); i += 5) {  // print every 5th entry of y
85 |       std::cout << i << " " << y(i) << std::endl;
86 |     }
87 |   }
88 | }
89 | 


--------------------------------------------------------------------------------
/examples/03_matrix_vector_product_mixedprec.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | // Examples currently use parentheses (e.g., A(i,j))
19 | // for the array access operator,
20 | // instead of square brackets (e.g., A[i,j]).
21 | // This must be defined before including any mdspan headers.
22 | #define MDSPAN_USE_PAREN_OPERATOR 1
23 | 
24 | #include <mdspan/mdspan.hpp>
25 | #include "experimental/__p2630_bits/submdspan.hpp"
26 | #include <experimental/linalg>
27 | #include <iostream>
28 | #include <vector>
29 | 
30 | namespace MdSpan = MDSPAN_IMPL_STANDARD_NAMESPACE;
31 | namespace LinearAlgebra = MDSPAN_IMPL_STANDARD_NAMESPACE :: MDSPAN_IMPL_PROPOSED_NAMESPACE :: linalg;
32 | 
33 | using MdSpan::mdspan;
34 | using MdSpan::extents;
35 | using MdSpan::full_extent;
36 | using MdSpan::submdspan;
37 | #if defined(__cpp_lib_span)
38 | #include <span>
39 |   using std::dynamic_extent;
40 | #else
41 |   using MdSpan::dynamic_extent;
42 | #endif
43 | 
44 | int main(int argc, char* argv[]) {
45 |   std::cout << "Matrix Vector Product MixedPrec" << std::endl;
46 |   int M = 40;
47 |   {
48 |     // Backing storage: A is stored as float, x and y as double (mixed precision).
49 |     std::vector<float> A_vec(M * 8 * 4);
50 |     std::vector<double> x_vec(M * 4);
51 |     std::vector<double> y_vec(M * 8);
52 | 
53 |     // Create and initialize mdspan: A is M x 8 x 4, x is 4 x M, y is M x 8
54 |     mdspan<float, extents<int, dynamic_extent, 8, 4>> A(A_vec.data(), M);
55 |     mdspan<double, extents<int, 4, dynamic_extent>> x(x_vec.data(), M);
56 |     mdspan<double, extents<int, dynamic_extent, 8>> y(y_vec.data(), M);
57 |     for (int m = 0; m < A.extent(0); ++m) {
58 |       for (int i = 0; i < A.extent(1); ++i) {
59 |         for (int j = 0; j < A.extent(2); ++j) {
60 |           A(m,i,j) = 1000.0 * m + 100.0 * i + j;
61 |         }
62 |       }
63 |     }
64 |     for (int i = 0; i < x.extent(0); ++i) {
65 |       for (int m = 0; m < x.extent(1); ++m) {
66 |         x(i,m) = 33.0 * i + 0.33 * m;
67 |       }
68 |     }
69 |     for (int m = 0; m < y.extent(0); ++m) {
70 |       for (int i = 0; i < y.extent(1); ++i) {
71 |         y(m,i) = 33.0 * m + 0.33 * i;
72 |       }
73 |     }
74 | 
75 |     for (int m = 0; m < M; ++m) {  // one 8 x 4 matrix-vector product per index m
76 |       auto A_m = submdspan(A, m, full_extent, full_extent);
77 |       auto x_m = submdspan(x, full_extent, m);
78 |       auto y_m = submdspan(y, m, full_extent);
79 |       // y_m = A_m * x_m
80 |       LinearAlgebra::matrix_vector_product(A_m, x_m, y_m);
81 |     }
82 | 
83 |     for (int i = 0; i < y.extent(0); i += 5) {  // print y(i,1) for every 5th row i of y
84 |       std::cout << i << " " << y(i,1) << std::endl;
85 |     }
86 |   }
87 | }
88 | 


--------------------------------------------------------------------------------
/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # linalg_add_example(<EXENAME>): build example executable <EXENAME> from <EXENAME>.cpp and link it against linalg.
 2 | function(linalg_add_example EXENAME)
 3 |   add_executable(${EXENAME} ${EXENAME}.cpp)
 4 |   target_link_libraries(${EXENAME} linalg)
 5 | endfunction(linalg_add_example)
 6 | 
 7 | linalg_add_example(01_scale)
 8 | linalg_add_example(02_matrix_vector_product_basic)
 9 | linalg_add_example(03_matrix_vector_product_mixedprec)
10 | # The examples in kokkos-based/ are added only when LINALG_ENABLE_KOKKOS is set.
11 | if(LINALG_ENABLE_KOKKOS)
12 |   add_subdirectory(kokkos-based)
13 | endif()
14 | 


--------------------------------------------------------------------------------
/examples/kokkos-based/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Kokkos-based examples; linalg_add_example() is defined in ../CMakeLists.txt, which adds this directory only when LINALG_ENABLE_KOKKOS is set.
 2 | linalg_add_example(add_kokkos)
 3 | linalg_add_example(dot_kokkos)
 4 | linalg_add_example(dotc_kokkos)
 5 | linalg_add_example(idx_abs_max_kokkos)
 6 | linalg_add_example(vector_norm2_kokkos)
 7 | linalg_add_example(vector_abs_sum_kokkos)
 8 | linalg_add_example(vector_sum_of_squares_kokkos)
 9 | linalg_add_example(scale_kokkos)
10 | linalg_add_example(matrix_vector_product_kokkos)
11 | 


--------------------------------------------------------------------------------
/examples/kokkos-based/add_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include <experimental/linalg>
19 | #include <iostream>
20 | // Print each element v(i) next to gold[i]; gold must hold at least v.size() entries.
21 | template<class T1, class ScalarType>
22 | void print_elements(const T1 & v, const std::vector<ScalarType> & gold)
23 | {
24 |   for(std::size_t i=0; i<v.size(); i++){
25 |     std::cout << "computed = " << v(i)
26 | 	      << " , gold = "
27 | 	      << gold[i]
28 | 	      << "\n";
29 |   }
30 | }
31 | 
32 | int main(int argc, char* argv[])
33 | {
34 |   std::cout << "add example: calling kokkos-kernels" << std::endl;
35 | 
36 |   std::size_t N = 50;
37 |   Kokkos::initialize(argc,argv);
38 |   { // the Views below go out of scope before Kokkos::finalize() is called
39 |     using value_type = double;
40 |     // Kokkos allocates the storage; the mdspans created further down only refer to it.
41 |     Kokkos::View<value_type*> x_view("x",N);
42 |     Kokkos::View<value_type*> y_view("y",N);
43 |     Kokkos::View<value_type*> z_view("z",N);
44 | 
45 |     value_type* x_ptr = x_view.data();
46 |     value_type* y_ptr = y_view.data();
47 |     value_type* z_ptr = z_view.data();
48 | 
49 |     using dyn_1d_ext_type = std::experimental::extents<std::experimental::dynamic_extent>;
50 |     using mdspan_type  = std::experimental::mdspan<value_type, dyn_1d_ext_type>;
51 |     mdspan_type x(x_ptr,N);
52 |     mdspan_type y(y_ptr,N);
53 |     mdspan_type z(z_ptr,N);
54 | 
55 |     std::vector<value_type> gold(N);
56 |     for(std::size_t i=0; i<x.extent(0); i++){
57 |       x(i) = i;
58 |       y(i) = i + static_cast<value_type>(10);
59 |       z(i) = 0;
60 |       gold[i] = x(i) + y(i);
61 |     }
62 | 
63 |     namespace stdla = std::experimental::linalg;
64 |     // gold holds the expected element-wise sums x(i) + y(i) that z is compared against.
65 | 
66 |     {
67 |       // This goes to the base implementation; its z is reset below and never printed
68 |       stdla::add(std::execution::seq, x, y, z);
69 |     }
70 | 
71 |     {
72 |       // reset z since it is modified above
73 |       for(std::size_t i=0; i<z.extent(0); i++){ z(i) = 0; }
74 | 
75 |       // This forwards to KokkosKernels
76 |       stdla::add(KokkosKernelsSTD::kokkos_exec<>(), x,y,z);
77 |       print_elements(z, gold);
78 |     }
79 | 
80 |   }
81 |   Kokkos::finalize();
82 | }
83 | 


--------------------------------------------------------------------------------
/examples/kokkos-based/dot_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include <experimental/linalg>
19 | #include <iostream>
20 | 
21 | int main(int argc, char* argv[])
22 | {
23 |   std::cout << "dot example: calling kokkos-kernels" << std::endl;
24 | 
25 |   std::size_t N = 50;
26 |   Kokkos::initialize(argc,argv);
27 |   { // the Views below go out of scope before Kokkos::finalize() is called
28 |     namespace stdexp = std::experimental;
29 |     namespace stdla = stdexp::linalg;
30 |     using value_type = double;
31 |     using vector_view = stdexp::mdspan<value_type, stdexp::extents<stdexp::dynamic_extent>>;
32 | 
33 |     // Storage is allocated by Kokkos and wrapped in rank-1 mdspans.
34 |     Kokkos::View<value_type*> a_storage("A",N);
35 |     Kokkos::View<value_type*> b_storage("B",N);
36 |     vector_view a(a_storage.data(),N);
37 |     vector_view b(b_storage.data(),N);
38 |     // Fill both vectors with 0, 1, ..., N-1.
39 |     for(std::size_t k=0; k<a.extent(0); ++k){
40 |       const auto k_val = static_cast<value_type>(k);
41 |       a(k) = k_val;
42 |       b(k) = k_val;
43 |     }
44 | 
45 |     const value_type init_value(2.0);
46 | 
47 |     // This goes to the base implementation
48 |     const auto seq_result = stdla::dot(std::execution::seq, a, b, init_value);
49 |     printf("Seq result    = %lf\n", seq_result);
50 | 
51 |     // This forwards to KokkosKernels
52 |     const auto kokkos_result = stdla::dot(KokkosKernelsSTD::kokkos_exec<>(), a, b, init_value);
53 |     printf("Kokkos result = %lf\n", kokkos_result);
54 |   }
55 |   Kokkos::finalize();
56 | }
57 | 


--------------------------------------------------------------------------------
/examples/kokkos-based/dotc_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include <experimental/linalg>
19 | #include <iostream>
20 | 
21 | int main(int argc, char* argv[])
22 | {
23 |   std::cout << "dotc example: calling kokkos-kernels" << std::endl;
24 | 
25 |   std::size_t N = 10;
26 |   Kokkos::initialize(argc,argv);
27 |   {
28 |     namespace stdexp = std::experimental;
29 |     namespace stdla = stdexp::linalg;
30 |     using value_type = std::complex<double>;
31 |     using vector_view = stdexp::mdspan<value_type, stdexp::extents<stdexp::dynamic_extent>>;
32 | 
33 |     // Complex-valued storage allocated by Kokkos, wrapped in rank-1 mdspans.
34 |     Kokkos::View<value_type*> a_storage("A",N);
35 |     Kokkos::View<value_type*> b_storage("B",N);
36 |     vector_view a(a_storage.data(),N);
37 |     vector_view b(b_storage.data(),N);
38 | 
39 |     // a(k) = (k+1) + (k+1)i  and  b(k) = (k-2) + (k-2)i
40 |     for(std::size_t k=0; k<a.extent(0); ++k)
41 |     {
42 |       const double part_a = static_cast<double>(k) + 1.0;
43 |       const double part_b = static_cast<double>(k) - 2.0;
44 |       a(k) = value_type(part_a, part_a);
45 |       b(k) = value_type(part_b, part_b);
46 |     }
47 | 
48 |     // Starting value handed to both dotc calls.
49 |     const value_type init_value(2., 3.);
50 | 
51 |     // This goes to the base implementation
52 |     const auto seq_result = stdla::dotc(std::execution::seq, a, b, init_value);
53 |     std::cout << "Seq result    = " << seq_result << "\n";
54 | 
55 |     // This forwards to KokkosKernels
56 |     const auto kokkos_result = stdla::dotc(KokkosKernelsSTD::kokkos_exec<>(), a, b, init_value);
57 |     std::cout << "Kokkos result = " << kokkos_result << "\n";
58 |   }
59 |   Kokkos::finalize();
60 | }
61 | 


--------------------------------------------------------------------------------
/examples/kokkos-based/idx_abs_max_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include <experimental/linalg>
19 | #include <iostream>
20 | 
21 | namespace stdexp = std::experimental;
22 | namespace stdla = stdexp::linalg;
23 | using value_type = double;
24 | 
25 | void run_trivial_example()
26 | {
27 |   // Zero-length input: prints what each backend returns for an empty vector.
28 |   std::array<double, 0> empty_storage;
29 |   stdexp::mdspan<value_type, stdexp::extents<stdexp::dynamic_extent>> a(empty_storage.data(),0);
30 | 
31 |   const auto seq_idx = stdla::vector_idx_abs_max(std::execution::seq, a);
32 |   std::cout << "Sequen result = " << seq_idx << '\n';
33 | 
34 |   const auto kokkos_idx = stdla::vector_idx_abs_max(KokkosKernelsSTD::kokkos_exec<>(), a);
35 |   std::cout << "Kokkos result = " << kokkos_idx << '\n';
36 | }
37 | 
38 | void run_nontrivial_example()
39 | {
40 |   std::size_t N = 10;
41 | 
42 |   Kokkos::View<value_type*> a_storage("A",N);
43 |   using vector_view = stdexp::mdspan<value_type, stdexp::extents<stdexp::dynamic_extent>>;
44 |   vector_view a(a_storage.data(),N);
45 | 
46 |   // The largest magnitude, 1.7, occurs twice: at index 5 and at index 8.
47 |   const value_type entries[] = {
48 |      0.5,  0.2,
49 |      0.1,  0.4,
50 |     -0.8, -1.7,
51 |     -0.3,  0.5,
52 |     -1.7, -0.9
53 |   };
54 |   for(std::size_t k=0; k<N; ++k){
55 |     a(k) = entries[k];
56 |   }
57 | 
58 |   // This goes to the base implementation
59 |   const auto seq_idx = stdla::vector_idx_abs_max(std::execution::seq, a);
60 |   std::cout << "Sequen result = " << seq_idx << '\n';
61 | 
62 |   // This forwards to KokkosKernels (https://github.com/kokkos/kokkos-kernels)
63 |   const auto kokkos_idx = stdla::vector_idx_abs_max(KokkosKernelsSTD::kokkos_exec<>(), a);
64 |   std::cout << "Kokkos result = " << kokkos_idx << '\n';
65 | }
66 | 
67 | int main(int argc, char* argv[])
68 | {
69 |   std::cout << "vector_idx_abs_max example: calling kokkos-kernels" << std::endl;
70 | 
71 |   Kokkos::initialize(argc,argv);
72 |   // Both helpers keep their data in local variables, so everything they
73 |   // create is gone again by the time Kokkos::finalize() runs.
74 |   run_trivial_example();
75 |   run_nontrivial_example();
76 |   Kokkos::finalize();
77 | }
78 | 


--------------------------------------------------------------------------------
/examples/kokkos-based/matrix_vector_product_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include <experimental/linalg>
19 | 
20 | #include <iostream>
21 | 
22 | using namespace std::experimental;
23 | 
24 | int main(int argc, char* argv[]) {
25 |   std::cout << "MatrixVectorProduct" << std::endl;
26 |   int N = 10; // number of rows of A, length of y and z
27 |   int M = 20; // number of columns of A, length of x
28 |   Kokkos::initialize(argc,argv);
29 |   {
30 |     Kokkos::View<double*> x_view("X",M);
31 |     Kokkos::View<double*> y_view("Y",N);
32 |     Kokkos::View<float**,Kokkos::LayoutRight> A_view("A",N,M);
33 | 
34 |     {
35 |       // example for y = A * x
36 | 
37 |       Kokkos::deep_copy(x_view,1.0);
38 |       Kokkos::deep_copy(A_view,2.0);
39 | 
40 |       // mdspan x(x_view.data(),M); // would require CTAD
41 |       mdspan<double, extents<dynamic_extent>> x(x_view.data(),M);
42 |       mdspan<double, extents<dynamic_extent>> y(y_view.data(),N);
43 |       mdspan<float, extents<dynamic_extent,dynamic_extent>> A(A_view.data(),N,M);
44 | 
45 |       // This forwards to KokkosKernels (https://github.com/kokkos/kokkos-kernels)
46 |       linalg::matrix_vector_product(KokkosKernelsSTD::kokkos_exec<>(),A,x,y);
47 |       // This forwards to KokkosKernels if LINALG_ENABLE_KOKKOS_DEFAULT is ON
48 |       linalg::matrix_vector_product(A,x,y);
49 |       linalg::matrix_vector_product(std::execution::par,A,linalg::scaled(2.0,x),y);
50 |       // This goes to the base implementation
51 |       //linalg::matrix_vector_product(std::execution::seq,A,x,y);
52 | 
53 |       // A*x has entries 2*M = 40; the last call above used scaled(2.0, x), so each element prints 80
54 |       for(std::size_t i=0; i<y.extent(0); i++) printf("%zu %lf, expected = %lf\n",i,y(i), 80.);
55 |     }
56 | 
57 |     {
58 |       // example for z = y + A * x
59 | 
60 |       Kokkos::View<double*> z_view("Z",N);
61 |       Kokkos::deep_copy(x_view,1.0);
62 |       Kokkos::deep_copy(y_view,2.0);
63 |       Kokkos::deep_copy(z_view,0.0);
64 |       Kokkos::deep_copy(A_view,1.0);
65 | 
66 |       mdspan<double, extents<dynamic_extent>> x(x_view.data(),M);
67 |       mdspan<double, extents<dynamic_extent>> y(y_view.data(),N);
68 |       mdspan<double, extents<dynamic_extent>> z(z_view.data(),N);
69 |       mdspan<float, extents<dynamic_extent,dynamic_extent>> A(A_view.data(),N,M);
70 | 
71 |       // 1.
72 |       linalg::matrix_vector_product(KokkosKernelsSTD::kokkos_exec<>(),A,x,y,z);
73 |       // should print 22 (= 2 + 20)
74 |       for(std::size_t i=0; i<z.extent(0); i++) printf("%zu %lf, expected = %lf\n",i,z(i), 22.);
75 | 
76 |       // 2.
77 |       // scale y by 4 when passing it to kernel
78 |       linalg::matrix_vector_product(KokkosKernelsSTD::kokkos_exec<>(), A, x, linalg::scaled(4.,y), z);
79 |       // should print 28 (= 4*2 + 20)
80 |       for(std::size_t i=0; i<z.extent(0); i++) printf("%zu %lf, expected = %lf\n",i,z(i), 28.);
81 | 
82 |       // 3.
83 |       // scale y by 4 and x by 2 when passing it to kernel
84 |       linalg::matrix_vector_product(KokkosKernelsSTD::kokkos_exec<>(), A, linalg::scaled(2., x), linalg::scaled(4.,y), z);
85 |       // should print 48 (= 4*2 + 2*20)
86 |       for(std::size_t i=0; i<z.extent(0); i++) printf("%zu %lf, expected = %lf\n",i,z(i), 48.);
87 |     }
88 | 
89 |   }
90 |   Kokkos::finalize();
91 | }
92 | 


--------------------------------------------------------------------------------
/examples/kokkos-based/scale_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include <experimental/linalg>
19 | 
20 | #include <iostream>
21 | 
22 | 
23 | int main(int argc, char* argv[])
24 | {
25 |   std::cout << "scale example: calling kokkos-kernels" << std::endl;
26 | 
27 |   std::size_t N = 40;
28 |   Kokkos::initialize(argc,argv);
29 |   { // a_view goes out of scope before Kokkos::finalize() is called
30 |     Kokkos::View<double*> a_view("A",N);
31 |     double* a_ptr = a_view.data();
32 | 
33 |     // Requires CTAD working, GCC 11.1 works but some others are buggy
34 |     // std::experimental::mdspan a(a_ptr,N);
35 |     std::experimental::mdspan<double,std::experimental::extents<std::experimental::dynamic_extent>> a(a_ptr,N);
36 |     for(std::size_t i=0; i<a.extent(0); i++) a(i) = i;
37 | 
38 |     // This forwards to KokkosKernels (https://github.com/kokkos/kokkos-kernels)
39 |     std::experimental::linalg::scale(KokkosKernelsSTD::kokkos_exec<>(),2.0,a);
40 |     // This forwards to KokkosKernels if LINALG_ENABLE_KOKKOS_DEFAULT is ON
41 |     std::experimental::linalg::scale(std::execution::par,2.0,a);
42 |     // This goes to the base implementation
43 |     std::experimental::linalg::scale(std::execution::seq,2.0,a);
44 |     for(std::size_t i=0; i<a.extent(0); i++) printf("%zu %lf\n",i,a(i)); // i is std::size_t, hence %zu; three scalings by 2.0, so 8*i is expected
45 |   }
46 |   Kokkos::finalize();
47 | }
48 | 


--------------------------------------------------------------------------------
/examples/kokkos-based/vector_abs_sum_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include <experimental/linalg>
19 | #include <iostream>
20 | 
21 | int main(int argc, char* argv[])
22 | {
23 |   std::cout << "vector_abs_sum example: calling kokkos-kernels" << std::endl;
24 | 
25 |   std::size_t N = 20;
26 |   Kokkos::initialize(argc,argv);
27 |   {
28 |     namespace stdexp = std::experimental;
29 |     namespace stdla = stdexp::linalg;
30 |     using value_type = double;
31 |     using vector_view = stdexp::mdspan<value_type, stdexp::extents<stdexp::dynamic_extent>>;
32 | 
33 |     // Storage is allocated by Kokkos and wrapped in a rank-1 mdspan.
34 |     Kokkos::View<value_type*> x_storage("x",N);
35 |     vector_view x(x_storage.data(),N);
36 | 
37 |     // Alternate signs: even indices hold -k, odd indices hold +k.
38 |     for(std::size_t k=0; k<x.extent(0); ++k){
39 |       const value_type sign = (k % 2 == 0) ? -1 : 1;
40 |       x(k) = static_cast<value_type>(k) * sign;
41 |     }
42 | 
43 |     // Starting value handed to both vector_abs_sum calls.
44 |     const value_type init_value(2);
45 | 
46 |     // No execution policy is passed to this call.
47 |     const auto default_result = stdla::vector_abs_sum(x, init_value);
48 |     printf("Default result    = %lf\n", default_result);
49 | 
50 |     // FRIZZI: Oct 27: kk currently not impl yet, just placeholder to ensure hook forwards correctly
51 |     const auto kokkos_result = stdla::vector_abs_sum(KokkosKernelsSTD::kokkos_exec<>(), x, init_value);
52 |     (void)kokkos_result;
53 |     //printf("Kokkos result = %lf\n", kokkos_result);
54 | 
55 |   }
56 |   Kokkos::finalize();
57 | }
58 | 


--------------------------------------------------------------------------------
/examples/kokkos-based/vector_norm2_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include <experimental/linalg>
19 | #include <iostream>
20 | 
21 | int main(int argc, char* argv[])
22 | {
23 |   std::cout << "vector_norm2 example: calling kokkos-kernels" << std::endl;
24 | 
25 |   std::size_t N = 20;
26 |   Kokkos::initialize(argc,argv);
27 |   {
28 |     namespace stdexp = std::experimental;
29 |     namespace stdla = stdexp::linalg;
30 |     using value_type = double;
31 |     using vector_view = stdexp::mdspan<value_type, stdexp::extents<stdexp::dynamic_extent>>;
32 | 
33 |     Kokkos::View<value_type*> x_storage("x",N);
34 |     vector_view x(x_storage.data(),N);
35 | 
36 |     // x holds 0, 1, ..., N-1.
37 |     for(std::size_t k=0; k<x.extent(0); ++k){ x(k) = static_cast<value_type>(k); }
38 | 
39 |     const value_type init_value(2);
40 | 
41 |     // No execution policy is passed to this call.
42 |     const auto default_result = stdla::vector_norm2(x, init_value);
43 |     printf("Default result    = %lf\n", default_result);
44 | 
45 |     // Same call with the KokkosKernels execution policy.
46 |     const auto kokkos_result = stdla::vector_norm2(KokkosKernelsSTD::kokkos_exec<>(), x, init_value);
47 |     printf("Kokkos result = %lf\n", kokkos_result);
48 |   }
49 |   Kokkos::finalize();
50 | }
51 | 


--------------------------------------------------------------------------------
/examples/kokkos-based/vector_sum_of_squares_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include <experimental/linalg>
19 | #include <iostream>
20 | 
21 | int main(int argc, char* argv[])
22 | {
23 |   std::cout << "vector_sum_of_squares example: calling kokkos-kernels" << std::endl;
24 | 
25 |   std::size_t N = 20;
26 |   Kokkos::initialize(argc,argv);
27 |   {
28 |     namespace stdexp = std::experimental;
29 |     namespace stdla = stdexp::linalg;
30 |     using value_type = double;
31 |     using vector_view = stdexp::mdspan<value_type, stdexp::extents<stdexp::dynamic_extent>>;
32 | 
33 |     Kokkos::View<value_type*> x_storage("x",N);
34 |     vector_view x(x_storage.data(),N);
35 | 
36 |     // x holds 0, 1, ..., N-1.
37 |     for(std::size_t k=0; k<x.extent(0); ++k){ x(k) = k; }
38 | 
39 |     // Starting value handed to both vector_sum_of_squares calls.
40 |     stdla::sum_of_squares_result<value_type> init_value{1., 1.};
41 | 
42 |     const auto default_result = stdla::vector_sum_of_squares(x, init_value);
43 |     std::cout << "Default result: " << default_result.scaling_factor << " " << default_result.scaled_sum_of_squares << '\n';
44 | 
45 |     // FRIZZI: Oct 27: kk currently not impl yet, just placeholder to ensure hook forwards correctly
46 |     const auto kokkos_result = stdla::vector_sum_of_squares(KokkosKernelsSTD::kokkos_exec<>(), x, init_value);
47 |     (void)kokkos_result;
48 |     //std::cout << "Kokkos result: " << kokkos_result.scaling_factor << " " << kokkos_result.scaled_sum_of_squares << '\n';
49 | 
50 |   }
51 |   Kokkos::finalize();
52 | }
53 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/abs_if_needed.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_ABS_IF_NEEDED_HPP_
19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_ABS_IF_NEEDED_HPP_
20 | 
21 | #include <cmath>
22 | #include <complex>
23 | #include <type_traits>
24 | 
25 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
26 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
27 | inline namespace __p1673_version_0 {
28 | namespace linalg {
29 | namespace impl {
30 | 
31 | // (1.1) E if T is an unsigned integer;
32 | //
33 | // (1.2) otherwise, std::abs(E) if T is an arithmetic type,
34 | //
35 | // (1.3) otherwise, abs(E), if that expression is valid, with overload
36 | //   resolution performed in a context that includes the declaration
37 | //   template<class T> T abs(T) = delete;. If the function selected by
38 | //   overload resolution does not return the absolute value of its
39 | //   input, the program is ill-formed, no diagnostic required.
40 | 
41 | // Inline static variables require C++17.
42 | constexpr inline auto abs_if_needed = [](auto t)
43 | {
44 |   // t is taken by value, so decltype(t) carries no reference or const.
45 |   using value_t = std::decay_t<decltype(t)>;
46 |   if constexpr (std::is_unsigned_v<value_t>) {
47 |     // Unsigned arithmetic types are never negative: hand the value back.
48 |     return t;
49 |   }
50 |   else if constexpr (std::is_arithmetic_v<value_t>) {
51 |     return std::abs(t);
52 |   }
53 |   else {
54 |     return abs(t); // unqualified call for non-arithmetic types
55 |   }
56 | };
57 | 
58 | } // end namespace impl
59 | } // end namespace linalg
60 | } // end inline namespace __p1673_version_0
61 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE
62 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE
63 | 
64 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_ABS_IF_NEEDED_HPP_
65 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/conj_if_needed.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_IF_NEEDED_HPP_
19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_IF_NEEDED_HPP_
20 | 
21 | #include <complex>
22 | #include <type_traits>
23 | 
24 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
25 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
26 | inline namespace __p1673_version_0 {
27 | namespace linalg {
28 | namespace impl {
29 | 
30 | template<class T> struct is_complex : std::false_type{}; // primary template: T is not reported as complex
31 | // Only the three std::complex specializations below are reported as complex.
32 | template<> struct is_complex<std::complex<float>> : std::true_type{};
33 | template<> struct is_complex<std::complex<double>> : std::true_type{};
34 | template<> struct is_complex<std::complex<long double>> : std::true_type{};
35 | // Variable-template shorthand for is_complex<T>::value.
36 | template<class T> inline constexpr bool is_complex_v = is_complex<T>::value;
37 | 
38 | // Primary template: no usable unqualified conj(T) was found.
39 | template<class T, class = void>
40 | struct has_conj : std::false_type {};
41 | 
42 | // Chosen when unqualified conj(std::declval<T>()) is well-formed; that conj is assumed to return the conjugate.
43 | template<class T>
44 | struct has_conj<T, std::void_t<decltype(conj(std::declval<T>()))>> : std::true_type {};
45 | 
46 | // No usable conj was detected: return a copy of the argument.
47 | template<class T>
48 | T conj_if_needed_impl(const T& t, std::false_type)
49 | { return t; }
50 | 
51 | // conj was detected; arithmetic types are still returned as-is, others go through unqualified conj.
52 | template<class T>
53 | auto conj_if_needed_impl(const T& t, std::true_type)
54 | {
55 |   if constexpr (!std::is_arithmetic_v<T>) {
56 |     return conj(t);
57 |   } else {
58 |     return t;
59 |   }
60 | }
61 | 
62 | // Inline static variables require C++17.
63 | constexpr inline auto conj_if_needed = [](const auto& t)
64 | {
65 |   // Tag dispatch: has_conj<...> selects which conj_if_needed_impl overload runs.
66 |   return conj_if_needed_impl(t, has_conj<std::remove_const_t<decltype(t)>>{});
67 | };
68 | 
69 | } // end namespace impl
70 | } // end namespace linalg
71 | } // end inline namespace __p1673_version_0
72 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE
73 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE
74 | 
75 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_IF_NEEDED_HPP_
76 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/conjugate_transposed.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_TRANSPOSED_HPP_
19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_TRANSPOSED_HPP_
20 | 
21 | #include "conjugated.hpp"
22 | #include "transposed.hpp"
23 | 
24 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
25 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
26 | inline namespace __p1673_version_0 {
27 | namespace linalg {
28 | 
29 | template<class ElementType, class Extents, class Layout, class Accessor>
30 | auto conjugate_transposed(mdspan<ElementType, Extents, Layout, Accessor> a) // the mdspan is taken by value
31 | {
32 |   return conjugated(transposed(a)); // transposed(a) is computed first, then passed to conjugated()
33 | }
34 | 
35 | } // end namespace linalg
36 | } // end inline namespace __p1673_version_0
37 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE
38 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE
39 | 
40 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_CONJUGATE_TRANSPOSED_HPP_
41 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/imag_if_needed.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_IMAG_IF_NEEDED_HPP_
19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_IMAG_IF_NEEDED_HPP_
20 | 
21 | #include <complex>
22 | #include <type_traits>
23 | 
24 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
25 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
26 | inline namespace __p1673_version_0 {
27 | namespace linalg {
28 | namespace impl{
29 | 
30 | // Primary template: no usable unqualified imag(T) was found.
31 | template<class T, class = void>
32 | struct has_imag : std::false_type {};
33 | 
34 | // Chosen when unqualified imag(std::declval<T>()) is well-formed; that imag is assumed to return the imaginary part.
35 | template<class T>
36 | struct has_imag<T, std::void_t<decltype(imag(std::declval<T>()))>> : std::true_type {};
37 | 
38 | template<class T>
39 | T imag_if_needed_impl(const T& /* t */, std::false_type)
40 | {
41 |   // No unqualified imag(t) was found, so T is assumed to be a noncomplex
42 |   // number type; the argument is not read and a value-initialized T is returned.
43 |   return T{};
44 | }
45 | // Overload chosen when an unqualified imag(t) exists.
46 | template<class T>
47 | auto imag_if_needed_impl(const T& t, std::true_type)
48 | {
49 |   if constexpr (std::is_arithmetic_v<T>) {
50 |     // Overloads for integers have a return type of double.
51 |     // We want to preserve the input type T.
52 |     return T{};
53 |   } else {
54 |     return imag(t);
55 |   }
56 | }
57 | 
58 | // Inline static variables require C++17.
59 | constexpr inline auto imag_if_needed = [](const auto& t)
60 | {
61 |   using T = std::remove_const_t<decltype(t)>;
62 |   return imag_if_needed_impl(t, has_imag<T>{});
63 | };
64 | 
65 | } // end namespace impl
66 | } // end namespace linalg
67 | } // end inline namespace __p1673_version_0
68 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE
69 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE
70 | 
71 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_IMAG_IF_NEEDED_HPP_
72 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/layout_tags.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_LAYOUT_TAGS_HPP_
19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_LAYOUT_TAGS_HPP_
20 | 
21 | #include <mdspan/mdspan.hpp>
22 | 
23 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
24 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
25 | inline namespace __p1673_version_0 {
26 | namespace linalg {
27 | 
28 | // TODO @proposal-bug make sure these can't convert from `{}`
29 | 
30 | struct column_major_t { };
31 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto column_major = column_major_t{};
32 | struct row_major_t { };
33 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto row_major = row_major_t{};
34 | 
35 | struct upper_triangle_t { };
36 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto upper_triangle = upper_triangle_t{};
37 | struct lower_triangle_t { };
38 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto lower_triangle = lower_triangle_t{};
39 | 
40 | struct implicit_unit_diagonal_t { };
41 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto implicit_unit_diagonal = implicit_unit_diagonal_t{};
42 | struct explicit_diagonal_t { };
43 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto explicit_diagonal = explicit_diagonal_t{};
44 | 
45 | struct left_side_t { };
46 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto left_side = left_side_t{};
47 | struct right_side_t { };
48 | MDSPAN_IMPL_INLINE_VARIABLE constexpr auto right_side = right_side_t{};
49 | 
50 | } // end namespace linalg
51 | } // end inline namespace __p1673_version_0
52 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE
53 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE
54 | 
55 | 
56 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_LAYOUT_TAGS_HPP_
57 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/layout_triangle.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL_BITS_LAYOUT_TRIANGLE_HPP_
19 | #define LINALG_INCLUDE_EXPERIMENTAL_BITS_LAYOUT_TRIANGLE_HPP_
20 | 
21 | #include "layout_tags.hpp"
22 | 
23 | #include <type_traits>
24 | #include <cstdint>
25 | 
26 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
27 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
28 | inline namespace __p1673_version_0 {
29 | namespace linalg {
30 | 
// Implementation details for triangular layouts.  At present this
// namespace only contains a forward declaration; the sole
// specialization below is excluded from compilation by `#if 0`.
namespace __triangular_layouts_impl {

// Primary template, declared but not defined here.
// It takes six type template parameters.
template <class, class, class, class, class, class>
struct __lower_triangle_layout_impl;

// FIXME work-around for #4.
//
// NOTE(review): the disabled code below would need attention before
// being re-enabled:
//  - the partial specialization supplies five template arguments,
//    while the primary template declares six parameters;
//  - operator() names its parameter pack `idxs`, but the fold
//    expressions refer to `idx`;
//  - the lambda passed to _base_map has no `return` statement.
#if 0

// lower triangular offsets are triangular numbers (n*(n+1)/2)
template <
  ptrdiff_t ExtLast, ptrdiff_t... Exts, class BaseMap, class LastTwoMap,
  size_t... ExtIdxs, size_t... ExtMinus2Idxs
>
struct __lower_triangle_layout_impl<
  extents<Exts..., ExtLast, ExtLast>,
  BaseMap, LastTwoMap,
  std::integer_sequence<size_t, ExtIdxs...>,
  std::integer_sequence<size_t, ExtMinus2Idxs...>
> {

private:

  // Total number of extents: the leading pack plus the last two.
  static constexpr auto __rank = sizeof...(Exts) + 2;

  _MDSPAN_NO_UNIQUE_ADDRESS LastTwoMap _trimap;
  _MDSPAN_NO_UNIQUE_ADDRESS BaseMap _base_map;

public:


  // Combines the result of _base_map (applied to all but the last two
  // indices) with the result of _trimap (applied to the last two
  // indices) by multiplication.
  template <class... Integral>
  MDSPAN_FORCE_INLINE_FUNCTION
  constexpr ptrdiff_t operator()(Integral... idxs) const noexcept {
    auto base_val = _base_map(
      [&](size_t N) {
        _MDSPAN_FOLD_PLUS_RIGHT(((ExtIdxs == N) ? idx : 0), /* + ... + */ 0)
      }(ExtMinus2Idxs)...
    );
    auto triang_val = _trimap(
      _MDSPAN_FOLD_PLUS_RIGHT(((ExtIdxs == __rank - 2) ? idx : 0), /* + ... + */ 0),
      _MDSPAN_FOLD_PLUS_RIGHT(((ExtIdxs == __rank - 1) ? idx : 0), /* + ... + */ 0)
    );
    return base_val * triang_val;
  }

};

#endif // 0

} // end namespace __triangular_layouts_impl
81 | 
// Forward declaration of the packed layout class template,
// parameterized on a Triangle type and a StorageOrder type.
// No definition is provided in this header.
template <class Triangle, class StorageOrder>
class layout_blas_packed;
84 | 
85 | } // end namespace linalg
86 | } // end inline namespace __p1673_version_0
87 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE
88 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE
89 | 
90 | #endif //LINALG_INCLUDE_EXPERIMENTAL_BITS_LAYOUT_TRIANGLE_HPP_
91 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/linalg_config.h.in:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #cmakedefine LINALG_ENABLE_ATOMIC_REF
 4 | #cmakedefine LINALG_ENABLE_BLAS
 5 | #cmakedefine LINALG_ENABLE_CONCEPTS
 6 | #cmakedefine LINALG_ENABLE_KOKKOS
 7 | #cmakedefine LINALG_ENABLE_KOKKOS_DEFAULT
 8 | #cmakedefine LINALG_ENABLE_TBB
 9 | #cmakedefine LINALG_FIX_CONJUGATED_FOR_NONCOMPLEX
10 | #cmakedefine LINALG_FIX_RANK_UPDATES
11 | #cmakedefine LINALG_FIX_TRANSPOSED_FOR_PADDED_LAYOUTS
12 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/macros.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MACROS_HPP_
19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MACROS_HPP_
20 | 
21 | #include "__p1673_bits/linalg_config.h"
22 | 
// Work around a known MSVC issue, that by default
// it always defines __cplusplus as for C++98,
// even if building in a more recent C++ mode.
// _LINALG_CPLUSPLUS takes its value from _MSVC_LANG when that macro
// is defined, and from __cplusplus otherwise.
#ifdef _MSVC_LANG
#define _LINALG_CPLUSPLUS _MSVC_LANG
#else
#define _LINALG_CPLUSPLUS __cplusplus
#endif

// Values of the language-version macro for each C++ standard.
#define _LINALG_CXX_STD_14 201402L
#define _LINALG_CXX_STD_17 201703L
#define _LINALG_CXX_STD_20 202002L

// Each of these expands to a parenthesized comparison that is true
// if the language version in use is at least the named standard.
#define _LINALG_HAS_CXX_14 (_LINALG_CPLUSPLUS >= _LINALG_CXX_STD_14)
#define _LINALG_HAS_CXX_17 (_LINALG_CPLUSPLUS >= _LINALG_CXX_STD_17)
#define _LINALG_HAS_CXX_20 (_LINALG_CPLUSPLUS >= _LINALG_CXX_STD_20)

// Reject builds in a language mode older than C++17 with a clear message.
static_assert(_LINALG_CPLUSPLUS >= _LINALG_CXX_STD_17, "stdBLAS requires C++17 or later.");
41 | 
// LINALG_HAS_EXECUTION is defined to 1 if std::execution policies
// are expected to be usable with the compiler in use.
//
// A sufficiently recent nvc++ comes with <execution>.
// GCC (even 13.1.0) needs TBB, else std::execution::* won't even compile.
// Other compilers like to define __GNUC__ to claim GCC compatibility,
// even if they aren't GCC (and don't have GCC's issue of needing TBB).
// Those compilers are excluded from the GCC branch by testing their
// own identification macros: __clang__, _MSC_VER, __INTEL_COMPILER
// (classic Intel compiler), and __INTEL_LLVM_COMPILER (LLVM-based
// Intel compiler).  Note that both Intel macros start with two
// underscores.
#if defined(__NVCOMPILER)
#  define LINALG_HAS_EXECUTION 1
#elif ! defined(__clang__) && ! defined(_MSC_VER) && ! defined(__INTEL_COMPILER) && ! defined(__INTEL_LLVM_COMPILER) && defined(__GNUC__)
#  if defined(LINALG_ENABLE_TBB)
#    define LINALG_HAS_EXECUTION 1
#  endif
#elif ! defined(__apple_build_version__)
#  define LINALG_HAS_EXECUTION 1
#endif
55 | 
// Expands to three template parameter declarations for a matrix's
// extents: a size type and two std::size_t values (rows, columns).
// Each parameter name is suffixed with MATRIX_NAME.
#define P1673_MATRIX_EXTENTS_TEMPLATE_PARAMETERS( MATRIX_NAME ) \
  class SizeType_ ## MATRIX_NAME , \
  ::std::size_t numRows_ ## MATRIX_NAME , \
  ::std::size_t numCols_ ## MATRIX_NAME

// Expands to the full list of template parameter declarations for a
// matrix: element type, the extents parameters above, layout, and
// accessor.  Each parameter name is suffixed with MATRIX_NAME.
#define P1673_MATRIX_TEMPLATE_PARAMETERS( MATRIX_NAME ) \
    class ElementType_ ## MATRIX_NAME , \
    P1673_MATRIX_EXTENTS_TEMPLATE_PARAMETERS( MATRIX_NAME ) , \
    class Layout_ ## MATRIX_NAME , \
    class Accessor_ ## MATRIX_NAME

// Expands to the extents<...> type built from the template parameters
// declared by P1673_MATRIX_EXTENTS_TEMPLATE_PARAMETERS( MATRIX_NAME ).
#define P1673_MATRIX_EXTENTS_PARAMETER( MATRIX_NAME ) \
  extents< \
    SizeType_ ## MATRIX_NAME , \
    numRows_ ## MATRIX_NAME , \
    numCols_ ## MATRIX_NAME \
  >

// Expands to a function parameter declaration: an mdspan<...> type
// built from the template parameters declared by
// P1673_MATRIX_TEMPLATE_PARAMETERS( MATRIX_NAME ), followed by the
// parameter name MATRIX_NAME.
#define P1673_MATRIX_PARAMETER( MATRIX_NAME ) \
  mdspan< \
    ElementType_ ## MATRIX_NAME , \
    P1673_MATRIX_EXTENTS_PARAMETER( MATRIX_NAME ), \
    Layout_ ## MATRIX_NAME , \
    Accessor_ ## MATRIX_NAME \
  > MATRIX_NAME
81 | 
82 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MACROS_HPP_
83 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/maybe_static_size.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MAYBE_STATIC_SIZE_HPP_
19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MAYBE_STATIC_SIZE_HPP_
20 | 
21 | #include <mdspan/mdspan.hpp>
22 | 
23 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
24 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
25 | inline namespace __p1673_version_0 {
26 | namespace linalg {
27 | namespace impl {
28 | 
29 | template <class T, T Value, T DynSentinel>
30 | struct __maybe_static_value {
31 | 
32 |   MDSPAN_INLINE_FUNCTION constexpr
33 |   __maybe_static_value(T) noexcept { }
34 |   MDSPAN_INLINE_FUNCTION MDSPAN_IMPL_CONSTEXPR_14
35 |   __maybe_static_value& operator=(T) noexcept { }
36 | 
37 |   MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr
38 |   __maybe_static_value() noexcept = default;
39 |   MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr
40 |   __maybe_static_value(__maybe_static_value const&) noexcept = default;
41 |   MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr
42 |   __maybe_static_value(__maybe_static_value&&) noexcept = default;
43 |   MDSPAN_INLINE_FUNCTION_DEFAULTED MDSPAN_IMPL_CONSTEXPR_14_DEFAULTED
44 |   __maybe_static_value& operator=(__maybe_static_value const&) noexcept = default;
45 |   MDSPAN_INLINE_FUNCTION_DEFAULTED MDSPAN_IMPL_CONSTEXPR_14_DEFAULTED
46 |   __maybe_static_value& operator=(__maybe_static_value&&) noexcept = default;
47 |   MDSPAN_INLINE_FUNCTION_DEFAULTED
48 |   ~__maybe_static_value() = default;
49 | 
50 |   static constexpr auto value = Value;
51 |   static constexpr auto is_static = true;
52 |   static constexpr auto value_static = Value;
53 | };
54 | 
// Specialization used when the value template argument equals
// DynSentinel: the value is stored in a data member at run time.
template <class T, T DynSentinel>
struct __maybe_static_value<T, DynSentinel, DynSentinel> {
  // The run-time value, value-initialized by default.
  T value{};
  // False: the value is not known at compile time.
  static constexpr auto is_static = false;
  // The sentinel, marking the value as dynamic.
  static constexpr auto value_static = DynSentinel;
};
61 | 
// Convenience alias of __maybe_static_value for std::size_t values.
// The sentinel defaults to dynamic_extent.
template <::std::size_t StaticSize, ::std::size_t Sentinel=dynamic_extent>
using __maybe_static_extent = __maybe_static_value<::std::size_t, StaticSize, Sentinel>;
64 | 
65 | } // end namespace impl
66 | } // end namespace linalg
67 | } // end inline namespace __p1673_version_0
68 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE
69 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE
70 | 
71 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_MAYBE_STATIC_SIZE_HPP_
72 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/packed_layout.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_PACKED_LAYOUT_HPP_
19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_PACKED_LAYOUT_HPP_
20 | 
21 | #include <mdspan/mdspan.hpp>
22 | #include "layout_triangle.hpp"
23 | 
24 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
25 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
26 | inline namespace __p1673_version_0 {
27 | namespace linalg {
28 | 
// TODO declarations need extents-see-returns-below defined
//
// The declaration of `packed` below is excluded from compilation by
// `#if 0`.  Its return type still contains the placeholder
// <i>extents-see-returns-below</i> instead of a real extents type,
// so it cannot be compiled as written.

#if 0
template<class EltType,
         class Extents,
         class Layout,
         class Accessor,
         class Triangle,
         class StorageOrder>
constexpr mdspan<EltType,
  <i>extents-see-returns-below</i>,
  layout_blas_packed<
    Triangle,
    StorageOrder>,
  Accessor>
packed(
  const mdspan<EltType, Extents, Layout, Accessor>& m,
  typename mdspan<EltType, Extents, Layout, Accessor>::index_type num_rows,
  Triangle,
  StorageOrder);
#endif // 0
50 | 
51 | } // end namespace linalg
52 | } // end inline namespace __p1673_version_0
53 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE
54 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE
55 | 
56 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_PACKED_LAYOUT_HPP_
57 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/real_if_needed.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_REAL_IF_NEEDED_HPP_
19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_REAL_IF_NEEDED_HPP_
20 | 
21 | #include <complex>
22 | #include <type_traits>
23 | 
24 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
25 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
26 | inline namespace __p1673_version_0 {
27 | namespace linalg {
namespace impl {

// Detection trait: has_real<T>::value is true if the unqualified call
// real(std::declval<T>()) is well-formed, and false otherwise.
template<class T, class = void>
struct has_real : std::false_type {};

// If I can find unqualified real via overload resolution,
// then assume that real(t) returns the real part of t.
template<class T>
struct has_real<T, decltype(real(std::declval<T>()), void())> : std::true_type {};

// Overload selected when real(t) is not callable.
template<class T>
constexpr T real_if_needed_impl(const T& t, std::false_type)
{
  // If real(t) can't be found, then assume that T represents
  // a noncomplex number type, which is its own real part.
  return t;
}

// Overload selected when real(t) is callable.
template<class T>
constexpr auto real_if_needed_impl(const T& t, std::true_type)
{
  if constexpr (std::is_arithmetic_v<T>) {
    // Overloads for integers have a return type of double.
    // We want to preserve the input type T.
    return t;
  } else {
    return real(t);
  }
}

// Function object returning the real part of t if real(t) is callable
// for a non-arithmetic t, and a copy of t otherwise.
//
// Inline static variables require C++17.
constexpr inline auto real_if_needed = [](const auto& t)
{
  // decltype(t) is a reference to const.  The trait is queried with
  // exactly that type, so that the detected expression has the same
  // value category and constness as the call real(t) made by
  // real_if_needed_impl.
  using arg_type = decltype(t);
  return real_if_needed_impl(t, has_real<arg_type>{});
};

} // end namespace impl
66 | } // end namespace linalg
67 | } // end inline namespace __p1673_version_0
68 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE
69 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE
70 | 
71 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_REAL_IF_NEEDED_HPP_
72 | 


--------------------------------------------------------------------------------
/include/experimental/__p1673_bits/scaled.hpp:
--------------------------------------------------------------------------------
  1 | //@HEADER
  2 | // ************************************************************************
  3 | //
  4 | //                        Kokkos v. 4.0
  5 | //       Copyright (2022) National Technology & Engineering
  6 | //               Solutions of Sandia, LLC (NTESS).
  7 | //
  8 | // Under the terms of Contract DE-NA0003525 with NTESS,
  9 | // the U.S. Government retains certain rights in this software.
 10 | //
 11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
 12 | // See https://kokkos.org/LICENSE for license information.
 13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 14 | //
 15 | // ************************************************************************
 16 | //@HEADER
 17 | 
 18 | #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_SCALED_HPP_
 19 | #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_SCALED_HPP_
 20 | 
 21 | #include <mdspan/mdspan.hpp>
 22 | 
 23 | namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
 24 | namespace MDSPAN_IMPL_PROPOSED_NAMESPACE {
 25 | inline namespace __p1673_version_0 {
 26 | namespace linalg {
 27 | 
 28 | template<class ScalingFactor, class NestedAccessor>
 29 | class scaled_accessor {
 30 | public:
 31 |   using element_type =
 32 |     std::add_const_t<decltype(std::declval<ScalingFactor>() * std::declval<typename NestedAccessor::element_type>())>;
 33 |   using reference = std::remove_const_t<element_type>;
 34 |   using data_handle_type = typename NestedAccessor::data_handle_type;
 35 |   using offset_policy =
 36 |     scaled_accessor<ScalingFactor, typename NestedAccessor::offset_policy>;
 37 | 
 38 |   constexpr scaled_accessor() = default;
 39 | 
 40 |   MDSPAN_TEMPLATE_REQUIRES(
 41 |     class OtherScalingFactor,
 42 |     class OtherNestedAccessor,
 43 |     /* requires */ (
 44 |       std::is_constructible_v<NestedAccessor, const OtherNestedAccessor&> &&
 45 |       std::is_constructible_v<ScalingFactor, OtherScalingFactor>
 46 |     )
 47 |   )
 48 | #if defined(__cpp_conditional_explicit)
 49 |   explicit(!std::is_convertible_v<OtherNestedAccessor, NestedAccessor>)
 50 | #endif
 51 |   constexpr scaled_accessor(const scaled_accessor<OtherScalingFactor, OtherNestedAccessor>& other) :
 52 |     scaling_factor_(other.scaling_factor()),
 53 |     nested_accessor_(other.nested_accessor())
 54 |   {}
 55 | 
 56 |   constexpr scaled_accessor(const ScalingFactor& s, const NestedAccessor& a) :
 57 |     scaling_factor_(s),
 58 |     nested_accessor_(a)
 59 |   {}
 60 | 
 61 |   constexpr reference access(data_handle_type p, ::std::size_t i) const {
 62 |     return scaling_factor_ * typename NestedAccessor::element_type(nested_accessor_.access(p, i));
 63 |   }
 64 | 
 65 |   typename offset_policy::data_handle_type
 66 |   constexpr offset(data_handle_type p, ::std::size_t i) const {
 67 |     return nested_accessor_.offset(p, i);
 68 |   }
 69 | 
 70 |   constexpr NestedAccessor nested_accessor() const noexcept {
 71 |     return nested_accessor_;
 72 |   }
 73 | 
 74 |   constexpr ScalingFactor scaling_factor() const noexcept {
 75 |     return scaling_factor_;
 76 |   }
 77 | 
 78 | private:
 79 |   ScalingFactor scaling_factor_;
 80 |   NestedAccessor nested_accessor_;
 81 | };
 82 | 
namespace impl {

// Element type of the mdspan returned by scaled(): the element_type
// of scaled_accessor<ScalingFactor, NestedAccessor>, const-qualified.
// scaled_accessor::element_type is itself declared with
// std::add_const_t, so the add_const_t here leaves it unchanged.
template<class ScalingFactor,
         class NestedAccessor>
using scaled_element_type =
  std::add_const_t<typename scaled_accessor<ScalingFactor, NestedAccessor>::element_type>;

} // namespace impl
 91 | 
 92 | template<class ScalingFactor,
 93 |          class ElementType,
 94 |          class Extents,
 95 |          class Layout,
 96 |          class Accessor>
 97 | mdspan<impl::scaled_element_type<ScalingFactor, Accessor>,
 98 |        Extents,
 99 |        Layout,
100 |        scaled_accessor<ScalingFactor, Accessor>>
101 | scaled(ScalingFactor scaling_factor,
102 |        mdspan<ElementType, Extents, Layout, Accessor> x)
103 | {
104 |   using acc_type = scaled_accessor<ScalingFactor, Accessor>;
105 |   return {x.data_handle(), x.mapping(), acc_type{scaling_factor, x.accessor()}};
106 | }
107 | 
108 | } // end namespace linalg
109 | } // end inline namespace __p1673_version_0
110 | } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE
111 | } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE
112 | 
113 | #endif //LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_SCALED_HPP_
114 | 


--------------------------------------------------------------------------------
/include/experimental/linalg:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #pragma once
19 | 
20 | #include "__p1673_bits/linalg_config.h"
21 | #include "__p1673_bits/macros.hpp"
22 | #include "__p1673_bits/linalg_execpolicy_mapper.hpp"
23 | #include "__p1673_bits/maybe_static_size.hpp"
24 | #include "__p1673_bits/layout_tags.hpp"
25 | #include "__p1673_bits/layout_triangle.hpp"
26 | #include "__p1673_bits/packed_layout.hpp"
27 | #include "__p1673_bits/abs_if_needed.hpp"
28 | #include "__p1673_bits/conj_if_needed.hpp"
29 | #include "__p1673_bits/real_if_needed.hpp"
30 | #include "__p1673_bits/imag_if_needed.hpp"
31 | #include "__p1673_bits/scaled.hpp"
32 | #include "__p1673_bits/conjugated.hpp"
33 | #include "__p1673_bits/transposed.hpp"
34 | #include "__p1673_bits/conjugate_transposed.hpp"
35 | #include "__p1673_bits/blas1_givens.hpp"
36 | #include "__p1673_bits/blas1_linalg_swap.hpp"
37 | #include "__p1673_bits/blas1_matrix_frob_norm.hpp"
38 | #include "__p1673_bits/blas1_matrix_inf_norm.hpp"
39 | #include "__p1673_bits/blas1_matrix_one_norm.hpp"
40 | #include "__p1673_bits/blas1_scale.hpp"
41 | #include "__p1673_bits/blas1_linalg_copy.hpp"
42 | #include "__p1673_bits/blas1_linalg_add.hpp"
43 | #include "__p1673_bits/blas1_dot.hpp"
44 | #include "__p1673_bits/blas1_vector_norm2.hpp"
45 | #include "__p1673_bits/blas1_vector_abs_sum.hpp"
46 | #include "__p1673_bits/blas1_vector_idx_abs_max.hpp"
47 | #include "__p1673_bits/blas1_vector_sum_of_squares.hpp"
48 | #include "__p1673_bits/blas2_matrix_vector_product.hpp"
49 | #include "__p1673_bits/blas2_matrix_vector_solve.hpp"
50 | #include "__p1673_bits/blas2_matrix_rank_1_update.hpp"
51 | #include "__p1673_bits/blas2_matrix_rank_2_update.hpp"
52 | #include "__p1673_bits/blas3_matrix_product.hpp"
53 | #include "__p1673_bits/blas3_matrix_rank_k_update.hpp"
54 | #include "__p1673_bits/blas3_matrix_rank_2k_update.hpp"
55 | #include "__p1673_bits/blas3_triangular_matrix_matrix_solve.hpp"
56 | #ifdef LINALG_ENABLE_KOKKOS
57 | #include <experimental/linalg_kokkoskernels>
58 | #endif
59 | 


--------------------------------------------------------------------------------
/make_single_header.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import re
 4 | import sys
 5 | import os
 6 | from os.path import dirname, join as path_join, abspath, exists
 7 | 
 8 | extra_paths = [path_join(dirname(abspath(__file__)), "include")]
 9 | 
def find_file(included_name, current_file):
    """Resolve an include name to an existing path, or return None.

    The directory of ``current_file`` is searched first, followed by each
    directory in ``extra_paths``.  The first candidate that exists wins.
    """
    search_dirs = [dirname(abspath(current_file))]
    search_dirs.extend(extra_paths)
    candidates = (path_join(search_dir, included_name) for search_dir in search_dirs)
    return next((candidate for candidate in candidates if exists(candidate)), None)
17 | 
def process_file(file_path, out_lines=None, front_matter_lines=None, processed_files=None):
    """Recursively inline the includes of ``file_path`` into one text.

    Each ``#include`` line is resolved with ``find_file``.  An include that
    resolves to a file is expanded in place, once only.  An include that
    does not resolve is assumed to be a system header and is hoisted into
    the front matter.  ``#pragma once`` lines are dropped.

    Parameters:
        file_path: path of the file to process.
        out_lines: list receiving the body lines; extended in place.
        front_matter_lines: list receiving the hoisted include lines;
            extended in place.
        processed_files: list of already expanded paths; extended in place.

    Each list defaults to a fresh empty list.  (Mutable default arguments
    would be shared between calls and leak state from one call to the next.)

    Returns:
        The front matter, a blank line, and the body, joined into one string.
    """
    if out_lines is None:
        out_lines = []
    if front_matter_lines is None:
        front_matter_lines = []
    if processed_files is None:
        processed_files = []

    with open(file_path, "r") as f:
        for line in f:
            # The last line of a file may lack a newline; normalize it so
            # that joined lines never run together.
            if not line.endswith("\n"):
                line += "\n"
            m_inc = re.match(r'#include\s*[<"](.+)[>"]\s*', line)
            if m_inc:
                inc_path = find_file(m_inc.group(1), file_path)
                if inc_path is None:
                    # assume it's a system header; add it to the front matter
                    # just to be clean, but only once
                    if line not in front_matter_lines:
                        front_matter_lines.append(line)
                elif inc_path not in processed_files:
                    processed_files.append(inc_path)
                    process_file(inc_path, out_lines, front_matter_lines, processed_files)
                continue
            # ignore pragma once; we're handling it here
            if re.match(r"#pragma once\s*", line):
                continue
            # otherwise, just add the line to the output
            out_lines.append(line)
    return "".join(front_matter_lines) + "\n" + "".join(out_lines)
41 | 
if __name__ == "__main__":
    # The first command-line argument is the root header to expand.
    # Fail with a usage message, not a bare IndexError, if it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <root-header>".format(sys.argv[0]))
    root_path = abspath(sys.argv[1])
    # The root file is pre-registered as processed so that it is never
    # expanded into itself; the output starts with a single #pragma once.
    print(process_file(root_path, [], ["#pragma once\n"], [root_path]))
44 | 
45 | 
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
# Use an installed GoogleTest if there is one; otherwise fetch
# googletest from GitHub and build it as part of this project.
find_package(GTest)
if (NOT GTest_FOUND)
  message(STATUS "No installed GTest found, fetching from Github")
  include(FetchContent)
  FetchContent_Declare(
    googletest
    GIT_REPOSITORY https://github.com/google/googletest.git
    GIT_TAG        release-1.11.0
  )
  # need to set the variables in CACHE due to CMP0077
  set(gtest_disable_pthreads ON CACHE INTERNAL "")
  if(MSVC)
    set(gtest_force_shared_crt ON CACHE INTERNAL "")
  endif()
  # Populate and add googletest only once.  EXCLUDE_FROM_ALL keeps its
  # targets out of the default build unless something depends on them.
  FetchContent_GetProperties(googletest)
  if(NOT googletest_POPULATED)
    FetchContent_Populate(googletest)
    add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
  endif()
  # Alias the fetched targets to the GTest::Main and GTest::GTest names.
  add_library(GTest::Main ALIAS gtest_main)
  add_library(GTest::GTest ALIAS gtest)
endif()

# The native tests are always added.
add_subdirectory(native)

# The Kokkos-based tests are added only if LINALG_ENABLE_KOKKOS is set.
if(LINALG_ENABLE_KOKKOS)
  add_subdirectory(kokkos-based)
endif()
29 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/add_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "gtest_fixtures.hpp"
 3 | #include "helpers.hpp"
 4 | 
 5 | namespace{
 6 | 
 7 | template<class x_t, class y_t, class z_t>
 8 | void add_gold_solution(x_t x, y_t y, z_t z)
 9 | {
10 |   for (std::size_t i=0; i<x.extent(0); ++i){
11 |     z(i) = x(i) + y(i);
12 |   }
13 | }
14 | // Runs stdla::add through the Kokkos executor and checks z against a serial reference; x and y must come back unchanged.
15 | template<class x_t, class y_t, class z_t>
16 | void kokkos_blas1_add_test_impl(x_t x, y_t y, z_t z)
17 | {
18 |   namespace stdla = std::experimental::linalg;
19 | 
20 |   using value_type = typename x_t::value_type;
21 |   const std::size_t extent = x.extent(0);
22 | 
23 |   // copy x and y to verify they are not changed after kernel
24 |   auto x_preKernel = kokkostesting::create_stdvector_and_copy(x);
25 |   auto y_preKernel = kokkostesting::create_stdvector_and_copy(y);
26 | 
27 |   // compute gold: z_gold is a rank-1 mdspan viewing the storage of the local vector `gold`
28 |   std::vector<value_type> gold(extent);
29 |   using mdspan_t = mdspan<value_type, extents<dynamic_extent>>;
30 |   mdspan_t z_gold(gold.data(), extent);
31 |   add_gold_solution(x, y, z_gold);
32 |   // kernel under test
33 |   stdla::add(KokkosKernelsSTD::kokkos_exec<>(), x, y, z);
34 |   // exactly one of the branches below is compiled, selected by value_type
35 |   if constexpr(std::is_same_v<value_type, float>){
36 |     for (std::size_t i=0; i<extent; ++i){
37 |       EXPECT_FLOAT_EQ(x(i), x_preKernel[i]);
38 |       EXPECT_FLOAT_EQ(y(i), y_preKernel[i]);
39 |       EXPECT_FLOAT_EQ(z(i), z_gold(i));
40 |     }
41 |   }
42 | 
43 |   if constexpr(std::is_same_v<value_type, double>){
44 |     for (std::size_t i=0; i<extent; ++i){
45 |       EXPECT_DOUBLE_EQ(x(i), x_preKernel[i]);
46 |       EXPECT_DOUBLE_EQ(y(i), y_preKernel[i]);
47 |       EXPECT_DOUBLE_EQ(z(i), z_gold(i));
48 |     }
49 |   }
50 |   // complex: inputs compared exactly, result compared per component; z_gold is indexed with () like everywhere else
51 |   if constexpr(std::is_same_v<value_type, std::complex<double>>){
52 |     for (std::size_t i=0; i<extent; ++i){
53 |       EXPECT_TRUE(x(i) == x_preKernel[i]);
54 |       EXPECT_TRUE(y(i) == y_preKernel[i]);
55 |       EXPECT_DOUBLE_EQ(z(i).real(), z_gold(i).real());
56 |       EXPECT_DOUBLE_EQ(z(i).imag(), z_gold(i).imag());
57 |     }
58 |   }
59 | 
60 | }
61 | }//end anonym namespace
62 | // kokkos_add is registered once per value type provided by the blas1 fixtures.
63 | TEST_F(blas1_signed_float_fixture, kokkos_add)
64 | {
65 |   kokkos_blas1_add_test_impl(x, y, z);
66 | }
67 | 
68 | TEST_F(blas1_signed_double_fixture, kokkos_add)
69 | {
70 |   kokkos_blas1_add_test_impl(x, y, z);
71 | }
72 | // Complex case: compiled in only when the fixture's value_type and Kokkos::complex<double> have equal alignment.
73 | TEST_F(blas1_signed_complex_double_fixture, kokkos_add)
74 | {
75 |   using kc_t   = Kokkos::complex<double>;
76 |   using stdc_t = value_type;
77 |   if constexpr(alignof(stdc_t) == alignof(kc_t)){
78 |     kokkos_blas1_add_test_impl(x, y, z);
79 |   }
80 | }
81 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/copy_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "gtest_fixtures.hpp"
 3 | #include "helpers.hpp"
 4 | 
 5 | namespace{
 6 | // Checks stdla::copy with the Kokkos executor: x must be untouched, y must equal x and differ from its old values.
 7 | template<class x_t, class y_t>
 8 | void kokkos_blas1_copy_test_impl(x_t x, y_t y)
 9 | {
10 |   namespace stdla = std::experimental::linalg;
11 | 
12 |   using value_type = typename x_t::value_type;
13 |   const std::size_t extent = x.extent(0);
14 | 
15 |   // verify that x, y are different before running kernel
16 |   for (std::size_t i=0; i<extent; ++i){
17 |     EXPECT_TRUE(x(i)!=y(i));
18 |   }
19 | 
20 |   // snapshot x and y before the kernel (helper presumably returns a std::vector copy -- confirm in helpers.hpp)
21 |   auto x_preKernel = kokkostesting::create_stdvector_and_copy(x);
22 |   auto y_preKernel = kokkostesting::create_stdvector_and_copy(y);
23 |   // kernel under test
24 |   stdla::copy(KokkosKernelsSTD::kokkos_exec<>(), x, y);
25 | 
26 |   // after kernel, x should be unchanged, y should be equal to x
27 |   if constexpr(std::is_same_v<value_type, float>){
28 |     for (std::size_t i=0; i<extent; ++i){
29 |       EXPECT_FLOAT_EQ(x(i), x_preKernel[i]);
30 |       EXPECT_FLOAT_EQ(y(i), x(i));
31 |       EXPECT_FALSE(y(i) == y_preKernel[i]);
32 |     }
33 |   }
34 | 
35 |   if constexpr(std::is_same_v<value_type, double>){
36 |     for (std::size_t i=0; i<extent; ++i){
37 |       EXPECT_DOUBLE_EQ(x(i), x_preKernel[i]);
38 |       EXPECT_DOUBLE_EQ(y(i), x(i));
39 |       EXPECT_FALSE(y(i) == y_preKernel[i]);
40 |     }
41 |   }
42 | 
43 |   if constexpr(std::is_same_v<value_type, std::complex<double>>){
44 |     for (std::size_t i=0; i<extent; ++i){
45 |       EXPECT_TRUE(x(i) == x_preKernel[i]);
46 |       EXPECT_DOUBLE_EQ(y(i).real(), x(i).real());
47 |       EXPECT_DOUBLE_EQ(y(i).imag(), x(i).imag());
48 |       EXPECT_FALSE(y(i) == y_preKernel[i]);
49 |     }
50 |   }
51 |   // NOTE(review): value types other than float, double and std::complex<double> get no post-kernel checks
52 | }
53 | }//end anonym namespace
54 | // kokkos_copy is registered once per value type provided by the blas1 fixtures.
55 | TEST_F(blas1_signed_float_fixture, kokkos_copy)
56 | {
57 |   kokkos_blas1_copy_test_impl(x, y);
58 | }
59 | 
60 | TEST_F(blas1_signed_double_fixture, kokkos_copy)
61 | {
62 |   kokkos_blas1_copy_test_impl(x, y);
63 | }
64 | // Complex case: compiled in only when the fixture's value_type and Kokkos::complex<double> have equal alignment.
65 | TEST_F(blas1_signed_complex_double_fixture, kokkos_copy)
66 | {
67 |   using kc_t   = Kokkos::complex<double>;
68 |   using stdc_t = value_type;
69 |   if constexpr(alignof(stdc_t) == alignof(kc_t)){
70 |     kokkos_blas1_copy_test_impl(x, y);
71 |   }
72 | }
73 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/dot_kokkos.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "gtest_fixtures.hpp"
  3 | #include "helpers.hpp"
  4 | 
  5 | namespace
  6 | {
  7 | // Serial reference dot product: sums x(k) * y(k), then adds initValue only if useInit is true.
  8 | template<class x_t, class y_t, class T>
  9 | auto dot_gold_solution(x_t x, y_t y, T initValue, bool useInit)
 10 | {
 11 |   // accumulate the products first, starting from a value-initialized T
 12 |   T acc = {};
 13 |   const std::size_t n = x.extent(0);
 14 |   for (std::size_t k = 0; k < n; ++k) acc += x(k) * y(k);
 15 | 
 16 |   // the initial value is folded in after the loop
 17 |   if (useInit){ acc += initValue; }
 18 |   return acc;
 19 | }
 20 | // Checks stdla::dot (Kokkos executor, with or without initValue) against the serial reference; x and y must stay unchanged.
 21 | template<class x_t, class y_t, class T>
 22 | void kokkos_blas1_dot_test_impl(x_t x, y_t y, T initValue, bool useInit)
 23 | {
 24 |   namespace stdla = std::experimental::linalg;
 25 | 
 26 |   using value_type = typename x_t::value_type;
 27 |   const std::size_t extent = x.extent(0);
 28 | 
 29 |   // copy x and y to verify they are not changed after kernel
 30 |   auto x_preKernel = kokkostesting::create_stdvector_and_copy(x);
 31 |   auto y_preKernel = kokkostesting::create_stdvector_and_copy(y);
 32 | 
 33 |   // compute gold
 34 |   const T gold = dot_gold_solution(x, y, initValue, useInit);
 35 |   // useInit selects between the overload that takes an initial value and the one that does not
 36 |   T result = {};
 37 |   if (useInit){
 38 |     result = stdla::dot(KokkosKernelsSTD::kokkos_exec<>(),
 39 | 			x, y, initValue);
 40 |   }else{
 41 |     result = stdla::dot(KokkosKernelsSTD::kokkos_exec<>(),
 42 | 			x, y);
 43 |   }
 44 | 
 45 |   if constexpr(std::is_same_v<value_type, float>){
 46 |     // cannot use EXPECT_FLOAT_EQ because
 47 |     // in some cases that fails on the third digit or similar
 48 |     EXPECT_NEAR(result, gold, 1e-2);
 49 |   }
 50 | 
 51 |   if constexpr(std::is_same_v<value_type, double>){
 52 |     // similarly to float
 53 |     EXPECT_NEAR(result, gold, 1e-9);
 54 |   }
 55 |   // complex results are compared component by component
 56 |   if constexpr(std::is_same_v<value_type, std::complex<double>>){
 57 |     EXPECT_NEAR(result.real(), gold.real(), 1e-9);
 58 |     EXPECT_NEAR(result.imag(), gold.imag(), 1e-9);
 59 |   }
 60 | 
 61 |   // x,y should not change after kernel
 62 |   for (std::size_t i=0; i<extent; ++i){
 63 |     EXPECT_TRUE(x(i) == x_preKernel[i]);
 64 |     EXPECT_TRUE(y(i) == y_preKernel[i]);
 65 |   }
 66 | }
 67 | }//end anonym namespace
 68 | // kokkos_dot tests: without and with an initial value, once per fixture value type.
 69 | TEST_F(blas1_signed_float_fixture, kokkos_dot_noinitvalue)
 70 | {
 71 |   kokkos_blas1_dot_test_impl(x, y, static_cast<float>(0), false);
 72 | }
 73 | 
 74 | TEST_F(blas1_signed_float_fixture, kokkos_dot_initvalue)
 75 | {
 76 |   kokkos_blas1_dot_test_impl(x, y, static_cast<float>(3), true);
 77 | }
 78 | 
 79 | TEST_F(blas1_signed_double_fixture, kokkos_dot_noinitvalue)
 80 | {
 81 |   kokkos_blas1_dot_test_impl(x, y, static_cast<double>(0), false);
 82 | }
 83 | 
 84 | TEST_F(blas1_signed_double_fixture, kokkos_dot_initvalue)
 85 | {
 86 |   kokkos_blas1_dot_test_impl(x, y, static_cast<double>(5), true);
 87 | }
 88 | // Complex cases: compiled in only when the fixture's value_type and Kokkos::complex<double> have equal alignment.
 89 | TEST_F(blas1_signed_complex_double_fixture, kokkos_dot_noinitvalue)
 90 | {
 91 |   using kc_t   = Kokkos::complex<double>;
 92 |   using stdc_t = value_type;
 93 |   if constexpr (alignof(stdc_t) == alignof(kc_t)){
 94 |     const stdc_t init{0., 0.};
 95 |     kokkos_blas1_dot_test_impl(x, y, init, false);
 96 |   }
 97 | }
 98 | 
 99 | TEST_F(blas1_signed_complex_double_fixture, kokkos_dot_initvalue)
100 | {
101 |   using kc_t   = Kokkos::complex<double>;
102 |   using stdc_t = value_type;
103 |   if constexpr (alignof(stdc_t) == alignof(kc_t)){
104 |     const stdc_t init{-2., 4.};
105 |     kokkos_blas1_dot_test_impl(x, y, init, true);
106 |   }
107 | }
108 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/dotc_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "gtest_fixtures.hpp"
 3 | #include "helpers.hpp"
 4 | 
 5 | namespace
 6 | {
 7 | // Serial reference for dotc: the seed (initValue or a value-initialized T) plus the sum of std::conj(x(k)) * y(k).
 8 | template<class x_t, class y_t, class T>
 9 | auto dotc_gold_solution(x_t x, y_t y, T initValue, bool useInit)
10 | {
11 |   // seed the accumulator: the caller's initial value when requested, a value-initialized T otherwise
12 |   T acc = {};
13 |   if (useInit){ acc = initValue; }
14 | 
15 |   const std::size_t n = x.extent(0);
16 |   for (std::size_t k = 0; k < n; ++k) acc += std::conj(x(k)) * y(k);
17 |   // acc now holds the seed plus the conjugated products
18 | 
19 |   return acc;
20 | }
21 | // Checks stdla::dotc (Kokkos executor, with or without initValue) against the serial reference for std::complex<double>.
22 | template<class x_t, class y_t, class T>
23 | void kokkos_blas1_dotc_test_impl(x_t x, y_t y, T initValue, bool useInit)
24 | {
25 |   namespace stdla = std::experimental::linalg;
26 | 
27 |   using value_type = typename x_t::value_type;
28 |   const std::size_t extent = x.extent(0);
29 | 
30 |   // copy x and y to verify they are not changed after kernel
31 |   auto x_preKernel = kokkostesting::create_stdvector_and_copy(x);
32 |   auto y_preKernel = kokkostesting::create_stdvector_and_copy(y);
33 | 
34 |   // compute gold
35 |   const auto gold = dotc_gold_solution(x, y, initValue, useInit);
36 |   // useInit selects between the overload that takes an initial value and the one that does not
37 |   T result = {};
38 |   if (useInit){
39 |     result = stdla::dotc(KokkosKernelsSTD::kokkos_exec<>(), x, y, initValue);
40 |   }else{
41 |     result = stdla::dotc(KokkosKernelsSTD::kokkos_exec<>(), x, y);
42 |   }
43 |   // NOTE(review): every check below sits inside this branch, so other value types are not verified at all
44 |   if constexpr(std::is_same_v<value_type, std::complex<double>>)
45 |   {
46 |     EXPECT_NEAR(result.real(), gold.real(), 1e-9);
47 |     EXPECT_NEAR(result.imag(), gold.imag(), 1e-9);
48 |     // x and y must not change after the kernel
49 |     for (std::size_t i=0; i<extent; ++i){
50 |       EXPECT_TRUE(x(i) == x_preKernel[i]);
51 |       EXPECT_TRUE(y(i) == y_preKernel[i]);
52 |     }
53 |   }
54 | }
55 | }//end anonym namespace
56 | // kokkos_dotc tests: compiled in only when the fixture's value_type and Kokkos::complex<double> have equal alignment.
57 | TEST_F(blas1_signed_complex_double_fixture, kokkos_dotc_noinitvalue)
58 | {
59 |   using kc_t   = Kokkos::complex<double>;
60 |   using stdc_t = value_type;
61 |   if constexpr (alignof(stdc_t) == alignof(kc_t)){
62 |     const stdc_t init{0., 0.};
63 |     kokkos_blas1_dotc_test_impl(x, y, init, false);
64 |   }
65 | }
66 | 
67 | TEST_F(blas1_signed_complex_double_fixture, kokkos_dotc_initvalue)
68 | {
69 |   using kc_t   = Kokkos::complex<double>;
70 |   using stdc_t = value_type;
71 |   if constexpr (alignof(stdc_t) == alignof(kc_t)){
72 |     const stdc_t init{-4., 5.};
73 |     kokkos_blas1_dotc_test_impl(x, y, init, true);
74 |   }
75 | }
76 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/gtest_main_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <iostream>
 3 | #include <gtest/gtest.h>
 4 | #include <Kokkos_Core.hpp>
 5 | 
 6 | namespace KokkosKernelsSTD {
 7 | namespace Impl {
 8 | // Test-only hook, compiled only when KOKKOS_STDBLAS_ENABLE_TESTS is defined: prints "<functionName>: kokkos impl" to std::cout.
 9 | #if defined(KOKKOS_STDBLAS_ENABLE_TESTS)
10 | void signal_kokkos_impl_called(std::string_view functionName)
11 | {
12 |   std::cout << functionName << ": kokkos impl" << std::endl;
13 | }
14 | #endif
15 | // NOTE(review): presumably called from the Kokkos-backed kernels so a test run can tell that path was taken -- confirm at the call sites.
16 | } // namespace Impl
17 | } // namespace KokkosKernelsSTD
18 | // Test driver: initializes googletest and Kokkos, runs every registered test, finalizes Kokkos, returns the gtest status.
19 | int main(int argc, char *argv[])
20 | {
21 |   // googletest receives argc/argv first, Kokkos::initialize second
22 |   ::testing::InitGoogleTest(&argc, argv);
23 |   Kokkos::initialize(argc, argv);
24 | 
25 |   // all tests execute between Kokkos::initialize and Kokkos::finalize
26 |   const int testStatus = RUN_ALL_TESTS();
27 |   Kokkos::finalize();
28 |   return testStatus;
29 | }
30 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/hermitian_matrix_rank1_update_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include "gtest_fixtures.hpp"
19 | #include "helpers.hpp"
20 | 
21 | namespace{
22 | 
23 | using namespace kokkostesting;
24 | // Serial reference for the Hermitian rank-1 update: A(r,c) += x(r) * conj_if_needed(x(c)) over one triangle of A.
25 | template<class x_t, class A_t, class Triangle>
26 | void hermitian_matrix_rank_1_update_gold_solution(const x_t &x, A_t &A, Triangle /* t */)
27 | {
28 |   using std::experimental::linalg::impl::conj_if_needed;
29 |   using idx_t = std::experimental::extents<>::size_type;
30 |   // lower_triangle_t: rows c .. A.extent(0)-1 of column c; any other tag: rows 0 .. c
31 |   constexpr bool isLower = std::is_same_v<Triangle, std::experimental::linalg::lower_triangle_t>;
32 |   for (idx_t c = 0; c < A.extent(1); ++c) {
33 |     const idx_t rowBegin = isLower ? c : 0;
34 |     const idx_t rowEnd = isLower ? A.extent(0) : c + 1;
35 |     for (idx_t r = rowBegin; r < rowEnd; ++r) A(r,c) += x(r) * conj_if_needed(x(c));
36 |   }
37 | }
38 | // Builds a serial-reference callback and a Kokkos-executor callback for triangle t and hands both, with a tolerance, to test_op_Ax.
39 | template<class x_t, class A_t, class Triangle, class Scalar = typename x_t::element_type> // NOTE(review): Scalar is not used in the body
40 | void test_kokkos_hermitian_matrix_rank1_update_impl(const x_t &x, A_t &A, Triangle t)
41 | {
42 |   const auto get_gold = [&](auto A_gold) {
43 |       hermitian_matrix_rank_1_update_gold_solution(x, A_gold, t);
44 |     };
45 |   const auto compute = [&]() {
46 |       std::experimental::linalg::hermitian_matrix_rank_1_update(
47 |         KokkosKernelsSTD::kokkos_exec<>(), x, A, t);
48 |     };
49 |   const auto tol = tolerance<typename x_t::value_type>(1e-20, 1e-10f); // presumably (double tol, float tol) -- confirm in helpers.hpp
50 |   test_op_Ax(x, A, tol, get_gold, compute);
51 | }
52 | 
53 | } // anonymous namespace
54 | // DEFINE_TESTS(T): declares TEST_F(blas2_signed_<T>_fixture, kokkos_hermitian_matrix_rank1_update); inside run_checked_tests it runs the lower- then the upper-triangle check.
55 | #define DEFINE_TESTS(blas_val_type)                                          \
56 | TEST_F(blas2_signed_##blas_val_type##_fixture,                               \
57 |        kokkos_hermitian_matrix_rank1_update) {                               \
58 |   using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \
59 |   run_checked_tests<val_t>("kokkos_", "hermitian_matrix_rank1_update", "",   \
60 |                            #blas_val_type, [&]() {                           \
61 |                                                                              \
62 |     test_kokkos_hermitian_matrix_rank1_update_impl(x_e0, A_sym_e0,           \
63 |                             std::experimental::linalg::lower_triangle);      \
64 |     test_kokkos_hermitian_matrix_rank1_update_impl(x_e0, A_sym_e0,           \
65 |                             std::experimental::linalg::upper_triangle);      \
66 |                                                                              \
67 |   });                                                                        \
68 | }
69 | // One test per value type; the argument is pasted into the fixture name and stringified for run_checked_tests.
70 | DEFINE_TESTS(double)
71 | DEFINE_TESTS(float)
72 | DEFINE_TESTS(complex_double)
73 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/hermitian_matrix_rank2_update_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include "gtest_fixtures.hpp"
19 | #include "helpers.hpp"
20 | 
21 | namespace{
22 | 
23 | using namespace kokkostesting;
24 | // Serial reference for the Hermitian rank-2 update: A(r,c) += x(r)*conj_if_needed(y(c)) + y(r)*conj_if_needed(x(c)) over one triangle.
25 | template<class x_t, class y_t, class A_t, class Triangle>
26 | void hermitian_matrix_rank_2_update_gold_solution(const x_t &x, const y_t &y, A_t &A, Triangle /* t */)
27 | {
28 |   using std::experimental::linalg::impl::conj_if_needed;
29 |   using idx_t = std::experimental::extents<>::size_type;
30 |   // lower_triangle_t: rows c .. A.extent(0)-1 of column c; any other tag: rows 0 .. c
31 |   constexpr bool isLower = std::is_same_v<Triangle, std::experimental::linalg::lower_triangle_t>;
32 |   for (idx_t c = 0; c < A.extent(1); ++c) {
33 |     const idx_t rowBegin = isLower ? c : 0;
34 |     const idx_t rowEnd = isLower ? A.extent(0) : c + 1;
35 |     for (idx_t r = rowBegin; r < rowEnd; ++r) A(r,c) += x(r) * conj_if_needed(y(c)) + y(r) * conj_if_needed(x(c));
36 |   }
37 | }
38 | // Builds a serial-reference callback and a Kokkos-executor callback for triangle t and hands both, with a tolerance, to test_op_Axy.
39 | template<class x_t, class y_t, class A_t, class Triangle, class Scalar = typename x_t::element_type> // NOTE(review): Scalar is not used in the body
40 | void test_kokkos_hermitian_matrix_rank2_update_impl(const x_t &x, const y_t &y, A_t &A, Triangle t)
41 | {
42 |   const auto get_gold = [&](auto A_gold) {
43 |       hermitian_matrix_rank_2_update_gold_solution(x, y, A_gold, t);
44 |     };
45 |   const auto compute = [&]() {
46 |       std::experimental::linalg::hermitian_matrix_rank_2_update(
47 |         KokkosKernelsSTD::kokkos_exec<>(), x, y, A, t);
48 |     };
49 |   const auto tol = tolerance<typename x_t::value_type>(1e-20, 1e-10f); // presumably (double tol, float tol) -- confirm in helpers.hpp
50 |   test_op_Axy(x, y, A, tol, get_gold, compute);
51 | }
52 | 
53 | } // anonymous namespace
54 | // DEFINE_TESTS(T): declares TEST_F(blas2_signed_<T>_fixture, kokkos_hermitian_matrix_rank2_update); inside run_checked_tests it runs the lower- then the upper-triangle check.
55 | #define DEFINE_TESTS(blas_val_type)                                          \
56 | TEST_F(blas2_signed_##blas_val_type##_fixture,                               \
57 |        kokkos_hermitian_matrix_rank2_update) {                               \
58 |   using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \
59 |   run_checked_tests<val_t>("kokkos_", "hermitian_matrix_rank2_update", "",   \
60 |                            #blas_val_type, [&]() {                           \
61 |                                                                              \
62 |     test_kokkos_hermitian_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0,     \
63 |                             std::experimental::linalg::lower_triangle);      \
64 |     test_kokkos_hermitian_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0,     \
65 |                             std::experimental::linalg::upper_triangle);      \
66 |                                                                              \
67 |   });                                                                        \
68 | }
69 | // One test per value type; the argument is pasted into the fixture name and stringified for run_checked_tests.
70 | DEFINE_TESTS(double)
71 | DEFINE_TESTS(float)
72 | DEFINE_TESTS(complex_double)
73 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/hermitian_matrix_rank_2k_update_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include "gtest_fixtures.hpp"
19 | #include "helpers.hpp"
20 | 
21 | namespace{
22 | 
23 | using namespace kokkostesting;
24 | // Serial reference for the Hermitian rank-2k update: C(i,j) += sum over k of A(i,k)*conj_if_needed(B(j,k)) + B(i,k)*conj_if_needed(A(j,k)), one triangle of C.
25 | template<class A_t, class B_t, class C_t, class Triangle>
26 | void hermitian_matrix_rank_2k_update_gold_solution(const A_t &A, const B_t &B, C_t &C, Triangle /* t */)
27 | {
28 |   using std::experimental::linalg::impl::conj_if_needed;
29 |   using size_type = std::experimental::extents<>::size_type;
30 |   constexpr bool low = std::is_same_v<Triangle, std::experimental::linalg::lower_triangle_t>;
31 |   const auto C_ext = C.extent(0);   // bound for both i and j: the triangle of C being updated
32 |   const auto A_ext1 = A.extent(1);  // bound for k: the columns of A (and of B) that are summed over
33 |   for (size_type j = 0; j < C_ext; ++j) {
34 |     const size_type i1 = low ? C_ext : j + 1;
35 |     for (size_type i = low ? j : 0; i < i1; ++i) {
36 |       for (size_type k = 0; k < A_ext1; ++k)
37 |         C(i, j) += A(i, k) * conj_if_needed(B(j, k)) + B(i, k) * conj_if_needed(A(j, k));
38 |     }
39 |   }
40 | }
41 | // Builds a serial-reference callback and a Kokkos-executor callback for triangle t and hands both, with a tolerance, to test_op_CAB.
42 | template<class A_t, class B_t, class C_t, class Triangle>
43 | void test_kokkos_hermitian_matrix_rank2k_update_impl(const A_t &A, const B_t &B, C_t &C, Triangle t)
44 | {
45 |   const auto get_gold = [&](auto C_gold) {
46 |       hermitian_matrix_rank_2k_update_gold_solution(A, B, C_gold, t);
47 |     };
48 |   const auto compute = [&]() {
49 |       std::experimental::linalg::hermitian_matrix_rank_2k_update(
50 |         KokkosKernelsSTD::kokkos_exec<>(), A, B, C, t);
51 |     };
52 |   const auto tol = tolerance<typename C_t::value_type>(1e-20, 1e-10f); // presumably (double tol, float tol) -- confirm in helpers.hpp
53 |   test_op_CAB(A, B, C, tol, get_gold, compute);
54 | }
55 | 
56 | } // anonymous namespace
57 | // DEFINE_TESTS(T): declares TEST_F(blas2_signed_<T>_fixture, kokkos_hermitian_matrix_rank2k_update); A_sym_e0 is passed as both A and B, A_hem_e0 as C.
58 | #define DEFINE_TESTS(blas_val_type)                                          \
59 | TEST_F(blas2_signed_##blas_val_type##_fixture,                               \
60 |        kokkos_hermitian_matrix_rank2k_update) {                              \
61 |   using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \
62 |   run_checked_tests<val_t>("kokkos_", "hermitian_matrix_rank2k_update", "",  \
63 |                            #blas_val_type, [&]() {                           \
64 |                                                                              \
65 |     test_kokkos_hermitian_matrix_rank2k_update_impl(A_sym_e0, A_sym_e0, A_hem_e0,    \
66 |                             std::experimental::linalg::lower_triangle);      \
67 |     test_kokkos_hermitian_matrix_rank2k_update_impl(A_sym_e0, A_sym_e0, A_hem_e0,    \
68 |                             std::experimental::linalg::upper_triangle);      \
69 |                                                                              \
70 |   });                                                                        \
71 | }
72 | // One test per value type; the argument is pasted into the fixture name and stringified for run_checked_tests.
73 | DEFINE_TESTS(double)
74 | DEFINE_TESTS(float)
75 | DEFINE_TESTS(complex_double)
76 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/hermitian_matrix_rank_k_update_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include "gtest_fixtures.hpp"
19 | #include "helpers.hpp"
20 | 
21 | namespace{
22 | 
23 | using namespace kokkostesting;
24 | // Serial reference for the Hermitian rank-k update: C(r,c) += alpha * A(r,k) * conj_if_needed(A(c,k)) summed over k, one triangle of C.
25 | template<class AlphaType, class A_t, class C_t, class Triangle>
26 | void hermitian_matrix_rank_k_update_gold_solution(AlphaType alpha,
27 |     const A_t &A, C_t &C, Triangle /* t */)
28 | {
29 |   using std::experimental::linalg::impl::conj_if_needed;
30 |   using idx_t = std::experimental::extents<>::size_type;
31 |   constexpr bool isLower = std::is_same_v<Triangle, std::experimental::linalg::lower_triangle_t>;
32 |   const auto nC = C.extent(0);  // bound for rows and columns of C
33 |   const auto nK = A.extent(1);  // bound for the summation index
34 |   for (idx_t c = 0; c < nC; ++c) {
35 |     const idx_t rowBegin = isLower ? c : 0;
36 |     const idx_t rowEnd = isLower ? nC : c + 1;
37 |     for (idx_t r = rowBegin; r < rowEnd; ++r) {
38 |       for (idx_t k = 0; k < nK; ++k) C(r, c) += alpha * A(r, k) * conj_if_needed(A(c, k));
39 |     }
40 |   }
41 |   // only the selected triangle of C has been written
42 | }
43 | // Checks hermitian_matrix_rank_k_update (Kokkos executor, alpha = 2) against the serial reference by handing both callbacks to test_op_CA.
44 | template<class A_t, class C_t, class Triangle>
45 | void test_kokkos_hermitian_matrix_rank_k_update_impl(const A_t A, C_t C, Triangle t)
46 | {
47 |   const auto alpha = static_cast<typename A_t::element_type>(2.0); // check if alpha is applied; `typename` is required for the dependent type before C++20
48 |   const auto get_gold = [&](auto C_gold) {
49 |       hermitian_matrix_rank_k_update_gold_solution(alpha, A, C_gold, t);
50 |     };
51 |   const auto compute = [&]() {
52 |       std::experimental::linalg::hermitian_matrix_rank_k_update(
53 |         KokkosKernelsSTD::kokkos_exec<>(), alpha, A, C, t);
54 |     };
55 |   const auto tol = tolerance<typename C_t::value_type>(1e-20, 1e-10f); // presumably (double tol, float tol) -- confirm in helpers.hpp
56 |   test_op_CA(A, C, tol, get_gold, compute);
57 | }
58 | 
59 | } // anonymous namespace
60 | // DEFINE_TESTS(T): declares TEST_F(blas2_signed_<T>_fixture, kokkos_hermitian_matrix_rank_k_update); A_sym_e0 is passed as A, A_hem_e0 as C.
61 | #define DEFINE_TESTS(blas_val_type)                                          \
62 | TEST_F(blas2_signed_##blas_val_type##_fixture,                               \
63 |        kokkos_hermitian_matrix_rank_k_update) {                              \
64 |   using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \
65 |   run_checked_tests<val_t>("kokkos_", "hermitian_matrix_rank_k_update", "",  \
66 |                            #blas_val_type, [&]() {                           \
67 |                                                                              \
68 |     test_kokkos_hermitian_matrix_rank_k_update_impl(A_sym_e0, A_hem_e0,      \
69 |                             std::experimental::linalg::lower_triangle);      \
70 |     test_kokkos_hermitian_matrix_rank_k_update_impl(A_sym_e0, A_hem_e0,      \
71 |                             std::experimental::linalg::upper_triangle);      \
72 |                                                                              \
73 |   });                                                                        \
74 | }
75 | // One test per value type; the argument is pasted into the fixture name and stringified for run_checked_tests.
76 | DEFINE_TESTS(double)
77 | DEFINE_TESTS(float)
78 | DEFINE_TESTS(complex_double)
79 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/idx_abs_max_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "gtest_fixtures.hpp"
 3 | #include "helpers.hpp"
 4 | 
 5 | namespace
 6 | {
 7 | // Serial reference: index of the element of x with the largest abs(); the strict '<' keeps the earliest index on ties.
 8 | template<class x_t>
 9 | std::experimental::extents<>::size_type
10 | vector_idx_abs_max_gold_solution(x_t x)
11 | {
12 |   // abs is called unqualified, with std::abs made visible by the using-declaration
13 |   using std::abs;
14 |   using idx_t = std::experimental::extents<>::size_type;
15 | 
16 |   // NOTE(review): x(0) is read unconditionally, so x is assumed to be non-empty
17 |   auto best = abs(x(0));
18 |   idx_t bestIdx = 0;
19 |   for (idx_t k = 1; k < x.extent(0); ++k) {
20 |     const auto cand = abs(x(k));
21 |     if (!(best < cand)) continue;
22 |     best = cand;
23 |     bestIdx = k;
24 |   }
25 |   return bestIdx;
26 | }
27 | // Checks stdla::vector_idx_abs_max (Kokkos executor) against the serial reference; x must stay unchanged.
28 | template<class x_t>
29 | void kokkos_blas1_vector_idx_abs_max_test_impl(x_t x)
30 | {
31 | 
32 |   namespace stdla = std::experimental::linalg;
33 | 
34 |   // copy x to verify it is not changed after kernel
35 |   auto x_preKernel = kokkostesting::create_stdvector_and_copy(x);
36 |   // reference index first, then the kernel under test
37 |   const auto gold = vector_idx_abs_max_gold_solution(x);
38 |   const auto result = stdla::vector_idx_abs_max(KokkosKernelsSTD::kokkos_exec<>(), x);
39 |   EXPECT_TRUE(gold == result);
40 |   static_assert(std::is_same_v<decltype(gold), decltype(result)>,
41 | 		"test:vector_idx_abs_max: gold and result types not same");
42 | 
43 |   // x should not change after kernel
44 |   const std::size_t extent = x.extent(0);
45 |   for (std::size_t i=0; i<extent; ++i){
46 |     EXPECT_TRUE(x(i) == x_preKernel[i]);
47 |   }
48 | 
49 | }
50 | }//end anonym namespace
51 | 
52 | // kokkos_vector_idx_abs_max is registered once per value type provided by the blas1 fixtures.
53 | TEST_F(blas1_signed_float_fixture, kokkos_vector_idx_abs_max)
54 | {
55 |   kokkos_blas1_vector_idx_abs_max_test_impl(x);
56 | }
57 | 
58 | TEST_F(blas1_signed_double_fixture, kokkos_vector_idx_abs_max)
59 | {
60 |   kokkos_blas1_vector_idx_abs_max_test_impl(x);
61 | }
62 | // Complex case: compiled in only when the fixture's value_type and Kokkos::complex<double> have equal alignment.
63 | TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_idx_abs_max)
64 | {
65 |   using kc_t   = Kokkos::complex<double>;
66 |   using stdc_t = value_type;
67 |   if constexpr(alignof(stdc_t) == alignof(kc_t)){
68 |     kokkos_blas1_vector_idx_abs_max_test_impl(x);
69 |   }
70 | }
71 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/matrix_rank1_update_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include "gtest_fixtures.hpp"
19 | #include "helpers.hpp"
20 | 
21 | namespace{
22 | 
23 | using namespace kokkostesting;
24 | // Serial reference for the rank-1 update: A(r, c) += x(r) * y(c) for every entry of A.
25 | template<class x_t, class y_t, class A_t>
26 | void matrix_rank_1_update_gold_solution(const x_t &x, const y_t &y, A_t &A)
27 | {
28 |   using idx_t = std::experimental::extents<>::size_type;
29 |   const auto nRows = A.extent(0);
30 |   const auto nCols = A.extent(1);
31 |   for (idx_t r = 0; r < nRows; ++r)
32 |     for (idx_t c = 0; c < nCols; ++c)
33 |       A(r, c) += x(r) * y(c);
34 | }
35 | // Builds a serial-reference callback and a Kokkos-executor callback for matrix_rank_1_update and hands both, with a tolerance, to test_op_Axy.
36 | template<class x_t, class y_t, class A_t>
37 | void test_kokkos_matrix_rank1_update_impl(const x_t &x, const y_t &y, A_t &A)
38 | {
39 |   const auto get_gold = [&](auto A_gold) {
40 |       matrix_rank_1_update_gold_solution(x, y, A_gold);
41 |     };
42 |   const auto compute = [&]() {
43 |       std::experimental::linalg::matrix_rank_1_update(
44 |           KokkosKernelsSTD::kokkos_exec<>(), x, y, A);
45 |     };
46 |   const auto tol = tolerance<typename x_t::value_type>(1e-20, 1e-10f); // presumably (double tol, float tol) -- confirm in helpers.hpp
47 |   test_op_Axy(x, y, A, tol, get_gold, compute);
48 | }
49 | // Conjugated variant: the reference runs the plain rank-1 update on conjugated(y); the kernel under test is matrix_rank_1_update_c on y itself.
50 | template<class x_t, class y_t, class A_t>
51 | void test_kokkos_matrix_rank1_update_conj_impl(const x_t &x, const y_t &y, A_t &A)
52 | {
53 |   const auto get_gold = [&](auto A_gold) {
54 |       matrix_rank_1_update_gold_solution(x,
55 |         std::experimental::linalg::conjugated(y), A_gold);
56 |     };
57 |   const auto compute = [&]() {
58 |       std::experimental::linalg::matrix_rank_1_update_c(
59 |           KokkosKernelsSTD::kokkos_exec<>(), x, y, A);
60 |     };
61 |   const auto tol = tolerance<typename x_t::value_type>(1e-20, 1e-10f); // presumably (double tol, float tol) -- confirm in helpers.hpp
62 |   test_op_Axy(x, y, A, tol, get_gold, compute);
63 | }
64 | 
65 | } // anonymous namespace
66 | // DEFINE_TESTS(T): declares two TEST_Fs on blas2_signed_<T>_fixture -- the plain rank-1 update and its conjugated variant -- each run inside run_checked_tests.
67 | #define DEFINE_TESTS(blas_val_type)                                          \
68 | TEST_F(blas2_signed_##blas_val_type##_fixture, kokkos_matrix_rank1_update) { \
69 |   using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \
70 |   run_checked_tests<val_t>("kokkos_", "matrix_rank1_update", "",             \
71 |                            #blas_val_type, [&]() {                           \
72 |                                                                              \
73 |    test_kokkos_matrix_rank1_update_impl(x_e0, x_e1, A_e0e1);                 \
74 |                                                                              \
75 |   });                                                                        \
76 | }                                                                            \
77 | TEST_F(blas2_signed_##blas_val_type##_fixture,                               \
78 |        kokkos_matrix_rank1_update_conjugated) {                              \
79 |   using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \
80 |   run_checked_tests<val_t>("kokkos_", "matrix_rank1_update", "_conjugated",  \
81 |                            #blas_val_type, [&]() {                           \
82 |                                                                              \
83 |    test_kokkos_matrix_rank1_update_conj_impl(x_e0, x_e1, A_e0e1);            \
84 |                                                                              \
85 |   });                                                                        \
86 | }
87 | // One pair of tests per value type; the argument is pasted into the fixture name and stringified for run_checked_tests.
88 | DEFINE_TESTS(double)
89 | DEFINE_TESTS(float)
90 | DEFINE_TESTS(complex_double)
91 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/overwriting_matrix_vector_product.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "gtest_fixtures.hpp"
  3 | #include "helpers.hpp"
  4 | 
  5 | namespace
  6 | {
  7 | 
  8 | // Serial reference for y = A*x: each y(i) is reset and then accumulated along row i.
  9 | template<class A_t, class x_t, class y_t>
 10 | void gemv_gold_solution(A_t A, x_t x, y_t y)
 11 | {
 12 |   const std::size_t numRows = A.extent(0), numCols = A.extent(1);
 13 |   for (std::size_t row = 0; row < numRows; ++row) {
 14 |     y(row) = typename y_t::value_type{};  // value-initialized start of the sum
 15 |     for (std::size_t col = 0; col < numCols; ++col) { y(row) += A(row, col) * x(col); }
 16 |   }
 17 | }
 18 | 
 19 | // Runs stdla::matrix_vector_product with the Kokkos executor and checks that
 20 | // y matches the serial gold product while the inputs A and x stay unmodified.
 21 | template<class A_t, class x_t, class y_t>
 22 | void kokkos_blas_overwriting_gemv_impl(A_t A, x_t x, y_t y)
 23 | {
 24 |   namespace stdla = std::experimental::linalg;
 25 |   using value_type = typename A_t::value_type;
 26 |   const std::size_t extent0 = A.extent(0);
 27 |   const std::size_t extent1 = A.extent(1);
 28 | 
 29 |   // copy the input operands before running the kernel; y is only compared
 30 |   // against y_gold below, so no pre-kernel copy of it is kept
 31 |   auto A_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(A);
 32 |   auto x_preKernel = kokkostesting::create_stdvector_and_copy(x);
 33 | 
 34 |   // compute y gold gemv
 35 |   std::vector<value_type> gold(y.extent(0));
 36 |   using mdspan_t = mdspan<value_type, extents<dynamic_extent>>;
 37 |   mdspan_t y_gold(gold.data(), y.extent(0));
 38 |   gemv_gold_solution(A, x, y_gold);
 39 | 
 40 |   stdla::matrix_vector_product(KokkosKernelsSTD::kokkos_exec<>(), A, x, y);
 41 | 
 42 |   // after kernel, A,x should be unchanged, y should be equal to y_gold.
 43 |   // note that for A we need to visit all elements rowwise
 44 |   // since that is how we stored above the preKernel values
 45 |   if constexpr(std::is_same_v<value_type, float>){
 46 |     // check x
 47 |     for (std::size_t j=0; j<extent1; ++j){
 48 |       EXPECT_FLOAT_EQ(x(j), x_preKernel[j]);
 49 |     }
 50 | 
 51 |     // check A and y (y uses an absolute tolerance of 1e-2 in single precision)
 52 |     std::size_t count=0;
 53 |     for (std::size_t i=0; i<extent0; ++i){
 54 |       EXPECT_NEAR(y(i), y_gold(i), 1e-2);
 55 |       for (std::size_t j=0; j<extent1; ++j){
 56 |         EXPECT_FLOAT_EQ(A(i,j), A_preKernel[count++]);
 57 |       }
 58 |     }
 59 |   }
 60 | 
 61 |   else if constexpr(std::is_same_v<value_type, double>){
 62 |     // check x
 63 |     for (std::size_t j=0; j<extent1; ++j){
 64 |       EXPECT_DOUBLE_EQ(x(j), x_preKernel[j]);
 65 |     }
 66 | 
 67 |     // check A and y (y uses an absolute tolerance of 1e-9)
 68 |     std::size_t count=0;
 69 |     for (std::size_t i=0; i<extent0; ++i){
 70 |       EXPECT_NEAR(y(i), y_gold(i), 1e-9);
 71 |       for (std::size_t j=0; j<extent1; ++j){
 72 |         EXPECT_DOUBLE_EQ(A(i,j), A_preKernel[count++]);
 73 |       }
 74 |     }
 75 |   }
 76 | 
 77 |   else if constexpr(std::is_same_v<value_type, std::complex<double>>){
 78 |     // check x (real and imaginary parts separately)
 79 |     for (std::size_t j=0; j<extent1; ++j){
 80 |       EXPECT_DOUBLE_EQ(x(j).real(), x_preKernel[j].real());
 81 |       EXPECT_DOUBLE_EQ(x(j).imag(), x_preKernel[j].imag());
 82 |     }
 83 | 
 84 |     // check A and y (real and imaginary parts separately)
 85 |     std::size_t count=0;
 86 |     for (std::size_t i=0; i<extent0; ++i){
 87 |       EXPECT_NEAR(y(i).real(), y_gold(i).real(), 1e-9);
 88 |       EXPECT_NEAR(y(i).imag(), y_gold(i).imag(), 1e-9);
 89 | 
 90 |       for (std::size_t j=0; j<extent1; ++j){
 91 |         EXPECT_DOUBLE_EQ(A(i,j).real(), A_preKernel[count].real());
 92 |         EXPECT_DOUBLE_EQ(A(i,j).imag(), A_preKernel[count++].imag());
 93 |       }
 94 |     }
 95 |   }
 96 | 
 97 | }
 98 | }//end anonymous namespace
 99 | 
100 | TEST_F(blas2_signed_float_fixture, kokkos_overwriting_matrix_vector_product)
101 | {
102 |   kokkos_blas_overwriting_gemv_impl(A_e0e1, x_e1, x_e0); // x_e0 is passed as the output vector y
103 | }
104 | 
105 | TEST_F(blas2_signed_double_fixture, kokkos_overwriting_matrix_vector_product)
106 | {
107 |   kokkos_blas_overwriting_gemv_impl(A_e0e1, x_e1, x_e0); // x_e0 is passed as the output vector y
108 | }
109 | 
110 | TEST_F(blas2_signed_complex_double_fixture, kokkos_overwriting_matrix_vector_product)
111 | {
112 |   // run only when the fixture's value_type and Kokkos::complex<double> have the same alignment
113 |   using kc_t = Kokkos::complex<double>;
114 |   if constexpr (alignof(value_type) == alignof(kc_t)){
115 |     kokkos_blas_overwriting_gemv_impl(A_e0e1, x_e1, x_e0);
116 |   }
117 | }
118 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/scale_rank1_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "gtest_fixtures.hpp"
 3 | #include "helpers.hpp"
 4 | 
 5 | namespace
 6 | {
 7 | 
 8 | // Serial reference for scale: multiplies every element of the rank-1 view x by factor, in place.
 9 | template<class x_t, class FactorT>
10 | void scale_gold_solution(x_t x, FactorT factor)
11 | {
12 |   for (std::size_t i=0; i<x.extent(0); ++i){
13 |     x(i) *= factor;
14 |   }
15 | }
16 | 
17 | // Scales x with the Kokkos-backed stdla::scale and compares each element with
18 | // a copy of the original x scaled by the serial scale_gold_solution.
19 | template<class x_t, class FactorT>
20 | void kokkos_blas1_scale_test_impl(x_t x, FactorT factor)
21 | {
22 |   namespace stdla = std::experimental::linalg;
23 | 
24 |   using value_type  = typename x_t::value_type;
25 |   using gold_view_t = mdspan<value_type, extents<dynamic_extent>>;
26 |   const std::size_t numElems = x.extent(0);
27 | 
28 |   // expected result: copy x into separate storage, then scale that copy
29 |   std::vector<value_type> goldStorage(numElems);
30 |   gold_view_t x_gold(goldStorage.data(), numElems);
31 |   for (std::size_t i = 0; i < numElems; ++i) {
32 |     x_gold(i) = x(i);
33 |   }
34 |   scale_gold_solution(x_gold, factor);
35 | 
36 |   // kernel under test; the checks below expect it to have
37 |   // updated x in place
38 |   stdla::scale(KokkosKernelsSTD::kokkos_exec<>(), factor, x);
39 | 
40 |   // single pass over the elements; the value type selects the assertion
41 |   for (std::size_t i = 0; i < numElems; ++i) {
42 |     if constexpr (std::is_same_v<value_type, float>) {
43 |       EXPECT_FLOAT_EQ(x(i), x_gold(i));
44 |     }
45 |     else if constexpr (std::is_same_v<value_type, double>) {
46 |       EXPECT_DOUBLE_EQ(x(i), x_gold(i));
47 |     }
48 |     else if constexpr (std::is_same_v<value_type, std::complex<double>>) {
49 |       // complex numbers are compared component-wise
50 |       EXPECT_DOUBLE_EQ(x(i).real(), x_gold(i).real());
51 |       EXPECT_DOUBLE_EQ(x(i).imag(), x_gold(i).imag());
52 |     }
53 |   }
54 | 
55 | }
56 | }//end anonymous namespace
57 | 
58 | TEST_F(blas1_signed_float_fixture, kokkos_scale)
59 | {
60 |   kokkos_blas1_scale_test_impl(x, static_cast<value_type>(2)); // factor 2 in the fixture's value_type
61 | }
62 | 
63 | TEST_F(blas1_signed_double_fixture, kokkos_scale)
64 | {
65 |   kokkos_blas1_scale_test_impl(x, static_cast<value_type>(2)); // factor 2 in the fixture's value_type
66 | }
67 | 
68 | TEST_F(blas1_signed_complex_double_fixture, kokkos_scale_complex_factor)
69 | {
70 |   // run only when the fixture's value_type and Kokkos::complex<double> have the same alignment
71 |   using kc_t   = Kokkos::complex<double>;
72 |   if constexpr(alignof(value_type) == alignof(kc_t)){
73 |     const value_type factor{2., 0.};
74 |     kokkos_blas1_scale_test_impl(x, factor);
75 |   }
76 | }
77 | 
78 | TEST_F(blas1_signed_complex_double_fixture, kokkos_scale_double_factor)
79 | {
80 |   // same alignment guard as above; here the factor is a plain double
81 |   using kc_t   = Kokkos::complex<double>;
82 |   if constexpr(alignof(value_type) == alignof(kc_t)){
83 |     kokkos_blas1_scale_test_impl(x, 2.);
84 |   }
85 | }
86 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/scale_rank2_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "gtest_fixtures.hpp"
 3 | #include "helpers.hpp"
 4 | 
 5 | namespace
 6 | {
 7 | 
 8 | // Serial reference for scale: multiplies every element of the rank-2 view A by factor, in place.
 9 | template<class A_t, class FactorT>
10 | void scale_gold_solution(A_t A, FactorT factor)
11 | {
12 |   for (std::size_t i=0; i<A.extent(0); ++i){
13 |     for (std::size_t j=0; j<A.extent(1); ++j){
14 |       A(i,j) *= factor;
15 |     }
16 |   }
17 | }
18 | 
19 | // Scales A with the Kokkos-backed stdla::scale and compares each element with
20 | // a copy of the original A scaled by the serial scale_gold_solution.
21 | // The comparison macro is chosen from A_t::value_type at compile time.
22 | template<class A_t, class FactorT>
23 | void kokkos_blas_scale_test_impl(A_t A, FactorT factor)
24 | {
25 |   namespace stdla = std::experimental::linalg;
26 | 
27 |   using value_type  = typename A_t::value_type;
28 |   using gold_view_t = mdspan<value_type, extents<dynamic_extent, dynamic_extent>>;
29 | 
30 |   const std::size_t numRows = A.extent(0);
31 |   const std::size_t numCols = A.extent(1);
32 | 
33 |   // expected result: copy A element by element into separate storage,
34 |   // then scale that copy with the serial reference
35 |   std::vector<value_type> goldStorage(numRows*numCols);
36 |   gold_view_t A_gold(goldStorage.data(), numRows, numCols);
37 |   for (std::size_t i = 0; i < numRows; ++i) {
38 |     for (std::size_t j = 0; j < numCols; ++j) {
39 |       A_gold(i,j) = A(i,j);
40 |     }
41 |   }
42 | 
43 |   scale_gold_solution(A_gold, factor);
44 | 
45 |   // kernel under test; the checks below expect it to have
46 |   // updated A in place
47 |   stdla::scale(KokkosKernelsSTD::kokkos_exec<>(), factor, A);
48 | 
49 |   // single pass over the elements; the value type selects the assertion
50 |   for (std::size_t i = 0; i < numRows; ++i) {
51 |     for (std::size_t j = 0; j < numCols; ++j) {
52 |       if constexpr (std::is_same_v<value_type, float>) {
53 |         EXPECT_FLOAT_EQ(A(i,j), A_gold(i,j));
54 |       }
55 |       else if constexpr (std::is_same_v<value_type, double>) {
56 |         EXPECT_DOUBLE_EQ(A(i,j), A_gold(i,j));
57 |       }
58 |       else if constexpr (std::is_same_v<value_type, std::complex<double>>) {
59 |         // complex numbers are compared component-wise
60 |         EXPECT_DOUBLE_EQ(A(i,j).real(), A_gold(i,j).real());
61 |         EXPECT_DOUBLE_EQ(A(i,j).imag(), A_gold(i,j).imag());
62 |       }
63 |     }
64 |   }
65 | 
66 | }
67 | }//end anonymous namespace
68 | 
69 | TEST_F(blas2_signed_float_fixture, kokkos_scale)
70 | {
71 |   kokkos_blas_scale_test_impl(A_e0e1, static_cast<value_type>(2)); // factor 2 in the fixture's value_type
72 | }
73 | 
74 | TEST_F(blas2_signed_double_fixture, kokkos_scale)
75 | {
76 |   kokkos_blas_scale_test_impl(A_e0e1, static_cast<value_type>(2)); // factor 2 in the fixture's value_type
77 | }
78 | 
79 | TEST_F(blas2_signed_complex_double_fixture, kokkos_scale_complex_factor)
80 | {
81 |   // run only when the fixture's value_type and Kokkos::complex<double> have the same alignment
82 |   using kc_t   = Kokkos::complex<double>;
83 |   if constexpr (alignof(value_type) == alignof(kc_t)){
84 |     const value_type factor{2., 0.};
85 |     kokkos_blas_scale_test_impl(A_e0e1, factor);
86 |   }
87 | }
88 | 
89 | TEST_F(blas2_signed_complex_double_fixture, kokkos_scale_double_factor)
90 | {
91 |   // same alignment guard as above; here the factor is a plain double
92 |   using kc_t   = Kokkos::complex<double>;
93 |   if constexpr (alignof(value_type) == alignof(kc_t)){
94 |     kokkos_blas_scale_test_impl(A_e0e1, 2.);
95 |   }
96 | }
97 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/swap_elements_rank1_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "gtest_fixtures.hpp"
 3 | #include "helpers.hpp"
 4 | 
 5 | namespace{
 6 | 
 7 | // Exchanges x and y with the Kokkos-backed stdla::swap_elements and checks
 8 | // that each vector ends up holding the other's pre-kernel contents.
 9 | template<class x_t, class y_t>
10 | void kokkos_blas_swap_test_rank1_impl(x_t x, y_t y)
11 | {
12 |   namespace stdla = std::experimental::linalg;
13 | 
14 |   using value_type = typename x_t::value_type;
15 |   const std::size_t numElems = x.extent(0);
16 | 
17 |   // the inputs must differ element-wise before the kernel runs,
18 |   // otherwise the checks after the swap would prove nothing
19 |   for (std::size_t i = 0; i < numElems; ++i) {
20 |     EXPECT_TRUE(x(i)!=y(i));
21 |   }
22 | 
23 |   // keep the pre-kernel contents of both vectors
24 |   auto x_preKernel = kokkostesting::create_stdvector_and_copy(x);
25 |   auto y_preKernel = kokkostesting::create_stdvector_and_copy(y);
26 | 
27 |   stdla::swap_elements(KokkosKernelsSTD::kokkos_exec<>(), x, y);
28 | 
29 |   // after the kernel the contents are exchanged: x holds the old y
30 |   // and y holds the old x
31 |   for (std::size_t i = 0; i < numElems; ++i) {
32 |     if constexpr (std::is_same_v<value_type, float>) {
33 |       EXPECT_FLOAT_EQ(x(i), y_preKernel[i]);
34 |       EXPECT_FLOAT_EQ(y(i), x_preKernel[i]);
35 |     }
36 |     else if constexpr (std::is_same_v<value_type, double>) {
37 |       EXPECT_DOUBLE_EQ(x(i), y_preKernel[i]);
38 |       EXPECT_DOUBLE_EQ(y(i), x_preKernel[i]);
39 |     }
40 |     else if constexpr (std::is_same_v<value_type, std::complex<double>>) {
41 |       // complex numbers are compared component-wise
42 |       EXPECT_DOUBLE_EQ(x(i).real(), y_preKernel[i].real());
43 |       EXPECT_DOUBLE_EQ(x(i).imag(), y_preKernel[i].imag());
44 | 
45 |       EXPECT_DOUBLE_EQ(y(i).real(), x_preKernel[i].real());
46 |       EXPECT_DOUBLE_EQ(y(i).imag(), x_preKernel[i].imag());
47 |     }
48 |   }
49 | 
50 | }
51 | }//end anonymous namespace
52 | 
53 | TEST_F(blas1_signed_float_fixture, kokkos_swap)
54 | {
55 |   kokkos_blas_swap_test_rank1_impl(x, y);
56 | }
57 | 
58 | TEST_F(blas1_signed_double_fixture, kokkos_swap)
59 | {
60 |   kokkos_blas_swap_test_rank1_impl(x, y);
61 | }
62 | 
63 | TEST_F(blas1_signed_complex_double_fixture, kokkos_swap)
64 | {
65 |   // run only when the fixture's value_type and Kokkos::complex<double> have the same alignment
66 |   using kc_t   = Kokkos::complex<double>;
67 |   if constexpr(alignof(value_type) == alignof(kc_t)){
68 |     kokkos_blas_swap_test_rank1_impl(x, y);
69 |   }
70 | }
71 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/swap_elements_rank2_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "gtest_fixtures.hpp"
 3 | #include "helpers.hpp"
 4 | 
 5 | namespace{
 6 | 
 7 | // Exchanges A and B with the Kokkos-backed stdla::swap_elements and checks
 8 | // that each matrix ends up holding the other's pre-kernel contents.
 9 | // The comparison macro is chosen from A_t::value_type at compile time.
10 | template<class A_t, class B_t>
11 | void kokkos_blas_swap_test_rank2_impl(A_t A, B_t B)
12 | {
13 |   namespace stdla = std::experimental::linalg;
14 | 
15 |   using value_type = typename A_t::value_type;
16 |   const std::size_t numRows = A.extent(0);
17 |   const std::size_t numCols = A.extent(1);
18 | 
19 |   // the inputs must differ element-wise before the kernel runs,
20 |   // otherwise the checks after the swap would prove nothing
21 |   for (std::size_t i = 0; i < numRows; ++i) {
22 |     for (std::size_t j = 0; j < numCols; ++j) {
23 |       EXPECT_TRUE(A(i,j)!=B(i,j));
24 |     }
25 |   }
26 | 
27 |   // keep the pre-kernel contents of both matrices; the helper name says
28 |   // the elements are stored row by row
29 |   auto A_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(A);
30 |   auto B_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(B);
31 | 
32 |   stdla::swap_elements(KokkosKernelsSTD::kokkos_exec<>(), A, B);
33 | 
34 |   // after the kernel the contents are exchanged: A holds the old B and
35 |   // B holds the old A. The elements are visited row by row so that the
36 |   // running index `count` follows the order of the pre-kernel copies.
37 |   std::size_t count=0;
38 | 
39 |   for (std::size_t i = 0; i < numRows; ++i) {
40 |     for (std::size_t j = 0; j < numCols; ++j) {
41 |       // single precision
42 |       if constexpr (std::is_same_v<value_type, float>) {
43 |         EXPECT_FLOAT_EQ(A(i,j), B_preKernel[count]);
44 |         EXPECT_FLOAT_EQ(B(i,j), A_preKernel[count++]);
45 |       }
46 | 
47 |       // double precision
48 |       if constexpr (std::is_same_v<value_type, double>) {
49 |         EXPECT_DOUBLE_EQ(A(i,j), B_preKernel[count]);
50 |         EXPECT_DOUBLE_EQ(B(i,j), A_preKernel[count++]);
51 |       }
52 | 
53 |       // complex double: compared component-wise
54 |       if constexpr (std::is_same_v<value_type, std::complex<double>>) {
55 |         EXPECT_DOUBLE_EQ(A(i,j).real(), B_preKernel[count].real());
56 |         EXPECT_DOUBLE_EQ(A(i,j).imag(), B_preKernel[count].imag());
57 | 
58 |         EXPECT_DOUBLE_EQ(B(i,j).real(), A_preKernel[count].real());
59 |         EXPECT_DOUBLE_EQ(B(i,j).imag(), A_preKernel[count++].imag());
60 |       }
61 |     }
62 |   }
63 | 
64 | }
65 | }//end anonymous namespace
66 | 
67 | TEST_F(blas2_signed_float_fixture, kokkos_swap)
68 | {
69 |   kokkos_blas_swap_test_rank2_impl(A_e0e1, B_e0e1);
70 | }
71 | 
72 | TEST_F(blas2_signed_double_fixture, kokkos_swap)
73 | {
74 |   kokkos_blas_swap_test_rank2_impl(A_e0e1, B_e0e1);
75 | }
76 | 
77 | TEST_F(blas2_signed_complex_double_fixture, kokkos_swap)
78 | {
79 |   // run only when the fixture's value_type and Kokkos::complex<double> have the same alignment
80 |   using kc_t   = Kokkos::complex<double>;
81 |   if constexpr(alignof(value_type) == alignof(kc_t)){
82 |     kokkos_blas_swap_test_rank2_impl(A_e0e1, B_e0e1);
83 |   }
84 | }
85 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/symmetric_matrix_rank1_update_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include "gtest_fixtures.hpp"
19 | #include "helpers.hpp"
20 | 
21 | namespace{
22 | 
23 | using namespace kokkostesting;
24 | 
25 | // Serial reference update A(i,j) += x(i)*x(j) over the triangle selected by Triangle.
26 | template<class x_t, class A_t, class Triangle>
27 | void symmetric_matrix_rank_1_update_gold_solution(const x_t &x, A_t &A, Triangle /* t */)
28 | {
29 |   using size_type = std::experimental::extents<>::size_type;
30 |   constexpr bool isLower = std::is_same_v<Triangle, std::experimental::linalg::lower_triangle_t>;
31 |   for (size_type col = 0; col < A.extent(1); ++col) {
32 |     const size_type rowBegin = isLower ? col : 0;                // lower: rows col..extent(0)-1
33 |     const size_type rowEnd   = isLower ? A.extent(0) : col + 1;  // otherwise: rows 0..col
34 |     for (size_type row = rowBegin; row < rowEnd; ++row) { A(row, col) += x(row) * x(col); }
35 |   }
36 | }
37 | 
38 | // Hands the serial gold update and the Kokkos kernel call to test_op_Ax (helper not shown here).
39 | template<class x_t, class A_t, class Triangle, class Scalar = typename x_t::element_type>
40 | void test_kokkos_symmetric_matrix_rank1_update_impl(const x_t &x, A_t &A, Triangle t)
41 | {
42 |   const auto tol = tolerance<typename x_t::value_type>(1e-20, 1e-10f);
43 |   const auto makeExpected = [&](auto A_expected) {  // reference update on the matrix it receives
44 |     symmetric_matrix_rank_1_update_gold_solution(x, A_expected, t);
45 |   };
46 |   const auto runKernel = [&]() {                    // kernel under test, applied to A itself
47 |     std::experimental::linalg::symmetric_matrix_rank_1_update(KokkosKernelsSTD::kokkos_exec<>(), x, A, t);
48 |   };
49 |   test_op_Ax(x, A, tol, makeExpected, runKernel);
50 | }
51 | 
52 | } // anonymous namespace
53 | // Defines one gtest case per value type; it runs the symmetric rank-1 update for both triangles.
54 | #define DEFINE_TESTS(blas_val_type)                                          \
55 | TEST_F(blas2_signed_##blas_val_type##_fixture,                               \
56 |        kokkos_symmetric_matrix_rank1_update) {                               \
57 |   using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \
58 |   run_checked_tests<val_t>("kokkos_", "symmetric_matrix_rank1_update", "",   \
59 |                            #blas_val_type, [&]() {                           \
60 |   /* callable handed to run_checked_tests: lower, then upper triangle */     \
61 |     test_kokkos_symmetric_matrix_rank1_update_impl(x_e0, A_sym_e0,           \
62 |                             std::experimental::linalg::lower_triangle);      \
63 |     test_kokkos_symmetric_matrix_rank1_update_impl(x_e0, A_sym_e0,           \
64 |                             std::experimental::linalg::upper_triangle);      \
65 |                                                                              \
66 |   });                                                                        \
67 | }
68 | // Expand the test for each value type listed below.
69 | DEFINE_TESTS(double)
70 | DEFINE_TESTS(float)
71 | DEFINE_TESTS(complex_double)
72 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/symmetric_matrix_rank2_update_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include "gtest_fixtures.hpp"
19 | #include "helpers.hpp"
20 | 
21 | namespace{
22 | 
23 | using namespace kokkostesting;
24 | 
25 | // Serial reference update A(i,j) += x(i)*y(j) + y(i)*x(j) over the triangle selected by Triangle.
26 | template<class x_t, class y_t, class A_t, class Triangle>
27 | void symmetric_matrix_rank_2_update_gold_solution(const x_t &x, const y_t &y, A_t &A, Triangle /* t */)
28 | {
29 |   using size_type = std::experimental::extents<>::size_type;
30 |   constexpr bool isLower = std::is_same_v<Triangle, std::experimental::linalg::lower_triangle_t>;
31 |   for (size_type col = 0; col < A.extent(1); ++col) {
32 |     const size_type rowBegin = isLower ? col : 0;                // lower: rows col..extent(0)-1
33 |     const size_type rowEnd   = isLower ? A.extent(0) : col + 1;  // otherwise: rows 0..col
34 |     for (size_type row = rowBegin; row < rowEnd; ++row) { A(row, col) += x(row) * y(col) + y(row) * x(col); }
35 |   }
36 | }
37 | 
38 | // Hands the serial gold update and the Kokkos kernel call to test_op_Axy (helper not shown here).
39 | template<class x_t, class y_t, class A_t, class Triangle, class Scalar = typename x_t::element_type>
40 | void test_kokkos_symmetric_matrix_rank2_update_impl(const x_t &x, const y_t &y, A_t &A, Triangle t)
41 | {
42 |   const auto tol = tolerance<typename x_t::value_type>(1e-20, 1e-10f);
43 |   const auto makeExpected = [&](auto A_expected) {  // reference update on the matrix it receives
44 |     symmetric_matrix_rank_2_update_gold_solution(x, y, A_expected, t);
45 |   };
46 |   const auto runKernel = [&]() {                    // kernel under test, applied to A itself
47 |     std::experimental::linalg::symmetric_matrix_rank_2_update(KokkosKernelsSTD::kokkos_exec<>(), x, y, A, t);
48 |   };
49 |   test_op_Axy(x, y, A, tol, makeExpected, runKernel);
50 | }
51 | 
52 | } // anonymous namespace
53 | // Defines one gtest case per value type; it runs the symmetric rank-2 update for both triangles.
54 | #define DEFINE_TESTS(blas_val_type)                                          \
55 | TEST_F(blas2_signed_##blas_val_type##_fixture,                               \
56 |        kokkos_symmetric_matrix_rank2_update) {                               \
57 |   using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \
58 |   run_checked_tests<val_t>("kokkos_", "symmetric_matrix_rank2_update", "",   \
59 |                            #blas_val_type, [&]() {                           \
60 |   /* callable handed to run_checked_tests: lower, then upper triangle */     \
61 |     test_kokkos_symmetric_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0,     \
62 |                             std::experimental::linalg::lower_triangle);      \
63 |     test_kokkos_symmetric_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0,     \
64 |                             std::experimental::linalg::upper_triangle);      \
65 |                                                                              \
66 |   });                                                                        \
67 | }
68 | // Expand the test for each value type listed below.
69 | DEFINE_TESTS(double)
70 | DEFINE_TESTS(float)
71 | DEFINE_TESTS(complex_double)
72 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/symmetric_matrix_rank_2k_update_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include "gtest_fixtures.hpp"
19 | #include "helpers.hpp"
20 | 
21 | namespace{
22 | 
23 | using namespace kokkostesting;
24 | 
25 | // Serial reference update C(i,j) += sum_k A(i,k)*B(j,k) + B(i,k)*A(j,k) over the triangle selected
26 | // by Triangle. i and j are bounded by C.extent(0), k by A.extent(1), so A and B need not be square.
27 | template<class A_t, class B_t, class C_t, class Triangle>
28 | void symmetric_matrix_rank_2k_update_gold_solution(const A_t &A, const B_t &B, C_t &C, Triangle /* t */)
29 | {
30 |   using size_type = std::experimental::extents<>::size_type;
31 |   constexpr bool low = std::is_same_v<Triangle, std::experimental::linalg::lower_triangle_t>;
32 |   const size_type C_ext = C.extent(0), A_ext1 = A.extent(1);
33 |   for (size_type j = 0; j < C_ext; ++j) {
34 |     const size_type i1 = low ? C_ext : j + 1;  // lower: rows j..C_ext-1, otherwise rows 0..j
35 |     for (size_type i = low ? j : 0; i < i1; ++i) {
36 |       for (size_type k = 0; k < A_ext1; ++k) { C(i, j) += A(i, k) * B(j, k) + B(i, k) * A(j, k); }
37 |     }
38 |   }
39 | }
40 | 
41 | // Hands the serial gold update and the Kokkos kernel call to test_op_CAB (helper not shown here).
42 | template<class A_t, class B_t, class C_t, class Triangle>
43 | void test_kokkos_symmetric_matrix_rank2k_update_impl(const A_t &A, const B_t &B, C_t &C, Triangle t)
44 | {
45 |   const auto tol = tolerance<typename C_t::value_type>(1e-20, 1e-10f);
46 |   const auto makeExpected = [&](auto C_expected) {  // reference update on the matrix it receives
47 |     symmetric_matrix_rank_2k_update_gold_solution(A, B, C_expected, t);
48 |   };
49 |   const auto runKernel = [&]() {                    // kernel under test, applied to C itself
50 |     std::experimental::linalg::symmetric_matrix_rank_2k_update(KokkosKernelsSTD::kokkos_exec<>(), A, B, C, t);
51 |   };
52 |   test_op_CAB(A, B, C, tol, makeExpected, runKernel);
53 | }
54 | 
55 | } // anonymous namespace
56 | // Defines one gtest case per value type; it runs the symmetric rank-2k update for both triangles.
57 | #define DEFINE_TESTS(blas_val_type)                                          \
58 | TEST_F(blas2_signed_##blas_val_type##_fixture,                               \
59 |        kokkos_symmetric_matrix_rank2k_update) {                              \
60 |   using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \
61 |   run_checked_tests<val_t>("kokkos_", "symmetric_matrix_rank2k_update", "",  \
62 |                            #blas_val_type, [&]() {                           \
63 |   /* lower, then upper triangle; A_sym_e0 is passed as both A and B */       \
64 |     test_kokkos_symmetric_matrix_rank2k_update_impl(A_sym_e0, A_sym_e0, A_hem_e0,    \
65 |                             std::experimental::linalg::lower_triangle);      \
66 |     test_kokkos_symmetric_matrix_rank2k_update_impl(A_sym_e0, A_sym_e0, A_hem_e0,    \
67 |                             std::experimental::linalg::upper_triangle);      \
68 |                                                                              \
69 |   });                                                                        \
70 | }
71 | // Expand the test for each value type listed below.
72 | DEFINE_TESTS(double)
73 | DEFINE_TESTS(float)
74 | DEFINE_TESTS(complex_double)
75 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/symmetric_matrix_rank_k_update_kokkos.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include "gtest_fixtures.hpp"
19 | #include "helpers.hpp"
20 | 
21 | namespace{
22 | 
23 | using namespace kokkostesting;
24 | 
25 | // Serial reference update C(i,j) += sum_k alpha*A(i,k)*A(j,k) over the triangle selected by
26 | // Triangle. Rows and columns are bounded by C.extent(0), the inner index by A.extent(1).
27 | template<class AlphaType, class A_t, class C_t, class Triangle>
28 | void symmetric_matrix_rank_k_update_gold_solution(AlphaType alpha, const A_t &A, C_t &C, Triangle /* t */)
29 | {
30 |   using size_type = std::experimental::extents<>::size_type;
31 |   constexpr bool isLower = std::is_same_v<Triangle, std::experimental::linalg::lower_triangle_t>;
32 |   const auto numRowsC = C.extent(0);
33 |   const auto innerDim = A.extent(1);
34 |   for (size_type col = 0; col < numRowsC; ++col) {
35 |     const size_type rowBegin = isLower ? col : 0;              // lower: rows col..numRowsC-1
36 |     const size_type rowEnd   = isLower ? numRowsC : col + 1;   // otherwise: rows 0..col
37 |     for (size_type row = rowBegin; row < rowEnd; ++row) {
38 |       for (size_type k = 0; k < innerDim; ++k) { C(row, col) += alpha * A(row, k) * A(col, k); }
39 |     }
40 |   }
41 | }
42 | 
43 | // Hands the serial gold update and the Kokkos kernel call (both with alpha = 2) to test_op_CA.
44 | template<class A_t, class C_t, class Triangle>
45 | void test_kokkos_symmetric_matrix_rank_k_update_impl(const A_t A, C_t C, Triangle t)
46 | {
47 |   const auto alpha = static_cast<typename A_t::element_type>(2.0); // check if alpha is applied
48 |   const auto get_gold = [&](auto C_gold) {  // reference update on the matrix it receives
49 |       symmetric_matrix_rank_k_update_gold_solution(alpha, A, C_gold, t);
50 |     };
51 |   const auto compute = [&]() {              // kernel under test, applied to C itself
52 |       std::experimental::linalg::symmetric_matrix_rank_k_update(KokkosKernelsSTD::kokkos_exec<>(), alpha, A, C, t);
53 |     };
54 |   const auto tol = tolerance<typename C_t::value_type>(1e-20, 1e-10f);
55 |   test_op_CA(A, C, tol, get_gold, compute);
56 | }
57 | 
58 | } // anonymous namespace
59 | // Defines one gtest case per value type; it runs the symmetric rank-k update for both triangles.
60 | #define DEFINE_TESTS(blas_val_type)                                          \
61 | TEST_F(blas2_signed_##blas_val_type##_fixture,                               \
62 |        kokkos_symmetric_matrix_rank_k_update) {                              \
63 |   using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \
64 |   run_checked_tests<val_t>("kokkos_", "symmetric_matrix_rank_k_update", "",  \
65 |                            #blas_val_type, [&]() {                           \
66 |   /* callable handed to run_checked_tests: lower, then upper triangle */     \
67 |     test_kokkos_symmetric_matrix_rank_k_update_impl(A_sym_e0, A_hem_e0,      \
68 |                             std::experimental::linalg::lower_triangle);      \
69 |     test_kokkos_symmetric_matrix_rank_k_update_impl(A_sym_e0, A_hem_e0,      \
70 |                             std::experimental::linalg::upper_triangle);      \
71 |                                                                              \
72 |   });                                                                        \
73 | }
74 | // Expand the test for each value type listed below.
75 | DEFINE_TESTS(double)
76 | DEFINE_TESTS(float)
77 | DEFINE_TESTS(complex_double)
78 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/test.cmake:
--------------------------------------------------------------------------------
 1 | include(FindUnixCommands)  # presumably the source of ${BASH} used below -- TODO confirm
 2 | # Reads EXE_NAME, LOG_FILE, TEST_STRING_FIND and ALGO_NAME; none of them is set in this script.
 3 | # Step 1: run the test executable and send its standard output to ${LOG_FILE}.
 4 | execute_process(
 5 |   COMMAND ${EXE_NAME}
 6 |   RESULT_VARIABLE RES_A
 7 |   OUTPUT_FILE ${LOG_FILE})
 8 | 
 9 | # A result that evaluates to true (e.g. a non-zero exit code) is reported as a numerical failure.
10 | if(RES_A)
11 |   message(FATAL_ERROR "numerical test failed")
12 | else()
13 |   message("numerical test succeeded")
14 | endif()
15 | 
16 | # Step 2: passing numerically is not enough, because the result must
17 | # also be known to come from the Kokkos implementation.
18 | # The log is therefore searched for ${TEST_STRING_FIND}; finding it
19 | # signals that the correct Kokkos impl was found/called.
20 | # A RES_B that evaluates to true (grep did not report a match) fails the test.
21 | set(CMD "grep -R '${TEST_STRING_FIND}' ${LOG_FILE} > /dev/null")
22 | execute_process(COMMAND ${BASH} -c ${CMD} RESULT_VARIABLE RES_B)
23 | if(RES_B)
24 |   message(FATAL_ERROR
25 |     "test failed: ${ALGO_NAME} did not call the correct Kokkos impl")
26 | else()
27 |   message("${ALGO_NAME} called the correct Kokkos impl")
28 | endif()
29 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/triangular_matrix_vector_solve.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #include "gtest_fixtures.hpp"
19 | #include "helpers.hpp"
20 | 
21 | namespace{
22 | 
23 | using namespace kokkostesting;
24 | 
25 | // Reference substitution solve: on entry x holds the right-hand side, on exit
26 | // the solution of the triangular system selected by the Triangle tag.
27 | template<class A_t, class x_t, class Triangle, class DiagonalStorage>
28 | void triangular_matrix_vector_solve_gold_solution(A_t A, Triangle t, DiagonalStorage d, x_t x)
29 | {
30 |   namespace la = std::experimental::linalg;
31 |   using size_type = typename std::experimental::extents<>::size_type;
32 |   constexpr bool is_lower = std::is_same_v<Triangle, la::lower_triangle_t>;
33 |   constexpr bool divide_by_diag = std::is_same_v<DiagonalStorage, la::explicit_diagonal_t>;
34 | 
35 |   const size_type num_rows = A.extent(0);
36 |   const size_type num_cols = A.extent(1);
37 | 
38 |   // Rows are visited top-down for lower triangles and bottom-up otherwise,
39 |   // so every x(col) read below was already updated in an earlier step.
40 |   for (size_type step = 0; step < num_rows; ++step) {
41 |     const size_type row = is_lower ? step : num_rows - 1 - step;
42 |     const size_type col_begin = is_lower ? 0 : row + 1;
43 |     const size_type col_end = is_lower ? row : num_cols;
44 |     for (size_type col = col_begin; col < col_end; ++col) {
45 |       x(row) -= A(row, col) * x(col);
46 |     }
47 |     if constexpr (divide_by_diag) {
48 |       x(row) /= A(row, row);
49 |     }
50 |   }
51 | }
52 | 
53 | // Runs the Kokkos triangular_matrix_vector_solve on (A, b) and hands the
54 | // outcome, together with a gold-solution generator, to test_op_xAb.
55 | template<class A_t, class b_t, class x_t, class Triangle, class DiagonalStorage>
56 | void test_triangular_matrix_vector_solve_impl(A_t A, b_t b, x_t x0, Triangle t, DiagonalStorage d)
57 | {
58 |   namespace stdla = std::experimental::linalg;
59 |   // work on a private copy of x0 so the fixture data stays untouched
60 |   auto x_storage = create_stdvector_and_copy(x0);
61 |   auto x = make_mdspan(x_storage);
62 | 
63 |   // reference result: seed the output with b, then solve in place
64 |   const auto get_gold = [&](auto x_gold) {
65 |     stdla::copy(b, x_gold);
66 |     triangular_matrix_vector_solve_gold_solution(A, t, d, x_gold);
67 |   };
68 |   // operation under test, dispatched with the Kokkos execution policy
69 |   const auto compute = [&]() {
70 |     stdla::triangular_matrix_vector_solve(KokkosKernelsSTD::kokkos_exec<>(), A, t, d, b, x);
71 |   };
72 |   const auto tol = tolerance<typename x_t::value_type>(1e-12, 1e-4f);
73 |   test_op_xAb(A, b, x, tol, get_gold, compute);
74 | }
75 | 
76 | } // anonymous namespace
77 | 
78 | #define DEFINE_TESTS(blas_val_type)                                          \
79 | TEST_F(blas2_signed_##blas_val_type##_fixture,                               \
80 |        kokkos_triangular_matrix_vector_solve) {                              \
81 |   using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \
82 |   run_checked_tests<val_t>("kokkos_", "triangular_matrix_vector_solve", "",  \
83 |                            #blas_val_type, [&]() {                           \
84 |     /* cases: lower + implicit unit diagonal, upper + explicit diagonal */   \
85 |     test_triangular_matrix_vector_solve_impl(A_sym_e0, x_e0, x_e0,           \
86 |                          std::experimental::linalg::lower_triangle,          \
87 |                          std::experimental::linalg::implicit_unit_diagonal); \
88 |     test_triangular_matrix_vector_solve_impl(A_sym_e0, x_e0, x_e0,           \
89 |                          std::experimental::linalg::upper_triangle,          \
90 |                          std::experimental::linalg::explicit_diagonal);      \
91 |                                                                              \
92 |   });                                                                        \
93 | }
94 | 
95 | DEFINE_TESTS(double)
96 | DEFINE_TESTS(float)
97 | DEFINE_TESTS(complex_double)
98 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/vector_abs_sum_kokkos.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "gtest_fixtures.hpp"
  3 | #include "helpers.hpp"
  4 | 
  5 | namespace{
  6 | 
  7 | // Reference value: sum of abs(x(i)) over all elements, plus initValue
  8 | // when useInit is set.
  9 | template<class x_t, class T>
 10 | auto vector_abs_sum_gold_solution(x_t x, T initValue, bool useInit)
 11 | {
 12 |   using std::abs;
 13 | 
 14 |   T sum = {};
 15 |   const std::size_t n = x.extent(0);
 16 |   for (std::size_t k = 0; k < n; ++k) {
 17 |     sum += abs(x(k));
 18 |   }
 19 |   if (useInit) { sum += initValue; }
 20 |   return sum;
 21 | }
 22 | 
 23 | // Checks the Kokkos-backed stdla::vector_abs_sum against the gold value and
 24 | // verifies that the input vector is left unmodified.
 25 | template<class x_t, class T>
 26 | void kokkos_blas1_vector_abs_sum_test_impl(x_t x,
 27 |                                            T initValue,
 28 |                                            bool useInit)
 29 | {
 30 |   namespace stdla = std::experimental::linalg;
 31 |   using value_type = typename x_t::value_type;
 32 | 
 33 |   // snapshot of x taken before the kernel, to detect unintended writes
 34 |   auto x_preKernel = kokkostesting::create_stdvector_and_copy(x);
 35 | 
 36 |   // reference value
 37 |   const T gold = vector_abs_sum_gold_solution(x, initValue, useInit);
 38 | 
 39 |   // value under test: the overload taking an init value is only
 40 |   // exercised when useInit is set
 41 |   T result = {};
 42 |   if (!useInit){
 43 |     result = stdla::vector_abs_sum(KokkosKernelsSTD::kokkos_exec<>(), x);
 44 |   }
 45 |   else{
 46 |     result = stdla::vector_abs_sum(KokkosKernelsSTD::kokkos_exec<>(), x, initValue);
 47 |   }
 48 | 
 49 |   // Absolute tolerances per value type. EXPECT_FLOAT_EQ cannot be used for
 50 |   // float because in some cases it fails on the third digit or similar.
 51 |   constexpr bool is_float = std::is_same_v<value_type, float>;
 52 |   constexpr bool is_double = std::is_same_v<value_type, double>;
 53 |   constexpr bool is_complex_double =
 54 |     std::is_same_v<value_type, std::complex<double>>;
 55 | 
 56 |   if constexpr (is_float){
 57 |     EXPECT_NEAR(result, gold, 1e-2);
 58 |   }
 59 |   else if constexpr (is_double || is_complex_double){
 60 |     // complex<double> input is compared with the same tolerance as double
 61 |     EXPECT_NEAR(result, gold, 1e-9);
 62 |   }
 63 | 
 64 |   // x should not change after kernel
 65 |   const std::size_t extent = x.extent(0);
 66 |   for (std::size_t i = 0; i < extent; ++i){
 67 |     EXPECT_TRUE(x(i) == x_preKernel[i]);
 68 |   }
 69 | }
 70 | }//end anonym namespace
 71 | 
 72 | 
 73 | TEST_F(blas1_signed_float_fixture, kokkos_vector_abs_sum_noinitvalue)
 74 | { // the init argument is a placeholder here: useInit == false ignores it
 75 |   kokkos_blas1_vector_abs_sum_test_impl(x, 0.0f, false);
 76 | }
 77 | 
 78 | TEST_F(blas1_signed_float_fixture, kokkos_vector_abs_sum_initvalue)
 79 | { // float data, init value 3 added to the sum
 80 |   kokkos_blas1_vector_abs_sum_test_impl(x, 3.0f, true);
 81 | }
 82 | 
 83 | TEST_F(blas1_signed_double_fixture, kokkos_vector_abs_sum_noinitvalue)
 84 | { // the init argument is a placeholder here: useInit == false ignores it
 85 |   kokkos_blas1_vector_abs_sum_test_impl(x, 0.0, false);
 86 | }
 87 | 
 88 | TEST_F(blas1_signed_double_fixture, kokkos_vector_abs_sum_initvalue)
 89 | { // double data, init value 5 added to the sum
 90 |   kokkos_blas1_vector_abs_sum_test_impl(x, 5.0, true);
 91 | }
 92 | 
 93 | TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_abs_sum_noinitvalue)
 94 | {
 95 |   // only run when the fixture's value_type (presumably std::complex<double>)
 96 |   // and Kokkos::complex<double> agree on alignment
 97 |   if constexpr(alignof(value_type) == alignof(Kokkos::complex<double>)){
 98 |     // for complex values, abs returns magnitude, hence the real init type;
 99 |     // the init value itself is ignored because useInit == false
100 |     kokkos_blas1_vector_abs_sum_test_impl(x, 0.0, false);
101 |   }
102 | }
103 | 
104 | TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_abs_sum_initvalue)
105 | {
106 |   // only run when the fixture's value_type (presumably std::complex<double>)
107 |   // and Kokkos::complex<double> agree on alignment
108 |   if constexpr(alignof(value_type) == alignof(Kokkos::complex<double>)){
109 |     // for complex values, abs returns magnitude, hence the real init type;
110 |     // a negative init value is used for this case
111 |     kokkos_blas1_vector_abs_sum_test_impl(x, -2.0, true);
112 |   }
113 | }
114 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/vector_norm2_kokkos.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "gtest_fixtures.hpp"
  3 | #include "helpers.hpp"
  4 | #include <cmath>
  5 | 
  6 | namespace
  7 | {
  8 | 
  9 | // Reference two-norm: sqrt of the sum of squared magnitudes of x; when
 10 | // useInit is set, initValue is added under the square root.
 11 | template<class x_t, class T>
 12 | T vector_two_norm_gold_solution(x_t x, T initValue, bool useInit)
 13 | {
 14 |   using std::sqrt;
 15 |   using value_type = typename x_t::value_type;
 16 |   constexpr bool is_complex = std::is_same_v<value_type, std::complex<double>>;
 17 | 
 18 |   T sum_sq = {};
 19 |   for (std::size_t k = 0; k < x.extent(0); ++k){
 20 |     if constexpr (is_complex){
 21 |       sum_sq += std::norm(x(k));  // squared magnitude
 22 |     }
 23 |     else{
 24 |       sum_sq += x(k) * x(k);
 25 |     }
 26 |   }
 27 | 
 28 |   if (!useInit){
 29 |     return sqrt(sum_sq);
 30 |   }
 31 |   return sqrt(initValue + sum_sq);
 32 | }
 33 | 
 34 | // Checks the Kokkos-backed stdla::vector_two_norm against the gold value and
 35 | // verifies that the input vector is left unmodified.
 36 | template<class x_t, class T>
 37 | void kokkos_blas1_vector_two_norm_test_impl(x_t x, T initValue, bool useInit)
 38 | {
 39 |   namespace stdla = std::experimental::linalg;
 40 |   using value_type = typename x_t::value_type;
 41 | 
 42 |   // snapshot of x taken before the kernel, to detect unintended writes
 43 |   auto x_preKernel = kokkostesting::create_stdvector_and_copy(x);
 44 | 
 45 |   const T gold = vector_two_norm_gold_solution(x, initValue, useInit);
 46 | 
 47 |   // value under test: the overload taking an init value is only
 48 |   // exercised when useInit is set
 49 |   T result = {};
 50 |   if (!useInit){
 51 |     result = stdla::vector_two_norm(KokkosKernelsSTD::kokkos_exec<>(), x);
 52 |   }
 53 |   else{
 54 |     result = stdla::vector_two_norm(KokkosKernelsSTD::kokkos_exec<>(), x, initValue);
 55 |   }
 56 | 
 57 |   // absolute tolerance chosen per value type
 58 |   constexpr bool is_float = std::is_same_v<value_type, float>;
 59 |   constexpr bool is_double = std::is_same_v<value_type, double>;
 60 |   constexpr bool is_complex_double = std::is_same_v<value_type, std::complex<double>>;
 61 | 
 62 |   if constexpr (is_float){
 63 |     EXPECT_NEAR(result, gold, 1e-2);
 64 |   }
 65 |   else if constexpr (is_double || is_complex_double){
 66 |     EXPECT_NEAR(result, gold, 1e-9);
 67 |   }
 68 | 
 69 |   // x should not change after kernel
 70 |   for (std::size_t i = 0; i < x.extent(0); ++i){
 71 |     EXPECT_TRUE(x(i) == x_preKernel[i]);
 72 |   }
 73 | }
 74 | }//end anonym namespace
 75 | 
 76 | TEST_F(blas1_signed_float_fixture, kokkos_vector_two_norm_noinitvalue)
 77 | { // the init argument is a placeholder here: useInit == false ignores it
 78 |   kokkos_blas1_vector_two_norm_test_impl(x, 0.0f, false);
 79 | }
 80 | 
 81 | TEST_F(blas1_signed_float_fixture, kokkos_vector_two_norm_initvalue)
 82 | { // float data with init value 3
 83 |   kokkos_blas1_vector_two_norm_test_impl(x, 3.0f, true);
 84 | }
 85 | 
 86 | TEST_F(blas1_signed_double_fixture, kokkos_vector_two_norm_noinitvalue)
 87 | { // the init argument is a placeholder here: useInit == false ignores it
 88 |   kokkos_blas1_vector_two_norm_test_impl(x, 0.0, false);
 89 | }
 90 | 
 91 | TEST_F(blas1_signed_double_fixture, kokkos_vector_two_norm_initvalue)
 92 | { // double data with init value 5
 93 |   kokkos_blas1_vector_two_norm_test_impl(x, 5.0, true);
 94 | }
 95 | 
 96 | TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_two_norm_noinitvalue)
 97 | {
 98 |   // only run when the fixture's value_type and Kokkos::complex<double>
 99 |   // agree on alignment
100 |   if constexpr (alignof(value_type) == alignof(Kokkos::complex<double>)){
101 |     // the init/result scalar is double; it is ignored since useInit == false
102 |     kokkos_blas1_vector_two_norm_test_impl(x, 0.0, false);
103 |   }
104 | }
105 | 
106 | TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_two_norm_initvalue)
107 | {
108 |   // only run when the fixture's value_type and Kokkos::complex<double>
109 |   // agree on alignment
110 |   if constexpr (alignof(value_type) == alignof(Kokkos::complex<double>)){
111 |     // the init/result scalar is double, here with init value 5
112 |     kokkos_blas1_vector_two_norm_test_impl(x, 5.0, true);
113 |   }
114 | }
115 | 


--------------------------------------------------------------------------------
/tests/kokkos-based/vector_sum_of_squares_kokkos.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "gtest_fixtures.hpp"
  3 | #include "helpers.hpp"
  4 | #include <cmath>
  5 | 
  6 | namespace
  7 | {
  8 | 
  9 | // Reference scaled sum of squares of x, seeded with init. The scaling factor
 10 | // is the largest of init.scaling_factor and all abs(x(i)).
 11 | template<class x_t, class T>
 12 | std::experimental::linalg::sum_of_squares_result<T>
 13 | vector_sum_of_squares_gold_solution(x_t x, std::experimental::linalg::sum_of_squares_result<T> init)
 14 | {
 15 |   using std::abs;
 16 |   const std::size_t n = x.extent(0);
 17 |   // contribution of init to the plain (unscaled) sum of squares
 18 |   const T init_sum = init.scaling_factor*init.scaling_factor*init.scaled_sum_of_squares;
 19 | 
 20 |   // pass 1: determine the scaling factor
 21 |   T scale = init.scaling_factor;
 22 |   for (std::size_t i = 0; i < n; ++i) {
 23 |     scale = std::max(scale, abs(x(i)));
 24 |   }
 25 | 
 26 |   // pass 2: scaled sum of squares (ssq) and plain sum of squares (s)
 27 |   T ssq = init_sum/(scale*scale);
 28 |   T s=0.;
 29 |   for (std::size_t i = 0; i < n; ++i) {
 30 |     const auto absxi = abs(x(i));
 31 |     const auto quotient = absxi/scale;
 32 |     ssq += quotient * quotient;
 33 |     s += absxi*absxi;
 34 |   }
 35 | 
 36 |   // Consistency check against the definition: scaling_factor^2 *
 37 |   // scaled_sum_of_squares must equal the sum of squares of abs(x[i])
 38 |   // plus init.scaling_factor^2 * init.scaled_sum_of_squares.
 39 |   const auto lhs = scale*scale*ssq;
 40 |   const auto rhs = s+init_sum;
 41 |   std::cout << "Gold check : " << lhs << " " << rhs << std::endl;
 42 |   if constexpr(std::is_same_v<T, float>){
 43 |     EXPECT_NEAR(lhs, rhs, 1e-2);
 44 |   }
 45 |   else if constexpr(std::is_same_v<T, double>){
 46 |     EXPECT_NEAR(lhs, rhs, 1e-9);
 47 |   }
 48 |   std::experimental::linalg::sum_of_squares_result<T> result;
 49 |   result.scaling_factor = scale;
 50 |   result.scaled_sum_of_squares = ssq;
 51 |   return result;
 52 | }
 51 | 
 52 | // Checks the Kokkos-backed stdla::vector_sum_of_squares against the gold
 53 | // solution and verifies that the input vector is left unmodified.
 54 | template<class x_t, class T>
 55 | void kokkos_blas1_vector_sum_of_squares_test_impl(x_t x,
 56 |     std::experimental::linalg::sum_of_squares_result<T> initValue)
 57 | {
 58 |   namespace stdla = std::experimental::linalg;
 59 | 
 60 |   // snapshot of x taken before the kernel, to detect unintended writes
 61 |   auto x_preKernel = kokkostesting::create_stdvector_and_copy(x);
 62 | 
 63 |   // reference result (the gold routine also runs its own consistency check)
 64 |   const auto gold = vector_sum_of_squares_gold_solution(x, initValue);
 65 |   auto result = stdla::vector_sum_of_squares(
 66 |     KokkosKernelsSTD::kokkos_exec<>(), x, initValue);
 67 | 
 68 |   // absolute tolerance chosen per scalar type T
 69 |   if constexpr(std::is_same_v<T, float>)
 70 |   {
 71 |     EXPECT_NEAR(result.scaled_sum_of_squares, gold.scaled_sum_of_squares, 1e-3);
 72 |     EXPECT_NEAR(result.scaling_factor, gold.scaling_factor, 1e-3);
 73 |   }
 74 |   else if constexpr(std::is_same_v<T, double>)
 75 |   {
 76 |     EXPECT_NEAR(result.scaled_sum_of_squares, gold.scaled_sum_of_squares, 1e-9);
 77 |     EXPECT_NEAR(result.scaling_factor, gold.scaling_factor, 1e-9);
 78 |   }
 79 | 
 80 |   // x should not change after kernel
 81 |   const std::size_t extent = x.extent(0);
 82 |   for (std::size_t i = 0; i < extent; ++i){
 83 |     EXPECT_TRUE(x(i) == x_preKernel[i]);
 84 |   }
 85 | }
 86 | }//end anonym namespace
 87 | 
 88 | TEST_F(blas1_signed_float_fixture, kokkos_vector_sum_of_squares)
 89 | {
 90 |   // initial accumulator handed to the algorithm and to the gold solution
 91 |   using ssq_result_t = std::experimental::linalg::sum_of_squares_result<value_type>;
 92 |   kokkos_blas1_vector_sum_of_squares_test_impl(x, ssq_result_t{2.5f, 1.2f});
 93 | }
 94 | 
 95 | TEST_F(blas1_signed_double_fixture, kokkos_vector_sum_of_squares)
 96 | {
 97 |   // initial accumulator handed to the algorithm and to the gold solution
 98 |   using ssq_result_t = std::experimental::linalg::sum_of_squares_result<value_type>;
 99 |   kokkos_blas1_vector_sum_of_squares_test_impl(x, ssq_result_t{3.0, 1.2});
100 | }
101 | 
102 | TEST_F(blas1_signed_complex_double_fixture, kokkos_vector_sum_of_squares)
103 | {
104 |   // requires value_type and Kokkos::complex<double> to agree on alignment
105 |   if constexpr (alignof(value_type) == alignof(Kokkos::complex<double>)){
106 |     // the accumulator scalar is the real type double, not value_type
107 |     using ssq_result_t = std::experimental::linalg::sum_of_squares_result<double>;
108 |     const ssq_result_t init_value{2.5, 1.2};
109 |     kokkos_blas1_vector_sum_of_squares_test_impl(x, init_value);
110 |   }
111 | }
112 | 


--------------------------------------------------------------------------------
/tests/native/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | macro(linalg_add_test name)  # builds ${name}.cpp and registers it with CTest
 3 |   add_executable(${name} ${name}.cpp)
 4 |   target_link_libraries(${name} linalg GTest::GTest GTest::Main)
 5 |   # BLAS_LIBRARIES is literally "FALSE" if the BLAS was not found, and
 6 |   # linking against that causes linker errors involving "FALSE.lib".
 7 |   # Thus, BLAS_LIBRARIES is only linked when the BLAS was actually found.
 8 |   if(BLAS_FOUND)
 9 |     target_link_libraries(${name} ${BLAS_LIBRARIES})
10 |   endif()
11 |   # NAME/COMMAND form: CTest resolves the target to its built executable.
12 |   add_test(NAME ${name} COMMAND ${name})
13 | endmacro()
14 | # One test executable per source file: each name below maps to <name>.cpp.
15 | linalg_add_test(abs_if_needed)
16 | linalg_add_test(abs_sum)
17 | linalg_add_test(add)
18 | linalg_add_test(conj_if_needed)
19 | linalg_add_test(conjugate_transposed)
20 | linalg_add_test(conjugated)
21 | linalg_add_test(copy)
22 | linalg_add_test(dot)
23 | linalg_add_test(gemm)
24 | linalg_add_test(gemv)
25 | linalg_add_test(gemv_no_ambig)
26 | linalg_add_test(ger)
27 | linalg_add_test(gerc)
28 | linalg_add_test(givens)
29 | linalg_add_test(hemm)
30 | linalg_add_test(her)
31 | linalg_add_test(her2)
32 | linalg_add_test(herk)
33 | linalg_add_test(her2k)
34 | linalg_add_test(idx_abs_max)
35 | linalg_add_test(imag_if_needed)
36 | linalg_add_test(matrix_inf_norm)
37 | linalg_add_test(matrix_one_norm)
38 | linalg_add_test(mixed_accessors)
39 | linalg_add_test(norm2)
40 | linalg_add_test(proxy_refs)
41 | linalg_add_test(real_if_needed)
42 | linalg_add_test(scale)
43 | linalg_add_test(scaled)
44 | linalg_add_test(swap)
45 | linalg_add_test(symm)
46 | linalg_add_test(syr)
47 | linalg_add_test(syr2)
48 | linalg_add_test(syrk)
49 | linalg_add_test(syr2k)
50 | linalg_add_test(transposed)
51 | linalg_add_test(trmm)
52 | linalg_add_test(trmv)
53 | linalg_add_test(trsm)
54 | 


--------------------------------------------------------------------------------
/tests/native/add.cpp:
--------------------------------------------------------------------------------
  1 | #include "./gtest_fixtures.hpp"
  2 | 
  3 | namespace {
  4 |   using LinearAlgebra::add;
  5 | 
  6 |   TEST(BLAS1_add, vector_double)
  7 |   {
  8 |     using scalar_t = double;
  9 |     using vector_t = mdspan<scalar_t, extents<std::size_t, dynamic_extent>>;
 10 |     // x, y and z are three consecutive windows into a single allocation
 11 |     constexpr std::size_t vectorSize(5);
 12 |     std::vector<scalar_t> storage(std::size_t(3) * vectorSize);
 13 |     scalar_t* const base = storage.data();
 14 |     vector_t x(base, vectorSize);
 15 |     vector_t y(base + vectorSize, vectorSize);
 16 |     vector_t z(base + 2*vectorSize, vectorSize);
 17 | 
 18 |     // reference inputs: x(k) = k + 1 and y(k) = k + 2
 19 |     const auto x_ref = [](std::size_t k) { return scalar_t (k) + 1.0; };
 20 |     const auto y_ref = [](std::size_t k) { return scalar_t (k) + 2.0; };
 21 |     for (std::size_t k = 0; k < vectorSize; ++k) {
 22 |       x(k) = x_ref(k);
 23 |       y(k) = y_ref(k);
 24 |       z(k) = 0.0;
 25 |     }
 26 | 
 27 |     add(x, y, z);
 28 |     for (std::size_t k = 0; k < vectorSize; ++k) {
 29 |       const scalar_t x_k = x_ref(k);
 30 |       const scalar_t y_k = y_ref(k);
 31 |       // Make sure the function didn't modify the input.
 32 |       EXPECT_EQ( x(k), x_k );
 33 |       EXPECT_EQ( y(k), y_k );
 34 |       EXPECT_EQ( z(k), x_k + y_k ); // check the output
 35 |     }
 36 |   }
 37 | 
 38 |   TEST(BLAS1_add, vector_complex_double)
 39 |   {
 40 |     using real_t = double;
 41 |     using scalar_t = std::complex<real_t>;
 42 |     using vector_t = mdspan<scalar_t, extents<std::size_t, dynamic_extent>>;
 43 |     // x, y and z are three consecutive windows into a single allocation
 44 |     constexpr std::size_t vectorSize(5);
 45 |     std::vector<scalar_t> storage(std::size_t(3) * vectorSize);
 46 |     scalar_t* const base = storage.data();
 47 |     vector_t x(base, vectorSize);
 48 |     vector_t y(base + vectorSize, vectorSize);
 49 |     vector_t z(base + 2*vectorSize, vectorSize);
 50 | 
 51 |     // reference inputs as (real, imag): x(k) = (k+4, -k-1), y(k) = (k+5, -k-2)
 52 |     const auto x_ref = [](std::size_t k) { return scalar_t(real_t(k) + 4.0, -real_t(k) - 1.0); };
 53 |     const auto y_ref = [](std::size_t k) { return scalar_t(real_t(k) + 5.0, -real_t(k) - 2.0); };
 54 |     for (std::size_t k = 0; k < vectorSize; ++k) {
 55 |       x(k) = x_ref(k);
 56 |       y(k) = y_ref(k);
 57 |       z(k) = scalar_t(0.0, 0.0);
 58 |     }
 59 | 
 60 |     add(x, y, z);
 61 |     for (std::size_t k = 0; k < vectorSize; ++k) {
 62 |       const scalar_t x_k = x_ref(k);
 63 |       const scalar_t y_k = y_ref(k);
 64 |       // Make sure the function didn't modify the input.
 65 |       EXPECT_EQ( x(k), x_k );
 66 |       EXPECT_EQ( y(k), y_k );
 67 |       EXPECT_EQ( z(k), x_k + y_k ); // check the output
 68 |     }
 69 |   }
 70 | 
 71 |   TEST(BLAS1_add, matrix_double)
 72 |   {
 73 |     using scalar_t = double;
 74 |     constexpr std::size_t numRows(5);
 75 |     constexpr std::size_t numCols(6);
 76 |     using matrix_t = mdspan<scalar_t, extents<std::size_t, numRows, numCols>>;
 77 |     using storage_t = std::array<scalar_t, numRows * numCols>;
 78 |     // left uninitialized: every entry is written below before it is read
 79 |     storage_t A_storage;
 80 |     storage_t B_storage;
 81 |     storage_t C_storage;
 82 |     matrix_t A(A_storage.data());
 83 |     matrix_t B(B_storage.data());
 84 |     matrix_t C(C_storage.data());
 85 | 
 86 |     // inputs: A(r,c) = c + numCols * r, B = 2 * A, and C starts at zero
 87 |     for (std::size_t c = 0; c < numCols; ++c) {
 88 |       for (std::size_t r = 0; r < numRows; ++r) {
 89 |         const scalar_t A_rc = scalar_t(c) + scalar_t(numCols) * scalar_t(r);
 90 |         A(r,c) = A_rc;
 91 |         B(r,c) = scalar_t(2.0) * A_rc;
 92 |         C(r,c) = scalar_t{};
 93 |       }
 94 |     }
 95 |     add(A, B, C);
 96 |     for (std::size_t c = 0; c < numCols; ++c) {
 97 |       for (std::size_t r = 0; r < numRows; ++r) {
 98 |         const scalar_t A_rc = scalar_t(c) + scalar_t(numCols) * scalar_t(r);
 99 |         const scalar_t B_rc = scalar_t(2.0) * A_rc;
100 |         // Make sure the function didn't modify the input.
101 |         EXPECT_EQ( A(r,c), A_rc );
102 |         EXPECT_EQ( B(r,c), B_rc );
103 |         // check the output
104 |         EXPECT_EQ( C(r,c), A_rc + B_rc );
105 |       }
106 |     }
107 |   }
107 | }
108 | 


--------------------------------------------------------------------------------
/tests/native/conj_if_needed.cpp:
--------------------------------------------------------------------------------
 1 | #include "./my_numbers.hpp"
 2 | 
 3 | namespace {
 4 |   // For a non-complex Real, conj_if_needed must hand back the same value
 5 |   // with the same type.
 6 |   template<class Real>
 7 |   void test_real_conj_if_needed()
 8 |   {
 9 |     using LinearAlgebra::impl::conj_if_needed;
10 |     const Real z_conj_expected(2.0);
11 |     Real z(2.0);
12 |     auto z_conj = conj_if_needed(z);
13 |     EXPECT_EQ(z_conj, z_conj_expected);
14 |     static_assert(std::is_same_v<decltype(z_conj), Real>);
15 |   }
16 | 
17 |   // For a complex-like type, conj_if_needed must return the conjugate as
18 |   // the same type and leave its argument unchanged.
19 |   template<class Complex>
20 |   void test_any_complex_conj_if_needed()
21 |   {
22 |     using LinearAlgebra::impl::conj_if_needed;
23 |     const Complex z_conj_expected(2.0, 3.0);
24 |     Complex z_orig(2.0, -3.0);
25 |     Complex z(2.0, -3.0);
26 |     auto z_conj = conj_if_needed(z);
27 |     static_assert(std::is_same_v<decltype(z_conj), Complex>);
28 |     EXPECT_EQ(z_conj, z_conj_expected);
29 |     EXPECT_EQ(z, z_orig); // conj didn't change its input
30 |   }
31 | 
32 |   template<class Real>
33 |   void test_std_complex_conj_if_needed()
34 |   { // runs the generic complex check on std::complex<Real>
35 |     test_any_complex_conj_if_needed<std::complex<Real>>();
36 |   }
37 | 
38 |   void test_MyComplex_conj_if_needed()
39 |   { // runs the generic complex check on the user-defined MyComplex type
40 |     test_any_complex_conj_if_needed<TestLinearAlgebra::MyComplex>();
41 |   }
42 | 
43 |   TEST(test_numbers, conj_if_needed)
44 |   {
45 |     test_std_complex_conj_if_needed<float>();
46 |     test_std_complex_conj_if_needed<double>();
47 |     test_std_complex_conj_if_needed<long double>();
48 |     // user-defined complex type
49 |     test_MyComplex_conj_if_needed();
50 |     // built-in floating-point types: the value must come back unchanged
51 |     test_real_conj_if_needed<float>();
52 |     test_real_conj_if_needed<double>();
53 |     test_real_conj_if_needed<long double>();
54 |     // built-in integer types, signed and unsigned
55 |     test_real_conj_if_needed<int32_t>();
56 |     test_real_conj_if_needed<uint32_t>();
57 |     test_real_conj_if_needed<int64_t>();
58 |     test_real_conj_if_needed<uint64_t>();
59 |   }
60 | } // end anonymous namespace
61 | 


--------------------------------------------------------------------------------
/tests/native/conjugate_transposed.cpp:
--------------------------------------------------------------------------------
 1 | #include "./gtest_fixtures.hpp"
 2 | 
 3 | namespace {
 4 |   using LinearAlgebra::conjugate_transposed;
 5 | 
 6 |   TEST(conjugate_transposed, mdspan_complex_double)
 7 |   {
 8 |     using std::conj;
 9 |     using real_t = double;
10 |     using scalar_t = std::complex<real_t>;
11 |     constexpr std::size_t dim = 5;
12 |     using matrix_dynamic_t =
13 |       mdspan<scalar_t, extents<std::size_t, dynamic_extent, dynamic_extent>>;
14 |     using matrix_static_t =
15 |       mdspan<scalar_t, extents<std::size_t, dim, dim>>;
16 | 
17 |     // one dynamic-extent and one static-extent view, each on its own storage
18 |     std::vector<scalar_t> A_storage (std::size_t(dim*dim));
19 |     std::vector<scalar_t> B_storage (std::size_t(dim*dim));
20 |     matrix_dynamic_t A (A_storage.data (), dim, dim);
21 |     matrix_static_t B (B_storage.data ());
22 | 
23 |     // reference entry (i,j): (i+1, i+1) + dim * (j+1, j+1) as (real, imag)
24 |     const real_t dim_re = real_t(dim);
25 |     const auto ref_val = [dim_re](std::size_t i, std::size_t j) {
26 |       const real_t i_val_re = real_t(i) + 1.0;
27 |       const real_t j_val_re = real_t(j) + 1.0;
28 |       return scalar_t(i_val_re, i_val_re) + dim_re * scalar_t(j_val_re, j_val_re);
29 |     };
30 | 
31 |     for (std::size_t i = 0; i < dim; ++i) {
32 |       for (std::size_t j = 0; j < dim; ++j) {
33 |         const scalar_t val = ref_val(i, j);
34 |         A(i,j) = val;
35 |         B(i,j) = -val;
36 |       }
37 |     }
38 | 
39 |     auto A_h = conjugate_transposed (A);
40 |     auto B_h = conjugate_transposed (B);
41 | 
42 |     for (std::size_t i = 0; i < dim; ++i) {
43 |       for (std::size_t j = 0; j < dim; ++j) {
44 |         const scalar_t val = ref_val(i, j);
45 | 
46 |         // the original matrices still hold the values written above
47 |         EXPECT_EQ( A(i,j), val );
48 |         EXPECT_EQ( B(i,j), -val );
49 | 
50 |         // entry (j,i) of each view is the conjugate of entry (i,j)
51 |         EXPECT_EQ( scalar_t(A_h(j,i)), conj(val) );
52 |         EXPECT_EQ( scalar_t(B_h(j,i)), -conj(val) );
53 | 
54 |         EXPECT_EQ( scalar_t(A_h(j,i)), conj(A(i,j)) );
55 |         EXPECT_EQ( scalar_t(B_h(j,i)), conj(B(i,j)) );
56 |       }
57 |     }
58 |   }
59 | }
60 | 


--------------------------------------------------------------------------------
/tests/native/gemv_no_ambig.cpp:
--------------------------------------------------------------------------------
 1 | #include "./gtest_fixtures.hpp"
 2 | #include <iostream>
 3 | 
 4 | #ifdef LINALG_HAS_EXECUTION
 5 | #  include <execution>
 6 | #endif
 7 | 
 8 | namespace {
 9 | 
10 | using LinearAlgebra::matrix_vector_product;
11 | using LinearAlgebra::scaled;
12 | 
13 | // Overload-resolution check: every call below must resolve to exactly one
14 | // matrix_vector_product overload. No numerical result is verified.
15 | TEST(gemv, no_ambiguity)
16 | {
17 |   // std::size_t is used for sizes and indices so that the comparisons with
18 |   // the mdspan extents below do not mix signed and unsigned operands.
19 |   constexpr std::size_t N = 40, M = 20;
20 |   std::vector<double> A_vec(N*M);
21 |   std::vector<double> x_vec(M);
22 |   std::vector<double> y_vec(N);
23 | 
24 |   mdspan<double, extents<std::size_t, dynamic_extent,dynamic_extent>> A(A_vec.data(), N, M);
25 |   mdspan<double, extents<std::size_t, dynamic_extent>> x(x_vec.data(), M);
26 |   mdspan<double, extents<std::size_t, dynamic_extent>> y(y_vec.data(), N);
27 |   for (std::size_t i = 0; i < A.extent(0); ++i) {
28 |     for (std::size_t j = 0; j < A.extent(1); ++j) {
29 |       A(i,j) = 100.0 * i + j;
30 |     }
31 |   }
32 |   for (std::size_t i = 0; i < x.extent(0); ++i) {
33 |     x(i) = 1.0 * i;
34 |   }
35 |   for (std::size_t i = 0; i < y.extent(0); ++i) {
36 |     y(i) = -1.0 * i;
37 |   }
38 | 
39 |   matrix_vector_product(A, x, y);
40 |   // The following is an ambiguous call unless the implementation
41 |   // correctly constrains ExecutionPolicy (the spec would imply
42 |   // std::is_execution_policy_v, though implementations might define
43 |   // their own custom "execution policies" that cannot satisfy this).
44 |   matrix_vector_product(scaled(2.0, A), x,
45 |                         scaled(0.5, y), y);
46 | 
47 | #ifdef LINALG_HAS_EXECUTION
48 |   matrix_vector_product(std::execution::par,
49 |      scaled(2.0, A), x,
50 |      scaled(0.5, y), y);
51 | #endif
52 | }
53 | 
54 | }
55 | 


--------------------------------------------------------------------------------
/tests/native/idx_abs_max.cpp:
--------------------------------------------------------------------------------
 1 | #include "./gtest_fixtures.hpp"
 2 | 
 3 | namespace {
 4 | 
 5 |   using LinearAlgebra::vector_idx_abs_max;
 6 | 
 7 |   TEST_F(unsigned_double_vector, vector_idx_abs_max)
 8 |   { // index expected for the fixture's unsigned double data
 9 |     constexpr std::size_t expected = 9;
10 |     EXPECT_EQ(expected, vector_idx_abs_max(v));
11 |   }
12 | 
13 |   TEST_F(signed_double_vector, vector_idx_abs_max)
14 |   { // index expected for the fixture's signed double data
15 |     constexpr std::size_t expected = 9;
16 |     EXPECT_EQ(expected, vector_idx_abs_max(v));
17 |   }
18 | 
19 |   TEST_F(signed_complex_vector, vector_idx_abs_max)
20 |   { // index expected for the fixture's complex data
21 |     constexpr std::size_t expected = 3;
22 |     EXPECT_EQ(expected, vector_idx_abs_max(v));
23 |   }
24 | 
25 |   TEST(BLAS1_vector_idx_abs_max, trivial_case)
26 |   {
27 |     // for an empty vector the expected result is the largest std::size_t
28 |     constexpr auto expected = std::numeric_limits<std::size_t>::max();
29 |     std::array<double, 0> arr;
30 |     // zero length supplied at run time
31 |     mdspan<double, extents<std::size_t, dynamic_extent>> a(arr.data(),0);
32 |     EXPECT_EQ(expected, vector_idx_abs_max(a));
33 | 
34 |     // zero length encoded in the extents type
35 |     mdspan<double, extents<std::size_t, 0>> b(arr.data());
36 |     EXPECT_EQ(expected, vector_idx_abs_max(b));
37 |   }
38 | 
39 | } // end anonymous namespace
40 | 


--------------------------------------------------------------------------------
/tests/native/imag_if_needed.cpp:
--------------------------------------------------------------------------------
 1 | #include "./my_numbers.hpp"
 2 | 
 3 | namespace {
 4 |   // imag_if_needed on std::complex<R> yields the imaginary part, typed R.
 5 |   template<class R> void test_imag_if_needed_complex()
 6 |   {
 7 |     using LinearAlgebra::impl::imag_if_needed;
 8 |     std::complex<R> z{R(3.0), R(4.0)};
 9 |     auto z_imag = imag_if_needed(z);
10 |     static_assert(std::is_same_v<decltype(z_imag), R>);
11 |     EXPECT_EQ(z_imag, R(4.0));
12 |   }
13 |   // For a floating-point T, imag_if_needed yields zero of the same type.
14 |   template<class T> void test_imag_if_needed_floating_point()
15 |   {
16 |     using LinearAlgebra::impl::imag_if_needed;
17 |     T x = 9.0;
18 |     auto x_imag = imag_if_needed(x);
19 |     static_assert(std::is_same_v<decltype(x_imag), T>);
20 |     EXPECT_EQ(x_imag, T(0.0));
21 |   }
22 |   // For an integral T, imag_if_needed yields zero of the same type.
23 |   template<class T> void test_imag_if_needed_integral()
24 |   {
25 |     using LinearAlgebra::impl::imag_if_needed;
26 |     T x = 3;
27 |     auto x_imag = imag_if_needed(x);
28 |     static_assert(std::is_same_v<decltype(x_imag), T>);
29 |     EXPECT_EQ(x_imag, T(0));
30 |   }
31 | 
32 |   TEST(test_numbers, imag_if_needed)
33 |   {
34 |     using LinearAlgebra::impl::imag_if_needed;
35 |     // std::complex over the standard floating-point types
36 |     test_imag_if_needed_complex<float>();
37 |     test_imag_if_needed_complex<double>();
38 |     test_imag_if_needed_complex<long double>();
39 |     // plain floating-point types
40 |     test_imag_if_needed_floating_point<float>();
41 |     test_imag_if_needed_floating_point<double>();
42 |     test_imag_if_needed_floating_point<long double>();
43 |     // integer types, signed and unsigned, from 8 to 64 bits
44 |     test_imag_if_needed_integral<int8_t>();
45 |     test_imag_if_needed_integral<uint8_t>();
46 |     test_imag_if_needed_integral<int16_t>();
47 |     test_imag_if_needed_integral<uint16_t>();
48 |     test_imag_if_needed_integral<int32_t>();
49 |     test_imag_if_needed_integral<uint32_t>();
50 |     test_imag_if_needed_integral<int64_t>();
51 |     test_imag_if_needed_integral<uint64_t>();
52 | 
53 |     { // user-defined complex type: the result has the type of its own imag()
54 |       TestLinearAlgebra::MyComplex z{3.0, 4.0};
55 |       auto z_imag = imag_if_needed(z);
56 |       static_assert(std::is_same_v<decltype(z_imag), decltype(imag(z))>);
57 |       EXPECT_EQ(z_imag, 4.0);
58 |     }
59 | 
60 |     { // user-defined real type: the result is a value-initialized MyReal
61 |       TestLinearAlgebra::MyReal x{3.0};
62 |       auto x_imag = imag_if_needed(x);
63 |       static_assert(std::is_same_v<decltype(x_imag), TestLinearAlgebra::MyReal>);
64 |       EXPECT_EQ(x_imag, TestLinearAlgebra::MyReal{});
65 |     }
66 |   }
66 | } // end anonymous namespace
67 | 


--------------------------------------------------------------------------------
/tests/native/my_numbers.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef LINALG_TESTS_NATIVE_MY_NUMBERS_HPP
 2 | #define LINALG_TESTS_NATIVE_MY_NUMBERS_HPP
 3 | 
 4 | #include "./gtest_fixtures.hpp"
 5 | 
 6 | namespace TestLinearAlgebra {
 7 | 
 8 | // Minimal user-defined real number type wrapping a single double.
 9 | // abs and operator== are in-class friends.
10 | class MyReal {
11 |   double value_ = 0.0;
12 | 
13 | public:
14 |   MyReal() = default;
15 |   explicit MyReal(double value) : value_{value} {}
16 | 
17 |   double value() const { return value_; }
18 | 
19 |   friend MyReal abs(MyReal x) { return MyReal(std::abs(x.value_)); }
20 | 
21 |   friend bool operator==(MyReal x, MyReal y) { return x.value_ == y.value_; }
22 | };
23 | 
24 | // Minimal user-defined complex number type stored as two doubles.
25 | // real, imag, abs, conj and operator== are in-class friends.
26 | class MyComplex {
27 |   double real_ = 0.0;
28 |   double imag_ = 0.0;
29 | 
30 | public:
31 |   MyComplex() = default;
32 |   MyComplex(double re, double im) : real_{re}, imag_{im} {}
33 | 
34 |   // The same number as a std::complex<double>.
35 |   std::complex<double> value() const {
36 |     return std::complex<double>(real_, imag_);
37 |   }
38 | 
39 |   // Component accessors.
40 |   friend double real(MyComplex z) { return z.real_; }
41 |   friend double imag(MyComplex z) { return z.imag_; }
42 | 
43 |   // Modulus, computed directly as sqrt(re*re + im*im).
44 |   friend double abs(MyComplex z) {
45 |     return std::sqrt(z.real_ * z.real_ + z.imag_ * z.imag_);
46 |   }
47 | 
48 |   friend MyComplex conj(MyComplex z) {
49 |     return MyComplex(z.real_, -z.imag_);
50 |   }
51 | 
52 |   // Equal when both the real and the imaginary parts compare equal.
53 |   friend bool operator==(MyComplex x, MyComplex y) {
54 |     return x.real_ == y.real_ && x.imag_ == y.imag_;
55 |   }
56 | };
57 | 
58 | } // namespace TestLinearAlgebra
59 | 
60 | #endif // LINALG_TESTS_NATIVE_MY_NUMBERS_HPP
61 | 


--------------------------------------------------------------------------------
/tests/native/real_if_needed.cpp:
--------------------------------------------------------------------------------
 1 | #include "./my_numbers.hpp"
 2 | 
 3 | namespace {
 4 |   // std::complex<R> input: the real part must come back as an R.
 5 |   template<class R>
 6 |   void test_real_if_needed_complex() {
 7 |     using LinearAlgebra::impl::real_if_needed;
 8 |     std::complex<R> z{R(3.0), R(4.0)};
 9 |     auto z_real = real_if_needed(z);
10 |     EXPECT_EQ(z_real, R(3.0));
11 |     static_assert(std::is_same_v<decltype(z_real), R>);
12 |   }
13 |   // Floating-point input: the value comes back unchanged, with the same type.
14 |   template<class T>
15 |   void test_real_if_needed_floating_point() {
16 |     using LinearAlgebra::impl::real_if_needed;
17 |     T x = 9.0;
18 |     auto x_real = real_if_needed(x);
19 |     EXPECT_EQ(x_real, T(9.0));
20 |     static_assert(std::is_same_v<decltype(x_real), T>);
21 |   }
22 |   // Integral input: the value comes back unchanged, with the same type.
23 |   template<class T>
24 |   void test_real_if_needed_integral() {
25 |     using LinearAlgebra::impl::real_if_needed;
26 |     T x = 3;
27 |     auto x_real = real_if_needed(x);
28 |     EXPECT_EQ(x_real, T(3));
29 |     static_assert(std::is_same_v<decltype(x_real), T>);
30 |   }
31 | 
32 |   TEST(test_numbers, real_if_needed)
33 |   {
34 |     test_real_if_needed_complex<float>();
35 |     test_real_if_needed_complex<double>();
36 |     test_real_if_needed_complex<long double>();
37 | 
38 |     test_real_if_needed_floating_point<float>();
39 |     test_real_if_needed_floating_point<double>();
40 |     test_real_if_needed_floating_point<long double>();
41 | 
42 |     test_real_if_needed_integral<int8_t>();
43 |     test_real_if_needed_integral<uint8_t>();
44 |     test_real_if_needed_integral<int16_t>();
45 |     test_real_if_needed_integral<uint16_t>();
46 |     test_real_if_needed_integral<int32_t>();
47 |     test_real_if_needed_integral<uint32_t>();
48 |     test_real_if_needed_integral<int64_t>();
49 |     test_real_if_needed_integral<uint64_t>();
50 | 
51 |     { // user-defined complex type: the result type must match its real() friend
52 |       using LinearAlgebra::impl::real_if_needed;
53 |       TestLinearAlgebra::MyComplex z{ 3.0, 4.0 };
54 |       auto z_real = real_if_needed(z);
55 |       EXPECT_EQ(z_real, 3.0);
56 |       static_assert(std::is_same_v<decltype(z_real), decltype(real(z))>);
57 |     }
58 |     { // user-defined real type: the value comes back unchanged
59 |       using LinearAlgebra::impl::real_if_needed;
60 |       TestLinearAlgebra::MyReal x{ 3.0 };
61 |       auto x_real = real_if_needed(x);
62 |       EXPECT_EQ(x_real, TestLinearAlgebra::MyReal{ 3.0 });
63 |       static_assert(std::is_same_v<decltype(x_real), TestLinearAlgebra::MyReal>);
64 |     }
65 |   }
66 | } // end anonymous namespace
67 | 


--------------------------------------------------------------------------------
/tests/native/scale.cpp:
--------------------------------------------------------------------------------
 1 | #include "./gtest_fixtures.hpp"
 2 | 
 3 | namespace {
 4 |   using LinearAlgebra::scale;
 5 | 
 6 |   // Scales a vector of doubles in place, first by a double and then by a float.
 7 |   TEST(BLAS1_scale, mdspan_double)
 8 |   {
 9 |     using scalar_t = double;
10 |     using vector_t = mdspan<scalar_t, extents<std::size_t, dynamic_extent>>;
11 |     constexpr std::size_t numElements = 5;
12 |     std::vector<scalar_t> buffer(numElements);
13 |     vector_t x(buffer.data(), numElements);
14 | 
15 |     // Value stored at position k before each scaling.
16 |     const auto initialValue = [](std::size_t k) {
17 |       return scalar_t(k) + 1.0;
18 |     };
19 |     const auto resetVector = [&]() {
20 |       for (std::size_t k = 0; k < numElements; ++k) { x(k) = initialValue(k); }
21 |     };
22 | 
23 |     { // factor of the element type
24 |       resetVector();
25 |       const scalar_t scaleFactor = 5.0;
26 |       scale(scaleFactor, x);
27 |       for (std::size_t k = 0; k < numElements; ++k) {
28 |         const scalar_t x_k = initialValue(k);
29 |         EXPECT_EQ( x(k), scaleFactor * x_k );
30 |       }
31 |     }
32 |     { // single-precision factor
33 |       resetVector();
34 |       const float scaleFactor = 5.0;
35 |       scale(scaleFactor, x);
36 |       for (std::size_t k = 0; k < numElements; ++k) {
37 |         const scalar_t x_k = initialValue(k);
38 |         EXPECT_EQ( x(k), scaleFactor * x_k );
39 |       }
40 |     }
41 |   }
42 | 
43 |   // Scales a vector of complex<double> in place, by a real and by a complex factor.
44 |   TEST(BLAS1_scale, mdspan_complex_double)
45 |   {
46 |     using real_t = double;
47 |     using scalar_t = std::complex<real_t>;
48 |     using vector_t = mdspan<scalar_t, extents<std::size_t, dynamic_extent>>;
49 | 
50 |     constexpr std::size_t numElements = 5;
51 |     std::vector<scalar_t> buffer(numElements);
52 |     vector_t x(buffer.data(), numElements);
53 | 
54 |     const auto initialValue = [](std::size_t k) {
55 |       return scalar_t(real_t(k) + 4.0, -real_t(k) - 1.0);
56 |     };
57 |     const auto resetVector = [&]() {
58 |       for (std::size_t k = 0; k < numElements; ++k) { x(k) = initialValue(k); }
59 |     };
60 | 
61 |     { // real factor
62 |       resetVector();
63 |       const real_t scaleFactor = 5.0;
64 |       scale(scaleFactor, x);
65 |       for (std::size_t k = 0; k < numElements; ++k) {
66 |         const scalar_t x_k = initialValue(k);
67 |         EXPECT_EQ( x(k), scaleFactor * x_k );
68 |       }
69 |     }
70 |     { // complex factor
71 |       resetVector();
72 |       const scalar_t scaleFactor (5.0, -1.0);
73 |       scale(scaleFactor, x);
74 |       for (std::size_t k = 0; k < numElements; ++k) {
75 |         const scalar_t x_k = initialValue(k);
76 |         EXPECT_EQ( x(k), scaleFactor * x_k );
77 |       }
78 |     }
79 |   }
80 | }
81 | 
82 | // int main() {
83 | //   std::cout << "hello world" << std::endl;
84 | // }
85 | 


--------------------------------------------------------------------------------
/tests/native/swap.cpp:
--------------------------------------------------------------------------------
 1 | #include "./gtest_fixtures.hpp"
 2 | 
 3 | namespace {
 4 |   using LinearAlgebra::swap_elements;
 5 | 
 6 |   // Swaps two vectors of doubles and checks that each now holds the other's values.
 7 |   TEST(BLAS1_swap, mdspan_double)
 8 |   {
 9 |     using scalar_t = double;
10 |     using vector_t = mdspan<scalar_t, extents<std::size_t, dynamic_extent>>;
11 | 
12 |     constexpr std::size_t numElements = 5;
13 |     // x views the first half of the buffer and y the second half.
14 |     std::vector<scalar_t> buffer(2 * numElements);
15 |     vector_t x(buffer.data(), numElements);
16 |     vector_t y(buffer.data() + numElements, numElements);
17 | 
18 |     const auto initialX = [](std::size_t k) { return scalar_t(k) + 1.0; };
19 |     const auto initialY = [](std::size_t k) { return scalar_t(k) + 2.0; };
20 |     for (std::size_t k = 0; k < numElements; ++k) {
21 |       x(k) = initialX(k);
22 |       y(k) = initialY(k);
23 |     }
24 | 
25 |     swap_elements(x, y);
26 |     for (std::size_t k = 0; k < numElements; ++k) {
27 |       const scalar_t x_k = initialX(k);
28 |       const scalar_t y_k = initialY(k);
29 |       EXPECT_EQ( x(k), y_k );
30 |       EXPECT_EQ( y(k), x_k );
31 |     }
32 |   }
33 | 
34 |   // Same check for vectors of complex<double>.
35 |   TEST(BLAS1_swap, mdspan_complex_double)
36 |   {
37 |     using real_t = double;
38 |     using scalar_t = std::complex<real_t>;
39 |     using vector_t = mdspan<scalar_t, extents<std::size_t, dynamic_extent>>;
40 | 
41 |     constexpr std::size_t numElements = 5;
42 |     // x views the first half of the buffer and y the second half.
43 |     std::vector<scalar_t> buffer(2 * numElements);
44 |     vector_t x(buffer.data(), numElements);
45 |     vector_t y(buffer.data() + numElements, numElements);
46 | 
47 |     const auto initialX = [](std::size_t k) { return scalar_t(real_t(k) + 4.0, -real_t(k) - 1.0); };
48 |     const auto initialY = [](std::size_t k) { return scalar_t(real_t(k) + 5.0, -real_t(k) - 2.0); };
49 |     for (std::size_t k = 0; k < numElements; ++k) {
50 |       x(k) = initialX(k);
51 |       y(k) = initialY(k);
52 |     }
53 | 
54 |     swap_elements(x, y);
55 |     for (std::size_t k = 0; k < numElements; ++k) {
56 |       const scalar_t x_k = initialX(k);
57 |       const scalar_t y_k = initialY(k);
58 |       EXPECT_EQ( x(k), y_k );
59 |       EXPECT_EQ( y(k), x_k );
60 |     }
61 |   }
62 | }
63 | 
64 | // int main() {
65 | //   std::cout << "hello world" << std::endl;
66 | // }
67 | 


--------------------------------------------------------------------------------
/tests/native/trsm.cpp:
--------------------------------------------------------------------------------
 1 | #include "./gtest_fixtures.hpp"
 2 | #include <iostream>
 3 | 
 4 | namespace {
 5 |   // Row-major reference data: A is 3 x 3 and lower triangular, B is 4 x 3.
 6 |   constexpr std::size_t num_rows_A = 3;
 7 |   constexpr std::size_t num_cols_A = 3;
 8 |   constexpr double storage_A[] = {
 9 |     8., 0., 0.,
10 |     2., 8., 0.,
11 |     1., 2., 8.};
12 |   constexpr std::size_t num_rows_B = 4;
13 |   constexpr std::size_t num_cols_B = 3;
14 |   constexpr double storage_B[] = {
15 |     1.,  2.,  3.,
16 |     4.,  5.,  6.,
17 |     7.,  8.,  9.,
18 |     10., 11., 12.};
19 |   // The product B * A; nothing in this file reads it.
20 |   constexpr double storage_B_times_A[] = {
21 |     15.,  22.,  24.,
22 |     48.,  52.,  48.,
23 |     81.,  82.,  72.,
24 |     114., 112., 96.};
25 |   // The matrix X with X * A == B, i.e. B * inverse(A).
26 |   constexpr double storage_B_times_inv_A[] = {
27 |     0.0390625, 0.15625, 0.375,
28 |     0.296875 , 0.4375 , 0.75,
29 |     0.5546875, 0.71875, 1.125,
30 |     0.8125   , 1.     , 1.5};
31 | 
32 |   // Copies a num_rows x num_cols row-major array into out, element by element.
33 |   template<class IndexType, class Layout>
34 |   void fill_from_layout_right_storage(
35 |     mdspan<double, dextents<IndexType, 2>, Layout> out,
36 |     const double* const in_storage,
37 |     const std::size_t num_rows,
38 |     const std::size_t num_cols)
39 |   {
40 |     using source_t = mdspan<const double, dextents<std::size_t, 2>, layout_right>;
41 |     source_t in(in_storage, num_rows, num_cols);
42 |     for (std::size_t row = 0; row < num_rows; ++row) {
43 |       for (std::size_t col = 0; col < num_cols; ++col) {
44 |         out(row, col) = in(row, col);
45 |       }
46 |     }
47 |   }
48 | 
49 |   // Regression test for https://github.com/kokkos/stdBLAS/issues/244 .
50 |   // It will fail if the j loop (mentioned in the bug) counts up instead of down.
51 |   template<class IndexType, class Layout>
52 |   void test_tsrm_lower_triangular_right_side()
53 |   {
54 |     using matrix_t = mdspan<double, dextents<IndexType, 2>, Layout>;
55 |     std::vector<double> vec_A(num_rows_A * num_cols_A);
56 |     std::vector<double> vec_B(num_rows_B * num_cols_B);
57 |     std::vector<double> vec_X(num_rows_B * num_cols_B); // X has the shape of B
58 |     matrix_t A(vec_A.data(), num_rows_A, num_cols_A);
59 |     matrix_t B_nonconst(vec_B.data(), num_rows_B, num_cols_B);
60 |     matrix_t X(vec_X.data(), num_rows_B, num_cols_B);
61 |     fill_from_layout_right_storage<IndexType, Layout>(A, storage_A, num_rows_A, num_cols_A);
62 |     fill_from_layout_right_storage<IndexType, Layout>(B_nonconst, storage_B, num_rows_B, num_cols_B);
63 |     mdspan<const double, dextents<IndexType, 2>, Layout> B = B_nonconst;
64 | 
65 |     using LinearAlgebra::explicit_diagonal;
66 |     using LinearAlgebra::lower_triangle;
67 |     using LinearAlgebra::right_side;
68 |     using LinearAlgebra::triangular_matrix_matrix_solve;
69 |     triangular_matrix_matrix_solve(A, lower_triangle, explicit_diagonal, right_side, B, X);
70 | 
71 |     mdspan<const double, dextents<IndexType, 2>, layout_right>
72 |       B_times_inv_A(storage_B_times_inv_A, num_rows_B, num_cols_A);
73 |     for (IndexType r = 0; r < IndexType(num_rows_B); ++r) {
74 |       for (IndexType c = 0; c < IndexType(num_cols_A); ++c) {
75 |         // Exact comparison: A and B were chosen so that the solve has no rounding error.
76 |         EXPECT_EQ( X(r,c), B_times_inv_A(r,c) );
77 |       }
78 |     }
79 |   }
80 | 
81 |   TEST(BLAS3_trsm, double_size_t_layout_right )
82 |   {
83 |     test_tsrm_lower_triangular_right_side< ::std::size_t, layout_right >();
84 |   }
85 | 
86 |   TEST(BLAS3_trsm, double_int_layout_left )
87 |   {
88 |     test_tsrm_lower_triangular_right_side< int, layout_left >();
89 |   }
90 | 
91 | } // end anonymous namespace
92 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_add_kk.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_ADD_HPP_
 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_ADD_HPP_
 4 | 
 5 | #include "signal_kokkos_impl_called.hpp"
 6 | #include "static_extent_match.hpp"
 7 | 
 8 | namespace KokkosKernelsSTD {
 9 | namespace add_impl{
10 | // Calls f(std::integral_constant<T, I>{}) for each I in Is, left to right.
11 | template <class F, class T, T... Is>
12 | void repeat_impl(F&& f, std::integer_sequence<T, Is...>){
13 |   ( f(std::integral_constant<T, Is>{}), ... );
14 | }
15 | 
16 | // Calls f with std::integral_constant<int, 0> up to <int, N-1>, in that order.
17 | template <int N, class F>
18 | void repeat(F&& f){
19 |   repeat_impl(f, std::make_integer_sequence<int, N>{});
20 | }
21 | 
22 | } // namespace add_impl
23 | 
24 | // keeping this in mind: https://github.com/kokkos/stdBLAS/issues/122
25 | // Kokkos overload of add for mdspans of equal rank (at most 2) with default accessors.
26 | // Forwards to KokkosBlas::update(1, x, 1, y, 0, z) after checking the extents.
27 | // Throws std::runtime_error when the run-time extents of x, y and z differ.
28 | template<class ExeSpace,
29 |          class ElementType_x,
30 |          std::experimental::extents<>::size_type ... ext_x,
31 |          class Layout_x,
32 |          class ElementType_y,
33 |          std::experimental::extents<>::size_type ... ext_y,
34 |          class Layout_y,
35 |          class ElementType_z,
36 |          std::experimental::extents<>::size_type ... ext_z,
37 |          class Layout_z>
38 |   requires (sizeof...(ext_x) == sizeof...(ext_y) && sizeof...(ext_x) == sizeof...(ext_z))
39 | void add(kokkos_exec<ExeSpace>,
40 |          std::experimental::mdspan<
41 |            ElementType_x,
42 |            std::experimental::extents<ext_x ...>,
43 |            Layout_x,
44 |            std::experimental::default_accessor<ElementType_x>
45 |          > x,
46 |          std::experimental::mdspan<
47 |            ElementType_y,
48 |            std::experimental::extents<ext_y ...>,
49 |            Layout_y,
50 |            std::experimental::default_accessor<ElementType_y>
51 |          > y,
52 |          std::experimental::mdspan<
53 |            ElementType_z,
54 |            std::experimental::extents<ext_z ...>,
55 |            Layout_z,
56 |            std::experimental::default_accessor<ElementType_z>
57 |          > z)
58 | {
59 |   static_assert(z.rank() <= 2);
60 |   // P1673 preconditions: run-time extents must agree in every dimension
61 |   add_impl::repeat<x.rank()>
62 |     ([=](int r){
63 |       if ( x.extent(r) != y.extent(r) ){
64 |         throw std::runtime_error("KokkosBlas: add: x.extent(r) != y.extent(r) for r=" + std::to_string(r));
65 |       }
66 |       if ( y.extent(r) != z.extent(r) ){
67 |         throw std::runtime_error("KokkosBlas: add: y.extent(r) != z.extent(r) for r = " + std::to_string(r));
68 |       }
69 |     });
70 | 
71 |   // P1673 mandates: enforced with static_assert (the results used to be discarded)
72 |   using x_type = decltype(x);
73 |   using y_type = decltype(y);
74 |   using z_type = decltype(z);
75 |   add_impl::repeat<x.rank()>
76 |     ([](auto r){
77 |       constexpr std::size_t dim = decltype(r)::value; // dimension as a compile-time constant
78 |       static_assert(Impl::static_extent_match(x_type::static_extent(dim), z_type::static_extent(dim)));
79 |       static_assert(Impl::static_extent_match(y_type::static_extent(dim), z_type::static_extent(dim)));
80 |       static_assert(Impl::static_extent_match(x_type::static_extent(dim), y_type::static_extent(dim)));
81 |     });
82 | 
83 |   Impl::signal_kokkos_impl_called("add");
84 |   auto x_view = Impl::mdspan_to_view(x);
85 |   auto y_view = Impl::mdspan_to_view(y);
86 |   auto z_view = Impl::mdspan_to_view(z);
87 | 
88 |   const auto alpha = static_cast<typename decltype(x_view)::non_const_value_type>(1);
89 |   const auto beta  = static_cast<typename decltype(y_view)::non_const_value_type>(1);
90 |   const auto zero  = static_cast<typename decltype(z_view)::non_const_value_type>(0);
91 |   KokkosBlas::update(alpha, x_view, beta, y_view, zero, z_view);
92 | }
93 | }
94 | #endif
95 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_copy_kk.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_COPY_HPP_
 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_COPY_HPP_
 4 | 
 5 | #include "signal_kokkos_impl_called.hpp"
 6 | 
 7 | namespace KokkosKernelsSTD {
 8 | 
 9 | // Kokkos overload of copy for rank-1 and rank-2 mdspans with default accessors:
10 | // assigns every element of x to the matching element of y, then fences.
11 | template<class ExeSpace,
12 |          class ElementType_x,
13 |          std::experimental::extents<>::size_type ... ext_x,
14 |          class Layout_x,
15 |          class ElementType_y,
16 |          std::experimental::extents<>::size_type ... ext_y,
17 |          class Layout_y>
18 | requires ( (sizeof...(ext_x) == sizeof...(ext_y)) && (sizeof...(ext_x) <=2) )
19 | void copy(kokkos_exec<ExeSpace> /*kexe*/,
20 |           std::experimental::mdspan<
21 |             ElementType_x,
22 |             std::experimental::extents<ext_x ...>,
23 |             Layout_x,
24 |             std::experimental::default_accessor<ElementType_x>
25 |           > x,
26 |           std::experimental::mdspan<
27 |             ElementType_y,
28 |             std::experimental::extents<ext_y ...>,
29 |             Layout_y,
30 |             std::experimental::default_accessor<ElementType_y>
31 |           > y)
32 | {
33 |   Impl::signal_kokkos_impl_called("copy");
34 | 
35 |   auto src = Impl::mdspan_to_view(x);
36 |   auto dst = Impl::mdspan_to_view(y);
37 |   auto space = ExeSpace();
38 | 
39 |   using src_layout = typename decltype(src)::array_layout;
40 |   using dst_layout = typename decltype(dst)::array_layout;
41 |   if constexpr (std::is_same_v<src_layout, dst_layout>) {
42 |     Kokkos::deep_copy(space, dst, src);
43 |   }
44 |   else if constexpr (x.rank() == 1) {
45 |     Kokkos::parallel_for(Kokkos::RangePolicy(space, 0, src.extent(0)),
46 |                          KOKKOS_LAMBDA (const std::size_t i){
47 |                            dst(i) = src(i);
48 |                          });
49 |   }
50 |   else {
51 |     Kokkos::parallel_for(Kokkos::RangePolicy(space, 0, src.extent(0)),
52 |                          KOKKOS_LAMBDA (const std::size_t i){
53 |                            for (std::size_t j = 0; j < src.extent(1); ++j){
54 |                              dst(i,j) = src(i,j);
55 |                            }
56 |                          });
57 |   }
58 | 
59 |   // The fence is needed in the deep_copy case too: passing an execution
60 |   // space instance makes deep_copy potentially non-blocking, see
61 |   // https://github.com/kokkos/kokkos/wiki/Kokkos%3A%3Adeep_copy
62 |   space.fence();
63 |   // with the latest kokkos: fence("KokkosStdBlas::copy: fence after operation")
64 | }
65 | 
66 | } // end namespace KokkosKernelsSTD
67 | #endif
68 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_dot_kk.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_DOT_HPP_
  3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_DOT_HPP_
  4 | 
  5 | // keeping this in mind: https://github.com/kokkos/stdBLAS/issues/122
  6 | 
  7 | #include "signal_kokkos_impl_called.hpp"
  8 | #include "static_extent_match.hpp"
  9 | 
 10 | namespace KokkosKernelsSTD {
 11 | 
 12 | // Non-conjugating dot product: x and y both use default_accessor.
 13 | // Returns init plus the sum over i of x(i) * y(i).
 14 | template<class ExeSpace,
 15 |          class ElementType_x,
 16 |          std::experimental::extents<>::size_type ext_x,
 17 |          class Layout_x,
 18 |          class ElementType_y,
 19 |          std::experimental::extents<>::size_type ext_y,
 20 |          class Layout_y,
 21 |          class Scalar>
 22 | Scalar dot(kokkos_exec<ExeSpace> /*kexe*/,
 23 |            std::experimental::mdspan<
 24 |              ElementType_x,
 25 |              std::experimental::extents<ext_x>,
 26 |              Layout_x,
 27 |              std::experimental::default_accessor<ElementType_x>
 28 |            > x,
 29 |            std::experimental::mdspan<
 30 |              ElementType_y,
 31 |              std::experimental::extents<ext_y>,
 32 |              Layout_y,
 33 |              std::experimental::default_accessor<ElementType_y>
 34 |            > y,
 35 |            Scalar init)
 36 | {
 37 |   // P1673 mandates
 38 |   static_assert(Impl::static_extent_match(x.static_extent(0), y.static_extent(0)));
 39 | 
 40 |   // P1673 preconditions
 41 |   if ( x.extent(0) != y.extent(0) ){
 42 |     throw std::runtime_error("KokkosBlas: dot: x.extent(0) != y.extent(0)");
 43 |   }
 44 | 
 45 |   Impl::signal_kokkos_impl_called("dot");
 46 | 
 47 |   auto xv = Impl::mdspan_to_view(x);
 48 |   auto yv = Impl::mdspan_to_view(y);
 49 | 
 50 |   // KokkosBlas::dot is deliberately not used for this overload: it would
 51 |   // conjugate x for complex element types, whereas default accessors ask
 52 |   // for the plain sum of x(i)*y(i).
 53 |   //
 54 |   // The accumulator takes the type of a product of view elements, not Scalar:
 55 |   // in the complex case Scalar is a std::complex type while the views hold
 56 |   // Kokkos::complex values.
 57 |   using accum_t = decltype(xv(0)*yv(0));
 58 |   accum_t sum = {};
 59 |   Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, xv.extent(0)),
 60 |         KOKKOS_LAMBDA (const std::size_t idx, accum_t & partial){
 61 |           partial += xv(idx)*yv(idx);
 62 |         }, sum);
 63 |   // fence not needed because reducing into a scalar result
 64 | 
 65 |   // explicit conversion so that this works when Scalar is std::complex
 66 |   return Scalar(sum) + init;
 67 | }
 68 | 
 69 | // Conjugating dot product: x carries a conjugated_accessor wrapped around a
 70 | // default_accessor. Returns init plus the result of KokkosBlas::dot.
 71 | template<class ExeSpace,
 72 |          class ElementType_x,
 73 |          std::experimental::extents<>::size_type ext_x,
 74 |          class Layout_x,
 75 |          class ElementType_y,
 76 |          std::experimental::extents<>::size_type ext_y,
 77 |          class Layout_y,
 78 |          class Scalar>
 79 | Scalar dot(kokkos_exec<ExeSpace>,
 80 |            std::experimental::mdspan<
 81 |              ElementType_x,
 82 |              std::experimental::extents<ext_x>,
 83 |              Layout_x,
 84 |              std::experimental::linalg::conjugated_accessor<
 85 |                std::experimental::default_accessor<ElementType_x>, ElementType_x
 86 |              >
 87 |            > x,
 88 |            std::experimental::mdspan<
 89 |              ElementType_y,
 90 |              std::experimental::extents<ext_y>,
 91 |              Layout_y,
 92 |              std::experimental::default_accessor<ElementType_y>
 93 |            > y,
 94 |            Scalar init)
 95 | {
 96 |   // P1673 mandates
 97 |   static_assert(Impl::static_extent_match(x.static_extent(0), y.static_extent(0)));
 98 | 
 99 |   // P1673 preconditions
100 |   if ( x.extent(0) != y.extent(0) ){
101 |     throw std::runtime_error("KokkosBlas: dot: x.extent(0) != y.extent(0)");
102 |   }
103 | 
104 |   Impl::signal_kokkos_impl_called("dot");
105 |   auto xv = Impl::mdspan_to_view(x);
106 |   auto yv = Impl::mdspan_to_view(y);
107 | 
108 |   // x has a conjugated (nested default) accessor, and KokkosBlas::dot
109 |   // conjugates x automatically, so it computes exactly what is wanted here.
110 |   const auto conj_dot = KokkosBlas::dot(xv, yv);
111 |   return Scalar(conj_dot) + init;
112 | }
113 | 
114 | }
115 | #endif
116 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_idx_abs_max_kk.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_IDX_ABS_MAX_HPP_
 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_IDX_ABS_MAX_HPP_
 4 | 
 5 | #include <KokkosBlas1_iamax.hpp>
 6 | #include "signal_kokkos_impl_called.hpp"
 7 | 
 8 | namespace KokkosKernelsSTD {
 9 | 
10 | // keeping this in mind: https://github.com/kokkos/stdBLAS/issues/122
11 | // Kokkos overload: forwards to KokkosBlas::iamax and subtracts 1 from its result.
12 | template<class ExeSpace,
13 |          class ElementType,
14 |          std::experimental::extents<>::size_type ext0,
15 |          class Layout>
16 | std::experimental::extents<>::size_type
17 | vector_idx_abs_max(kokkos_exec<ExeSpace> /*kexe*/,
18 |                    std::experimental::mdspan<
19 |                      ElementType,
20 |                      std::experimental::extents<ext0>,
21 |                      Layout,
22 |                      std::experimental::default_accessor<ElementType>> v)
23 | {
24 |   Impl::signal_kokkos_impl_called("vector_idx_abs_max");
25 |   auto view = Impl::mdspan_to_view(v);
26 | 
27 |   // The 1 subtracted below is related to:
28 |   // https://github.com/kokkos/stdBLAS/issues/114
29 |   const auto iamax_result = KokkosBlas::iamax(view);
30 |   return iamax_result - 1;
31 | }
32 | 
33 | }
34 | #endif
35 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_matrix_frob_norm_kk.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_FROB_NORM_HPP_
 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_FROB_NORM_HPP_
 4 | 
 5 | #include "signal_kokkos_impl_called.hpp"
 6 | 
 7 | namespace KokkosKernelsSTD {
 8 | 
 9 | // Kokkos overload of matrix_frob_norm for a rank-2 mdspan with default accessor.
10 | // NOTE(review): the 1x1 shortcut adds init outside the square root while the
11 | // general path adds it inside -- confirm which of the two is intended.
12 | template<
13 |     class ExeSpace,
14 |     class ElementType,
15 |     std::experimental::extents<>::size_type numRows,
16 |     std::experimental::extents<>::size_type numCols,
17 |     class Layout,
18 |     class Scalar>
19 | Scalar matrix_frob_norm(kokkos_exec<ExeSpace> /*kexe*/,
20 |                         std::experimental::mdspan<
21 |                         ElementType,
22 |                         std::experimental::extents<numRows, numCols>,
23 |                         Layout,
24 |                         std::experimental::default_accessor<ElementType>> A,
25 |                         Scalar init)
26 | {
27 |   Impl::signal_kokkos_impl_called("matrix_frob_norm");
28 |   // corner cases
29 |   constexpr std::size_t zero = 0;
30 |   constexpr std::size_t one  = 1;
31 |   if (A.extent(0) == zero || A.extent(1) == zero) {
32 |     return init;
33 |   }
34 |   else if(A.extent(0) == one && A.extent(1) == one) {
35 |     using std::abs;
36 |     return init + abs(A(0, 0));
37 |   }
38 | 
39 |   auto A_view = Impl::mdspan_to_view(A);
40 |   // here we use an impl similar to the scaled_sum_of_squares
41 |   // but we do not call that directly because it would require
42 |   // flattening the matrix whereas this impl works for any layout
43 |   using arithm_traits = Kokkos::Details::ArithTraits<ElementType>;
44 |   std::experimental::linalg::sum_of_squares_result<Scalar> ssqr;
45 |   ssqr.scaling_factor = {};
46 |   ssqr.scaled_sum_of_squares = {};
47 | 
48 |   // pass 1: max-reduce abs(A(i,j)) over all elements into ssqr.scaling_factor
49 |   Kokkos::Max<Scalar> max_reducer(ssqr.scaling_factor);
50 |   Kokkos::parallel_reduce( Kokkos::RangePolicy(ExeSpace(), 0, A_view.extent(0)*A_view.extent(1)),
51 |                            KOKKOS_LAMBDA (const std::size_t k, Scalar & lmax){
52 |                              const auto i = k / A_view.extent(1);
53 |                              const auto j = k % A_view.extent(1);
54 |                              const auto val = arithm_traits::abs(A_view(i,j));
55 |                              max_reducer.join(lmax, val);
56 |                            },
57 |                            max_reducer);
58 |   // no fence needed since reducing into scalar
59 | 
60 |   // pass 2, skipped for a zero scaling factor: dividing by it would give 0/0 (NaN)
61 |   if (ssqr.scaling_factor != Scalar{}) {
62 |     Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, A_view.extent(0)*A_view.extent(1)),
63 |                             KOKKOS_LAMBDA (const std::size_t k, Scalar & update){
64 |                               const auto i = k / A_view.extent(1);
65 |                               const auto j = k % A_view.extent(1);
66 |                               const auto tmp = arithm_traits::abs(A_view(i,j))/ssqr.scaling_factor;
67 |                               update += tmp*tmp;
68 |                             }, ssqr.scaled_sum_of_squares);
69 |   }
70 | 
71 |   return std::sqrt(init + ssqr.scaling_factor * ssqr.scaling_factor * ssqr.scaled_sum_of_squares);
72 | }
73 | } // end namespace KokkosKernelsSTD
74 | #endif
75 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_matrix_inf_norm_kk.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_INF_NORM_HPP_
 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_INF_NORM_HPP_
 4 | 
 5 | #include "signal_kokkos_impl_called.hpp"
 6 | 
 7 | namespace KokkosKernelsSTD {
 8 | 
 9 | // Kokkos overload of matrix_inf_norm for a rank-2 mdspan with default accessor.
10 | // Returns init plus the largest row sum of absolute values; init alone if A is empty.
11 | template<
12 |     class ExeSpace,
13 |     class ElementType,
14 |     std::experimental::extents<>::size_type numRows,
15 |     std::experimental::extents<>::size_type numCols,
16 |     class Layout,
17 |     class Scalar>
18 | Scalar matrix_inf_norm(kokkos_exec<ExeSpace> /*kexe*/,
19 |                        std::experimental::mdspan<
20 |                        ElementType,
21 |                        std::experimental::extents<numRows, numCols>,
22 |                        Layout,
23 |                        std::experimental::default_accessor<ElementType>> A,
24 |                        Scalar init)
25 | {
26 |   Impl::signal_kokkos_impl_called("matrix_inf_norm");
27 | 
28 |   const bool has_no_elements = (A.extent(0) == 0) || (A.extent(1) == 0);
29 |   if (has_no_elements) {
30 |     return init;
31 |   }
32 | 
33 |   auto A_view = Impl::mdspan_to_view(A);
34 |   using abs_traits = Kokkos::Details::ArithTraits<ElementType>;
35 |   Scalar max_row_sum = {};
36 |   Kokkos::Max<Scalar> max_op(max_row_sum);
37 |   Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, A_view.extent(0)),
38 |                           KOKKOS_LAMBDA (const std::size_t row, Scalar & running_max)
39 |                           {
40 |                             Scalar row_sum = abs_traits::abs(A_view(row,0));
41 |                             for (std::size_t col = 1; col < A_view.extent(1); ++col){
42 |                               row_sum += abs_traits::abs(A_view(row,col));
43 |                             }
44 |                             max_op.join(running_max, row_sum);
45 |                           }, max_op);
46 |   // fence not needed because reducing into a scalar result
47 | 
48 |   return init + max_row_sum;
49 | }
50 | 
51 | } // end namespace KokkosKernelsSTD
52 | #endif
53 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_matrix_one_norm_kk.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_ONE_NORM_HPP_
 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_ONE_NORM_HPP_
 4 | 
 5 | #include "signal_kokkos_impl_called.hpp"
 6 | 
 7 | namespace KokkosKernelsSTD {
 8 | 
 9 | template<
10 |     class ExeSpace,
11 |     class ElementType,
12 |     std::experimental::extents<>::size_type numRows,
13 |     std::experimental::extents<>::size_type numCols,
14 |     class Layout,
15 |     class Scalar>
16 | Scalar matrix_one_norm(kokkos_exec<ExeSpace> /*kexe*/,
17 | 			std::experimental::mdspan<
18 | 			ElementType,
19 | 			std::experimental::extents<numRows, numCols>,
20 | 			Layout,
21 | 			std::experimental::default_accessor<ElementType>> A,
22 | 			Scalar init)
23 | {
24 | 
25 |   Impl::signal_kokkos_impl_called("matrix_one_norm");
26 | 
27 |   if (A.extent(1) == 0){
28 |     return init;
29 |   }
30 | 
31 |   auto A_view = Impl::mdspan_to_view(A);
32 | 
33 |   Scalar result = {};
34 |   Kokkos::Max<Scalar> reducer(result);
35 |   Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, A_view.extent(1)),
36 | 			  KOKKOS_LAMBDA (const std::size_t j, Scalar & update)
37 | 			  {
38 | 			    using ats = Kokkos::Details::ArithTraits<ElementType>;
39 | 			    Scalar mysum = ats::abs(A_view(0,j));
40 | 			    for (std::size_t i=1; i<A_view.extent(0); ++i){
41 | 			      mysum += ats::abs(A_view(i,j));
42 | 			    }
43 | 			    reducer.join(update, mysum);
44 | 			  }, reducer);
45 | 
46 |   // fence not needed because reducing into result
47 | 
48 |   return init + result;
49 | }
50 | 
51 | } // end namespace KokkosKernelsSTD
52 | #endif
53 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_scale_kk.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_SCALE_HPP_
 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_SCALE_HPP_
 4 | 
 5 | #include "signal_kokkos_impl_called.hpp"
 6 | 
 7 | namespace KokkosKernelsSTD {
 8 | 
 9 | //
10 | // for now, specialize for default_accessor
11 | // https://github.com/kokkos/stdBLAS/issues/122
12 | //
13 | template<class ExeSpace,
14 |          class Scalar,
15 |          class ElementType,
16 |          std::experimental::extents<>::size_type ... ext,
17 |          class Layout>
18 | requires (sizeof...(ext) <= 2)
19 | void scale(kokkos_exec<ExeSpace> /*kexe*/,
20 | 	   const Scalar alpha,
21 |            std::experimental::mdspan<
22 | 	   ElementType,
23 | 	   std::experimental::extents<ext ...>,
24 | 	   Layout,
25 | 	   std::experimental::default_accessor<ElementType>
26 | 	   > obj)
27 | {
28 | 
29 |   Impl::signal_kokkos_impl_called("scale");
30 |   auto obj_view = Impl::mdspan_to_view(obj);
31 |   KokkosBlas::scal(obj_view, alpha, obj_view);
32 | }
33 | 
34 | }
35 | #endif
36 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_swap_elements_kk.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_SWAP_HPP_
  3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_SWAP_HPP_
  4 | 
  5 | #include <utility>
  6 | #include "signal_kokkos_impl_called.hpp"
  7 | #include "static_extent_match.hpp"
  8 | 
  9 | namespace KokkosKernelsSTD {
 10 | 
 11 | namespace swap_impl{
 12 | 
 13 | // this is here until we can use kokkos 3.6 which has swap avail
 14 | template <class T>
 15 | requires(std::is_move_assignable<T>::value && std::is_move_constructible<T>::value)
 16 | KOKKOS_INLINE_FUNCTION void _my_tmp_swap(T& a, T& b) noexcept
 17 | {
 18 |   T tmp = std::move(a);
 19 |   a     = std::move(b);
 20 |   b     = std::move(tmp);
 21 | }
 22 | 
 23 | template <class F, class T, T... Is>
 24 | void repeat_impl(F&& f, std::integer_sequence<T, Is...>){
 25 |   ( f(std::integral_constant<T, Is>{}), ... );
 26 | }
 27 | 
 28 | template <int N, class F>
 29 | void repeat(F&& f){
 30 |   repeat_impl(f, std::make_integer_sequence<int, N>{});
 31 | }
 32 | 
 33 | } // end namespace swap_impl
 34 | 
 35 | //
 36 | // for now, specialize for default_accessor
 37 | // https://github.com/kokkos/stdBLAS/issues/122
 38 | //
 39 | template<class ExeSpace,
 40 | 	 class ElementType_x,
 41 |          std::experimental::extents<>::size_type ... ext_x,
 42 |          class Layout_x,
 43 |          class ElementType_y,
 44 |          std::experimental::extents<>::size_type ... ext_y,
 45 |          class Layout_y>
 46 |   requires (sizeof...(ext_x) == sizeof...(ext_y))
 47 | void swap_elements(kokkos_exec<ExeSpace> /*kexe*/,
 48 | 		   std::experimental::mdspan<
 49 | 		     ElementType_x,
 50 | 		     std::experimental::extents<ext_x ...>,
 51 | 		     Layout_x,
 52 | 		     std::experimental::default_accessor<ElementType_x>
 53 | 		   > x,
 54 | 		   std::experimental::mdspan<
 55 | 		     ElementType_y,
 56 | 		     std::experimental::extents<ext_y ...>,
 57 | 		     Layout_y,
 58 | 		     std::experimental::default_accessor<ElementType_y>
 59 | 		   > y)
 60 | {
 61 |   // matching rank already checked via requires above
 62 |   static_assert(x.rank() <= 2);
 63 | 
 64 |   // P1673 preconditions
 65 |   swap_impl::repeat<x.rank()>
 66 |     ([=](int r){
 67 |       if ( x.extent(r) != y.extent(r) ){
 68 | 	throw std::runtime_error("KokkosBlas: swap_elements: x.extent(r) != y.extent(r) for r="
 69 | 				 + std::to_string(r));
 70 |       }
 71 |     });
 72 | 
 73 |   // P1673 mandates
 74 |   swap_impl::repeat<x.rank()>
 75 |     ([=](int r){
 76 |       Impl::static_extent_match(x.static_extent(r), y.static_extent(r));
 77 |     });
 78 | 
 79 |   Impl::signal_kokkos_impl_called("swap_elements");
 80 | 
 81 |   auto x_view = Impl::mdspan_to_view(x);
 82 |   auto y_view = Impl::mdspan_to_view(y);
 83 | 
 84 |   auto ex = ExeSpace();
 85 |   if constexpr(x.rank()==1){
 86 |     Kokkos::parallel_for(Kokkos::RangePolicy(ex, 0, x_view.extent(0)),
 87 | 			 KOKKOS_LAMBDA (std::size_t i){
 88 | 			   swap_impl::_my_tmp_swap(x_view(i), y_view(i));
 89 | 			 });
 90 |   }
 91 | 
 92 |   else{
 93 |     Kokkos::parallel_for(Kokkos::RangePolicy(ex, 0, x_view.extent(0)),
 94 | 			 KOKKOS_LAMBDA (std::size_t i){
 95 | 			   for (std::size_t j=0; j<x_view.extent(1); ++j){
 96 | 			     swap_impl::_my_tmp_swap(x_view(i,j), y_view(i,j));
 97 | 			   }
 98 | 			 });
 99 |   }
100 | 
101 |   //fence message when using latest kokkos:
102 |   ex.fence();
103 |   // ex.fence("KokkosStdBlas::swap_elements: fence after operation");
104 | }
105 | 
106 | } // end namespace KokkosKernelsSTD
107 | #endif
108 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_vector_abs_sum_kk.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_VEC_ABS_SUM_HPP_
 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_VEC_ABS_SUM_HPP_
 4 | 
 5 | #include "signal_kokkos_impl_called.hpp"
 6 | 
 7 | namespace KokkosKernelsSTD {
 8 | 
 9 | // keeping this in mind: https://github.com/kokkos/stdBLAS/issues/122
10 | 
11 | template<class ExeSpace,
12 |          class ElementType,
13 | 	 std::experimental::extents<>::size_type ext0,
14 |          class Layout,
15 |          class Scalar>
16 | Scalar vector_abs_sum(kokkos_exec<ExeSpace> /*kexe*/,
17 | 		      std::experimental::mdspan<
18 | 		      ElementType,
19 | 		      std::experimental::extents<ext0>,
20 | 		      Layout,
21 | 		      std::experimental::default_accessor<ElementType>
22 | 		      > x,
23 | 		      Scalar init)
24 | {
25 | 
26 |   Impl::signal_kokkos_impl_called("vector_abs_sum");
27 | 
28 |   auto x_view = Impl::mdspan_to_view(x);
29 |   using arithm_traits = Kokkos::Details::ArithTraits<ElementType>;
30 |   Scalar result = {};
31 |   Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, x_view.extent(0)),
32 | 			  KOKKOS_LAMBDA (const std::size_t i, Scalar & update) {
33 | 			    update += arithm_traits::abs(x_view(i));
34 | 			  }, result);
35 |   // fence not needed because reducing into result
36 | 
37 |   return result + init;
38 | }
39 | 
40 | }
41 | #endif
42 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_vector_norm2_kk.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_VEC_NORM2_HPP_
 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_VEC_NORM2_HPP_
 4 | 
 5 | #include "signal_kokkos_impl_called.hpp"
 6 | 
 7 | namespace KokkosKernelsSTD {
 8 | 
 9 | template<class ExeSpace,
10 |          class ElementType,
11 | 	 std::experimental::extents<>::size_type ext,
12 |          class Layout,
13 |          class Scalar>
14 | Scalar vector_norm2(kokkos_exec<ExeSpace> /*kexe*/,
15 | 		    std::experimental::mdspan<
16 | 		    ElementType,
17 | 		    std::experimental::extents<ext>,
18 | 		    Layout,
19 | 		    std::experimental::default_accessor<ElementType>> x,
20 | 		    Scalar init)
21 | {
22 | 
23 |   Impl::signal_kokkos_impl_called("vector_norm2");
24 | 
25 |   // for the code in stBLAS/examples/kokkos-based,
26 |   // when using float, the nrm2 does not work, giving:
27 |   // Kokkos result = -36893488147419103232.000000
28 |   // return KokkosBlas::nrm2(Impl::mdspan_to_view(x)) + init;
29 | 
30 |   // the following works
31 |   using IPT = Kokkos::Details::InnerProductSpaceTraits<ElementType>;
32 |   auto x_view = Impl::mdspan_to_view(x);
33 |   Scalar result = {};
34 |   Kokkos::parallel_reduce(Kokkos::RangePolicy(ExeSpace(), 0, x_view.extent(0)),
35 | 			  KOKKOS_LAMBDA (const std::size_t i, Scalar & update) {
36 | 			    const typename IPT::mag_type tmp = IPT::norm(x_view(i));
37 | 			    update += tmp*tmp;
38 | 			  }, result);
39 | 
40 |   // fence not needed because reducing into result
41 | 
42 |   return Kokkos::Details::ArithTraits<Scalar>::sqrt(result + init);
43 | }
44 | 
45 | }
46 | #endif
47 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_vector_sum_of_squares_kk.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_VEC_SUM_OF_SQUARES_HPP_
 3 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_VEC_SUM_OF_SQUARES_HPP_
 4 | 
 5 | #include "signal_kokkos_impl_called.hpp"
 6 | 
 7 | namespace KokkosKernelsSTD {
 8 | 
 9 | template<class ExecSpace,
10 |          class ElementType,
11 |          std::experimental::extents<>::size_type ext0,
12 |          class Layout,
13 |          class Scalar>
14 | std::experimental::linalg::sum_of_squares_result<Scalar>
15 | vector_sum_of_squares(kokkos_exec<ExecSpace> /*kexe*/,
16 | 		      std::experimental::mdspan<
17 | 		      ElementType,
18 | 		      std::experimental::extents<ext0>,
19 | 		      Layout,
20 | 		      std::experimental::default_accessor<ElementType>> x,
21 | 		      std::experimental::linalg::sum_of_squares_result<Scalar> init)
22 | {
23 | 
24 |   Impl::signal_kokkos_impl_called("vector_sum_of_squares");
25 | 
26 |   auto x_view = Impl::mdspan_to_view(x);
27 |   std::experimental::linalg::sum_of_squares_result<Scalar> result;
28 | 
29 |   using arithm_traits = Kokkos::Details::ArithTraits<ElementType>;
30 | 
31 |   Scalar scaling_factor = {};
32 |   Kokkos::Max<Scalar> max_reducer(scaling_factor);
33 |   Kokkos::parallel_reduce( Kokkos::RangePolicy(ExecSpace(), 0, x_view.extent(0)),
34 | 			   KOKKOS_LAMBDA (const std::size_t i, Scalar & lmax){
35 | 			     const auto val = arithm_traits::abs(x_view(i));
36 | 			     max_reducer.join(lmax, val);
37 | 			   },
38 | 			   max_reducer);
39 |   // no fence needed since reducing into scalar
40 |   result.scaling_factor = std::max(scaling_factor, init.scaling_factor);
41 | 
42 |   Scalar ssq = {};
43 |   Kokkos::parallel_reduce(Kokkos::RangePolicy(ExecSpace(), 0, x_view.extent(0)),
44 | 			  KOKKOS_LAMBDA (const std::size_t i, Scalar & update){
45 | 			    const auto tmp = arithm_traits::abs(x_view(i))/result.scaling_factor;
46 | 			    update += tmp*tmp;
47 | 			  }, ssq);
48 |   // no fence needed since reducing into scalar
49 | 
50 |   result.scaled_sum_of_squares = ssq
51 |     + (init.scaling_factor*init.scaling_factor*init.scaled_sum_of_squares)/(scaling_factor*scaling_factor);
52 | 
53 |   return result;
54 | }
55 | 
56 | }
57 | #endif
58 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/exec_policy_wrapper_kk.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef __LINALG_KOKKOSKERNELS_EXEC_POLICY_WRAPPER_KK_HPP_
 2 | #define __LINALG_KOKKOSKERNELS_EXEC_POLICY_WRAPPER_KK_HPP_
 3 | #include<Kokkos_Core.hpp>
 4 | #include<execution>
 5 | namespace KokkosKernelsSTD {
 6 | 
 7 | template<class ExecSpace = Kokkos::DefaultExecutionSpace>
 8 | struct kokkos_exec {
 9 | };
10 | 
11 | template<class ExecSpace>
12 | auto execpolicy_mapper(kokkos_exec<ExecSpace>) { return kokkos_exec<ExecSpace>(); }
13 | } // namespace KokkosKernelsSTD
14 | 
15 | // Remap standard execution policies to Kokkos
16 | #ifdef LINALG_ENABLE_KOKKOS_DEFAULT
17 | namespace std {
18 | namespace experimental {
19 | inline namespace __p1673_version_0 {
20 | namespace linalg {
21 |   auto execpolicy_mapper(std::experimental::linalg::impl::default_exec_t) { return KokkosKernelsSTD::kokkos_exec<>(); }
22 |   auto execpolicy_mapper(std::execution::parallel_policy) { return KokkosKernelsSTD::kokkos_exec<>(); }
23 |   auto execpolicy_mapper(std::execution::parallel_unsequenced_policy) { return KokkosKernelsSTD::kokkos_exec<>(); }
24 | }
25 | }
26 | }
27 | }
28 | #endif
29 | #endif
30 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/kokkos_conjugate.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_CONJUGATE_IF_NEEDED_HPP_
19 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_CONJUGATE_IF_NEEDED_HPP_
20 | 
21 | #include "experimental/__p1673_bits/conj_if_needed.hpp"
22 | 
23 | namespace std {
24 | namespace experimental {
25 | namespace linalg {
26 | namespace impl{
27 | 
28 | // conj_if_needed doesn't use an is_complex trait.
29 | // Instead, it checks whether conj(x) (namespace-unqualified) is a valid expression,
30 | // calls that if so, else assumes that x represents a real number and returns x.
31 | // Thus, we don't actually need to do anything here.
32 | 
33 | } // end namespace impl
34 | } // end namespace linalg
35 | } // end namespace experimental
36 | } // end namespace std
37 | 
38 | #endif //LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_CONJUGATE_IF_NEEDED_HPP_
39 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/parallel_matrix.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_PARALLEL_MATRIX_HPP_
19 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_PARALLEL_MATRIX_HPP_
20 | 
21 | namespace KokkosKernelsSTD {
22 | namespace Impl {
23 | 
24 | // manages parallel execution of independent action
25 | // called like action(i, j) for each matrix element A(i, j)
26 | template <typename ExecSpace, typename MatrixType>
27 | class ParallelMatrixVisitor {
28 | public:
29 |   KOKKOS_INLINE_FUNCTION ParallelMatrixVisitor(ExecSpace &&exec_in, MatrixType A_in):
30 |     exec(exec_in), A(A_in), ext0(A.extent(0)), ext1(A.extent(1))
31 |   {}
32 | 
33 |   template <typename ActionType>
34 |   KOKKOS_INLINE_FUNCTION
35 |   void for_each_matrix_element(ActionType action) {
36 |     if (ext0 > ext1) { // parallel rows
37 |       Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, ext0),
38 |         KOKKOS_LAMBDA(const auto i) {
39 |           using idx_type = std::remove_const_t<decltype(i)>;
40 |           for (idx_type j = 0; j < ext1; ++j) {
41 |             action(i, j);
42 |           }
43 |         });
44 |     } else { // parallel columns
45 |       Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, ext1),
46 |         KOKKOS_LAMBDA(const auto j) {
47 |           using idx_type = std::remove_const_t<decltype(j)>;
48 |           for (idx_type i = 0; i < ext0; ++i) {
49 |             action(i, j);
50 |           }
51 |         });
52 |     }
53 |     exec.fence();
54 |   }
55 | 
56 |   template <typename ActionType>
57 |   void for_each_triangle_matrix_element(std::experimental::linalg::upper_triangle_t t, ActionType action) {
58 |     Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, ext1),
59 |       KOKKOS_LAMBDA(const auto j) {
60 |         using idx_type = std::remove_const_t<decltype(j)>;
61 |         for (idx_type i = 0; i <= j; ++i) {
62 |           action(i, j);
63 |         }
64 |       });
65 |     exec.fence();
66 |   }
67 | 
68 |   template <typename ActionType>
69 |   void for_each_triangle_matrix_element(std::experimental::linalg::lower_triangle_t t, ActionType action) {
70 |     for_each_triangle_matrix_element(std::experimental::linalg::upper_triangle,
71 |         [action](const auto i, const auto j) {
72 |           action(j, i);
73 |       });
74 |   }
75 | 
76 | private:
77 |   ExecSpace exec;
78 |   MatrixType A;
79 |   size_t ext0;
80 |   size_t ext1;
81 | };
82 | 
83 | } // namespace Impl
84 | } // namespace KokkosKernelsSTD
85 | #endif
86 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/signal_kokkos_impl_called.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_UTILS_HPP_
19 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_UTILS_HPP_
20 | 
21 | #include <string_view>
22 | 
23 | namespace KokkosKernelsSTD {
24 | namespace Impl {
25 | 
// Hook invoked at the start of every Kokkos-based implementation with the
// name of the function being run.
// With KOKKOS_STDBLAS_ENABLE_TESTS defined, the definition is supplied
// elsewhere; otherwise the hook is a no-op.
#if defined(KOKKOS_STDBLAS_ENABLE_TESTS)
extern void signal_kokkos_impl_called(std::string_view functionName);
#else
// `inline` is required: this is a function definition in a header, and a
// non-inline definition would be emitted by every translation unit that
// includes it.
inline void signal_kokkos_impl_called(std::string_view /* functionName */) {}
#endif
31 | 
32 | } // namespace Impl
33 | } // namespace KokkosKernelsSTD
34 | #endif
35 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/static_extent_match.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_STATICEXTMATCH_HPP_
19 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_STATICEXTMATCH_HPP_
20 | 
21 | namespace KokkosKernelsSTD {
22 | namespace Impl {
23 | 
24 | template <class size_type>
25 | constexpr bool static_extent_match(size_type extent1, size_type extent2)
26 | {
27 |   return extent1 == std::experimental::dynamic_extent ||
28 |          extent2 == std::experimental::dynamic_extent ||
29 |          extent1 == extent2;
30 | }
31 | 
32 | } // namespace Impl
33 | } // namespace KokkosKernelsSTD
34 | #endif
35 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/triangle.hpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ************************************************************************
 3 | //
 4 | //                        Kokkos v. 4.0
 5 | //       Copyright (2022) National Technology & Engineering
 6 | //               Solutions of Sandia, LLC (NTESS).
 7 | //
 8 | // Under the terms of Contract DE-NA0003525 with NTESS,
 9 | // the U.S. Government retains certain rights in this software.
10 | //
11 | // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 | // See https://kokkos.org/LICENSE for license information.
13 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 | //
15 | // ************************************************************************
16 | //@HEADER
17 | 
18 | #ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_TRIANGLE_UTILS_HPP_
19 | #define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_TRIANGLE_UTILS_HPP_
20 | 
21 | namespace KokkosKernelsSTD {
22 | namespace Impl {
23 | 
24 | // Note: phrase it simply and the same as in specification ("has unique layout")
25 | template <typename Layout,
26 |           std::experimental::extents<>::size_type numRows,
27 |           std::experimental::extents<>::size_type numCols>
28 | constexpr bool is_unique_layout_v = Layout::template mapping<
29 |     std::experimental::extents<numRows, numCols> >::is_always_unique();
30 | 
31 | template <typename Layout>
32 | struct is_layout_blas_packed: public std::false_type {};
33 | 
34 | template <typename Triangle, typename StorageOrder>
35 | struct is_layout_blas_packed<
36 |   std::experimental::linalg::layout_blas_packed<Triangle, StorageOrder>>:
37 |     public std::true_type {};
38 | 
39 | template <typename Layout>
40 | constexpr bool is_layout_blas_packed_v = is_layout_blas_packed<Layout>::value;
41 | 
42 | // Note: will only signal failure for layout_blas_packed with different triangle
43 | template <typename Layout, typename Triangle>
44 | struct triangle_layout_match: public std::true_type {};
45 | 
46 | template <typename StorageOrder, typename Triangle1, typename Triangle2>
47 | struct triangle_layout_match<
48 |   std::experimental::linalg::layout_blas_packed<Triangle1, StorageOrder>,
49 |   Triangle2>
50 | {
51 |   static constexpr bool value = std::is_same_v<Triangle1, Triangle2>;
52 | };
53 | 
54 | template <typename Layout, typename Triangle>
55 | constexpr bool triangle_layout_match_v = triangle_layout_match<Layout, Triangle>::value;
56 | 
57 | } // namespace Impl
58 | } // namespace KokkosKernelsSTD
59 | #endif
60 | 


--------------------------------------------------------------------------------
/tpl-implementations/include/experimental/linalg_kokkoskernels:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include<experimental/mdspan>
 3 | #include<KokkosBlas.hpp>
 4 | #include "__p1673_bits/kokkos-kernels/mdspan_to_view_mapper_kk.hpp"
 5 | #include "__p1673_bits/kokkos-kernels/kokkos_conjugate.hpp"
 6 | 
 7 | // blas1 (according to P1673)
 8 | #include "__p1673_bits/kokkos-kernels/blas1_dot_kk.hpp"
 9 | #include "__p1673_bits/kokkos-kernels/blas1_add_kk.hpp"
10 | #include "__p1673_bits/kokkos-kernels/blas1_scale_kk.hpp"
11 | #include "__p1673_bits/kokkos-kernels/blas1_idx_abs_max_kk.hpp"
12 | #include "__p1673_bits/kokkos-kernels/blas1_vector_norm2_kk.hpp"
13 | #include "__p1673_bits/kokkos-kernels/blas1_vector_abs_sum_kk.hpp"
14 | #include "__p1673_bits/kokkos-kernels/blas1_vector_sum_of_squares_kk.hpp"
15 | #include "__p1673_bits/kokkos-kernels/blas1_matrix_frob_norm_kk.hpp"
16 | #include "__p1673_bits/kokkos-kernels/blas1_matrix_inf_norm_kk.hpp"
17 | #include "__p1673_bits/kokkos-kernels/blas1_matrix_one_norm_kk.hpp"
18 | #include "__p1673_bits/kokkos-kernels/blas1_swap_elements_kk.hpp"
19 | #include "__p1673_bits/kokkos-kernels/blas1_copy_kk.hpp"
20 | 
21 | // blas2 (according to P1673)
22 | #include "__p1673_bits/kokkos-kernels/blas2_matrix_rank_1_update.hpp"
23 | #include "__p1673_bits/kokkos-kernels/blas2_matrix_rank_2_update.hpp"
24 | #include "__p1673_bits/kokkos-kernels/blas2_gemv_kk.hpp"
25 | #include "__p1673_bits/kokkos-kernels/blas2_symv_kk.hpp"
26 | #include "__p1673_bits/kokkos-kernels/blas2_hemv_kk.hpp"
27 | #include "__p1673_bits/kokkos-kernels/blas2_triangular_mat_vec_product.hpp"
28 | 
29 | // blas3 (according to P1673)
30 | #include "__p1673_bits/kokkos-kernels/blas3_overwriting_gemm_kk.hpp"
31 | #include "__p1673_bits/kokkos-kernels/blas3_matrix_rank_k_update.hpp"
32 | #include "__p1673_bits/kokkos-kernels/blas3_matrix_rank_2k_update.hpp"
33 | #include "__p1673_bits/kokkos-kernels/blas3_matrix_product_kk.hpp"
34 | #include "__p1673_bits/kokkos-kernels/blas3_triangular_matrix_matrix_solve.hpp"
35 | 


--------------------------------------------------------------------------------