├── .clang-format
├── .gitignore
├── CMakeLists.txt
├── CODE_OF_CONDUCT.md
├── Doxyfile
├── LICENSE
├── README.md
├── cmake
    ├── HandleEigen.cmake
    └── Modules
    │   ├── ComputeCppCompilerChecks.cmake
    │   ├── ComputeCppIRMap.cmake
    │   ├── FindComputeCpp.cmake
    │   └── FindEigen.cmake
├── example
    ├── CMakeLists.txt
    └── src
    │   ├── mnist
    │       ├── read_mnist.hpp
    │       ├── run_classifier.hpp
    │       ├── run_gauss_classifier.cpp
    │       ├── run_gmm.cpp
    │       ├── run_lin_classifier.cpp
    │       └── run_svm.cpp
    │   └── utils
    │       ├── scoped_timer.hpp
    │       └── sycl_utils.hpp
├── include
    └── ml
    │   ├── classifiers
    │       ├── bayes
    │       │   ├── bayes_classifier.hpp
    │       │   ├── distributions
    │       │   │   └── log_gaussian_distribution.hpp
    │       │   └── linear_classifier.hpp
    │       ├── classifier.hpp
    │       ├── data_splitter.hpp
    │       ├── data_splitter_extremum_dist.hpp
    │       ├── em
    │       │   ├── em_classifier.hpp
    │       │   └── log_model_per_label.hpp
    │       ├── extremum_dist.hpp
    │       └── svm
    │       │   ├── kernel_cache.hpp
    │       │   ├── smo.hpp
    │       │   ├── svm.hpp
    │       │   └── svm_kernels.hpp
    │   ├── eigen
    │       ├── eigen.hpp
    │       └── sycl_to_eigen.hpp
    │   ├── math
    │       ├── cov.hpp
    │       ├── functors.hpp
    │       ├── helper.hpp
    │       ├── mat_inv.hpp
    │       ├── mat_mul.hpp
    │       ├── mat_ops.hpp
    │       ├── qr.hpp
    │       ├── svd.hpp
    │       ├── tri_inv.hpp
    │       ├── tri_solve.hpp
    │       └── vec_ops.hpp
    │   ├── preprocess
    │       ├── apply_pca.hpp
    │       └── pca.hpp
    │   └── utils
    │       ├── access.hpp
    │       ├── buffer_acc.hpp
    │       ├── buffer_t.hpp
    │       ├── common.hpp
    │       ├── copy.hpp
    │       ├── debug
    │           ├── assert.hpp
    │           ├── print_utils.hpp
    │           └── write_bmp.hpp
    │       ├── device_constants.hpp
    │       ├── optimal_range.hpp
    │       ├── save_utils.hpp
    │       └── sycl_types.hpp
└── tests
    ├── CMakeLists.txt
    └── src
        ├── math
            ├── test_center.cpp
            ├── test_cov.cpp
            ├── test_dot_product.cpp
            ├── test_inv.cpp
            ├── test_mat_mul.cpp
            ├── test_qr.cpp
            ├── test_svd.cpp
            ├── test_tr_op.cpp
            └── test_tri_solve.cpp
        ├── svm
            ├── test_arg_extremum_cond.cpp
            ├── test_svm_linear.cpp
            └── test_svm_poly.cpp
        └── utils
            ├── assert_utils.hpp
            ├── sycl_utils.hpp
            ├── test_save_load.cpp
            └── utils.hpp


/.clang-format:
--------------------------------------------------------------------------------
  1 | ---
  2 | Language:        Cpp
  3 | # BasedOnStyle:  Google
  4 | AccessModifierOffset: -1
  5 | AlignAfterOpenBracket: Align
  6 | AlignConsecutiveAssignments: false
  7 | AlignConsecutiveDeclarations: false
  8 | AlignEscapedNewlines: Left
  9 | AlignOperands:   true
 10 | AlignTrailingComments: true
 11 | AllowAllParametersOfDeclarationOnNextLine: true
 12 | AllowShortBlocksOnASingleLine: false
 13 | AllowShortCaseLabelsOnASingleLine: false
 14 | AllowShortFunctionsOnASingleLine: Inline
 15 | AllowShortIfStatementsOnASingleLine: false
 16 | AllowShortLoopsOnASingleLine: false
 17 | AlwaysBreakAfterDefinitionReturnType: None
 18 | AlwaysBreakAfterReturnType: None
 19 | AlwaysBreakBeforeMultilineStrings: true
 20 | AlwaysBreakTemplateDeclarations: true
 21 | BinPackArguments: true
 22 | BinPackParameters: true
 23 | BraceWrapping:
 24 |   AfterClass:      false
 25 |   AfterControlStatement: false
 26 |   AfterEnum:       false
 27 |   AfterFunction:   false
 28 |   AfterNamespace:  false
 29 |   AfterObjCDeclaration: false
 30 |   AfterStruct:     false
 31 |   AfterUnion:      false
 32 |   AfterExternBlock: false
 33 |   BeforeCatch:     false
 34 |   BeforeElse:      false
 35 |   IndentBraces:    false
 36 |   SplitEmptyFunction: true
 37 |   SplitEmptyRecord: true
 38 |   SplitEmptyNamespace: true
 39 | BreakBeforeBinaryOperators: None
 40 | BreakBeforeBraces: Attach
 41 | BreakBeforeInheritanceComma: true
 42 | BreakBeforeTernaryOperators: true
 43 | BreakConstructorInitializersBeforeComma: false
 44 | BreakConstructorInitializers: BeforeColon
 45 | BreakStringLiterals: true
 46 | ColumnLimit:     80
 47 | CommentPragmas:  '^ IWYU pragma:'
 48 | CompactNamespaces: false
 49 | ConstructorInitializerAllOnOneLineOrOnePerLine: true
 50 | ConstructorInitializerIndentWidth: 4
 51 | ContinuationIndentWidth: 4
 52 | Cpp11BracedListStyle: true
 53 | DerivePointerAlignment: false
 54 | DisableFormat:   false
 55 | FixNamespaceComments: true
 56 | ForEachMacros:
 57 |   - foreach
 58 |   - Q_FOREACH
 59 |   - BOOST_FOREACH
 60 | IncludeBlocks:   Preserve
 61 | IncludeCategories:
 62 |   - Regex:           '^<ext/.*\.h>'
 63 |     Priority:        2
 64 |   - Regex:           '^<.*\.h>'
 65 |     Priority:        1
 66 |   - Regex:           '^<.*'
 67 |     Priority:        2
 68 |   - Regex:           '.*'
 69 |     Priority:        3
 70 | IncludeIsMainRegex: '([-_](test|unittest))?$'
 71 | IndentCaseLabels: true
 72 | IndentPPDirectives: None
 73 | IndentWidth:     2
 74 | IndentWrappedFunctionNames: false
 75 | KeepEmptyLinesAtTheStartOfBlocks: false
 76 | MaxEmptyLinesToKeep: 1
 77 | NamespaceIndentation: None
 78 | PenaltyBreakAssignment: 2
 79 | PenaltyBreakBeforeFirstCallParameter: 1
 80 | PenaltyBreakComment: 300
 81 | PenaltyBreakFirstLessLess: 120
 82 | PenaltyBreakString: 1000
 83 | PenaltyExcessCharacter: 1000000
 84 | PenaltyReturnTypeOnItsOwnLine: 200
 85 | PointerAlignment: Left
 86 | ReflowComments:  true
 87 | SortIncludes:    true
 88 | SortUsingDeclarations: true
 89 | SpaceAfterCStyleCast: true
 90 | SpaceAfterTemplateKeyword: true
 91 | SpaceBeforeAssignmentOperators: true
 92 | SpaceBeforeParens: ControlStatements
 93 | SpaceInEmptyParentheses: false
 94 | SpacesBeforeTrailingComments: 2
 95 | SpacesInAngles:  false
 96 | SpacesInContainerLiterals: true
 97 | SpacesInCStyleCastParentheses: false
 98 | SpacesInParentheses: false
 99 | SpacesInSquareBrackets: false
100 | Standard:        Cpp11
101 | TabWidth:        8
102 | UseTab:          Never
103 | ...
104 | 
105 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build*/
2 | doc/
3 | 
4 | *~
5 | *.bak
6 | *.swp
7 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | # Copyright (C) Codeplay Software Limited.
  2 | #
  3 | #  Licensed under the Apache License, Version 2.0 (the "License");
  4 | #  you may not use this file except in compliance with the License.
  5 | #  You may obtain a copy of the License at
  6 | #
  7 | #    http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | #  Unless required by applicable law or agreed to in writing, software
 10 | #  distributed under the License is distributed on an "AS IS" BASIS,
 11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | #  See the License for the specific language governing permissions and
 13 | #  limitations under the License.
 14 | 
 15 | cmake_minimum_required(VERSION 3.4.3)
 16 | project(SYCL-ML)
 17 | 
 18 | if(MSVC)
 19 |   message(WARNING "Windows support is only experimental for now")
 20 | endif()
 21 | 
 22 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/")
 23 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules/")
 24 | 
 25 | # Configuration options controlling automatic downloading of dependencies.
 26 | option(SYCLML_DOWNLOAD_EIGEN "Download Eigen headers" ON)
 27 | option(SYCLML_DOWNLOAD_MISSING_DEPS
 28 |   "Download any dependencies which cannot be found" ON)
 29 | 
 30 | # Eigen configuration options.
 31 | option(SYCLML_EIGEN_LOCAL_MEM
 32 |   "Only compile the local memory versions of Eigen kernels" ON)
 33 | option(SYCLML_EIGEN_NO_LOCAL_MEM
 34 |   "Only compile the no local memory versions of Eigen kernels" OFF)
 35 | option(SYCLML_EIGEN_COMPRESS_NAMES
 36 |   "Compress Eigen SYCL kernel names" OFF)
 37 | option(SYCLML_EIGEN_NO_BARRIER
 38 |   "Use Eigen matmul which does not use barriers (implies NO_LOCAL_MEM)" OFF)
 39 | 
 40 | # ComputeCpp configuration options.
 41 | include(FindComputeCpp)
 42 | # Avoid namespace ambiguities with older compilers
 43 | list(APPEND COMPUTECPP_USER_FLAGS -DCOMPUTECPP_DISABLE_SYCL_NAMESPACE_ALIAS)
 44 | option(SYCLML_COMPUTECPP_USE_SERIAL_MEMOP
 45 |   "Replace memory operations (eg memset) in kernels with serial operations." OFF)
 46 | if(NOT SYCLML_COMPUTECPP_USE_SERIAL_MEMOP)
 47 |   list(APPEND COMPUTECPP_USER_FLAGS -no-serial-memop)
 48 | endif()
 49 | set(COMPUTECPP_DRIVER_DEFAULT_VALUE OFF)
 50 | if (NOT MSVC)
 51 |   set(COMPUTECPP_DRIVER_DEFAULT_VALUE ON)
 52 | endif()
 53 | option(SYCLML_COMPUTECPP_USE_COMPILER_DRIVER
 54 |   "Use ComputeCpp driver instead of a 2 steps compilation"
 55 |   ${COMPUTECPP_DRIVER_DEFAULT_VALUE}
 56 | )
 57 | if(SYCLML_COMPUTECPP_USE_COMPILER_DRIVER)
 58 |   set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE})
 59 | endif()
 60 | option(SYCLML_COMPUTECPP_SPLIT_MODULES
 61 |   "Split modules to speed up compilation." ON)
 62 | if(SYCLML_COMPUTECPP_SPLIT_MODULES)  # pass the flag only when splitting is ON
 63 |   list(APPEND COMPUTECPP_USER_FLAGS -fsycl-split-modules=20)
 64 | endif()
 65 | 
 66 | # Tests configuration
 67 | option(SYCLML_TEST_DOUBLE "Add tests using double type." OFF)
 68 | 
 69 | # Set compile options for host and device compilers
 70 | set(CMAKE_CXX_STANDARD 14)
 71 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 72 | set(CMAKE_CXX_EXTENSIONS OFF)
 73 | 
 74 | include(CheckCXXCompilerFlag)
 75 | foreach(flag -Wall -Wextra -Wpedantic)
 76 |   check_cxx_compiler_flag(${flag} is_flag_supported)
 77 |   if(is_flag_supported)
 78 |     add_compile_options(${flag})
 79 |   endif()
 80 | endforeach()
 81 | if(MSVC)
 82 |   add_compile_options(/bigobj)
 83 | endif()
 84 | 
 85 | include(HandleEigen)
 86 | 
 87 | # Include SYCL-ML
 88 | include_directories(${CMAKE_SOURCE_DIR}/include)
 89 | add_library(sycl_ml INTERFACE)
 90 | target_include_directories(sycl_ml INTERFACE
 91 |   $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
 92 | )
 93 | target_link_libraries(sycl_ml INTERFACE
 94 |   ComputeCpp::ComputeCpp
 95 |   Eigen
 96 | )
 97 | 
 98 | function(add_sycl_ml_executable SOURCE)  # one executable per SYCL source file
 99 |   get_filename_component(SOURCE_NAME_WE "${SOURCE}" NAME_WE)
100 |   get_filename_component(DIR "${SOURCE}" DIRECTORY)
101 |   string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "${CMAKE_CURRENT_BINARY_DIR}/" OUTPUT_SYCL_DIR "${DIR}")
102 |   file(MAKE_DIRECTORY "${OUTPUT_SYCL_DIR}")  # quoted: the path may contain spaces
103 |   set(TARGET_NAME ${SOURCE_NAME_WE})  # target is named after the source file
104 |   set(TARGET_NAME ${TARGET_NAME} PARENT_SCOPE)  # also visible to the caller
105 |   set(OUTPUT_SYCL_DIR "${OUTPUT_SYCL_DIR}" PARENT_SCOPE)  # also visible to the caller
106 | 
107 |   add_executable(${TARGET_NAME} "${SOURCE}")
108 |   target_link_libraries(${TARGET_NAME} sycl_ml)
109 |   set_property(TARGET ${TARGET_NAME} PROPERTY COMPUTECPP_INCLUDE_AFTER 1)
110 |   # Workaround INTERFACE_SYSTEM_INCLUDE_DIRECTORIES not being propagated
111 |   # Add Eigen as system include
112 |   target_include_directories(${TARGET_NAME} SYSTEM PRIVATE $<TARGET_PROPERTY:Eigen,INTERFACE_INCLUDE_DIRECTORIES>)
113 |   add_sycl_to_target(TARGET ${TARGET_NAME} SOURCES "${SOURCE}")
114 | endfunction()
115 | 
116 | add_subdirectory(example)
117 | 
118 | include(CTest)
119 | if(BUILD_TESTING)
120 |   enable_testing()
121 |   add_subdirectory(tests)
122 | endif()
123 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, sex characteristics, gender identity and
 9 | expression, level of experience, education, socio-economic status, nationality,
10 | personal appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 |   advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at
59 | [sycl@codeplay.com](mailto:sycl@codeplay.com). All complaints will be reviewed
60 | and investigated and will result in a response that is deemed necessary and
61 | appropriate to the circumstances. The project team is obligated to maintain
62 | confidentiality with regard to the reporter of an incident. Further details of
63 | specific enforcement policies may be posted separately.
64 | 
65 | Project maintainers who do not follow or enforce the Code of Conduct in good
66 | faith may face temporary or permanent repercussions as determined by other
67 | members of the project's leadership.
68 | 
69 | ## Attribution
70 | 
71 | This Code of Conduct is adapted from the
72 | [Contributor Covenant](https://www.contributor-covenant.org/), version 1.4,
73 | available at
74 | https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
75 | 
76 | ---
77 | 
78 | If there are any issues or suggestions relating to the current set of rules, you
79 | can reach us at [sycl@codeplay.com](mailto:sycl@codeplay.com).
80 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SYCL-ML
 2 | 
 3 | ## What is it?
 4 | SYCL-ML is a framework providing simple classical machine learning algorithms using SYCL.
 5 | It is meant to be accelerated on any OpenCL device supporting SPIR or SPIR-V.
 6 | The following links give more details on what SYCL is:
 7 | - https://www.khronos.org/sycl
 8 | - https://developer.codeplay.com/computecppce/latest/sycl-guide-introduction
 9 | 
10 | ## What can it do?
11 | Some linear algebra operations had to be implemented such as:
12 | - **Matrix inversion**
13 | - **SVD decomposition**
14 | - **QR decomposition**
15 | 
16 | In terms of machine learning related algorithms it includes:
17 | - **Principal Component Analysis**: used to reduce the dimensionality of a problem.
18 | - **Linear Classifier** (see naive Bayes classifier): classify assuming all variables are equally important.
19 | - **Gaussian Classifier**: classify using the Gaussian distribution.
20 | - **Gaussian Mixture Model**: based on the EM algorithm, uses multiple Gaussian distributions for each label.
21 | - **Support Vector Machine**: C-SVM with any kernel function.
22 | 
23 | SYCL-ML is a header only library which makes it easy to integrate.
24 | 
25 | More details on what the project implements and how it works can be found on our [website](https://www.codeplay.com/portal/12-21-17-alternative-machine-learning-algorithms-using-sycl-and-opencl).
26 | 
27 | ## TODO list
28 | - Optimize **SVD** decomposition for faster PCA. The algorithm probably needs to be changed to compute eigenpairs differently.
29 | - Optimize **SVM** for GPU. Techniques from more recent papers on SVM for GPU should be experimented with.
30 | - Implement an **LDA** (or dimensionality reduction algorithms) which would be used as a preprocessing step similarly to a PCA.
31 | - Implement a **K-means** (or other clustering algorithms) which could be used to improve the initialization of the EM.
32 | - Add a proper way to select a SYCL device.
33 | 
34 | ## Prerequisites
35 | SYCL-ML has been tested with:
36 | - Ubuntu 16.04, amdgpu pro driver 17.40
37 | - CMake 3.4.3 or newer (the minimum version required by `CMakeLists.txt`)
38 | - g++ 5.4
39 | - ComputeCpp 1.2.0
40 | 
41 | ComputeCpp can be downloaded from the [Codeplay](https://www.codeplay.com/products/computesuite/computecpp) website.
42 | Once extracted, the ComputeCpp path should be stored in the `COMPUTECPP_DIR` environment variable (usually `/usr/local/computecpp`).
43 | Alternatively, it can be given as an argument to cmake with `-DComputeCpp_DIR=path/to/computecpp`.
44 | 
45 | ## Building
46 | Build all the targets with:
47 | ```bash
48 | mkdir build
49 | cd build
50 | cmake ..
51 | make
52 | ```
53 | CMake will take care of downloading the Eigen dependency and MNIST dataset.
54 | On Unix it will automatically extract the MNIST dataset using `gunzip`.
55 | 
56 | It is recommended to run the tests before running the examples:
57 | ```bash
58 | cd build/tests
59 | ctest --output-on-failure
60 | ```
61 | 
62 | The documentation can be built with `doxygen`. It requires `dot` from the `graphviz` package. Simply run:
63 | ```bash
64 | doxygen
65 | ```
66 | 
67 | ## Contributing
68 | The project is under the Apache 2.0 license. Any contribution is welcome! Also feel free to raise an issue for any
69 | questions or suggestions.
70 | 


--------------------------------------------------------------------------------
/cmake/HandleEigen.cmake:
--------------------------------------------------------------------------------
 1 | # Copyright (C) Codeplay Software Limited.
 2 | 
 3 | cmake_minimum_required(VERSION 3.2.2)
 4 | 
 5 | if(NOT SYCLML_DOWNLOAD_EIGEN)
 6 |   find_package(Eigen)
 7 | endif()
 8 | 
 9 | if(NOT Eigen_FOUND AND (SYCLML_DOWNLOAD_EIGEN OR SYCLML_DOWNLOAD_MISSING_DEPS))
10 |   include(ExternalProject)
11 |   set(EIGEN_REPO "https://gitlab.com/libeigen/eigen" CACHE STRING
12 |     "Eigen repository to use"
13 |   )
14 |   set(EIGEN_GIT_TAG "d0ae052" CACHE STRING
15 |     "Git tag, branch or commit to use for the Eigen library"
16 |   )
17 |   set(EIGEN_SOURCE_DIR ${PROJECT_BINARY_DIR}/Eigen-src)
18 |   if(NOT TARGET Eigen_download)
19 |     ExternalProject_Add(Eigen_download
20 |       GIT_REPOSITORY    ${EIGEN_REPO}
21 |       GIT_TAG           ${EIGEN_GIT_TAG}
22 |       SOURCE_DIR        ${EIGEN_SOURCE_DIR}
23 |       CONFIGURE_COMMAND ""
24 |       BUILD_COMMAND     ""
25 |       INSTALL_COMMAND   ""
26 |       TEST_COMMAND      ""
27 |     )
28 |   endif()
29 |   set(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR})
30 |   file(MAKE_DIRECTORY ${EIGEN_INCLUDE_DIR})
31 | 
32 |   find_package(Eigen)
33 |   add_dependencies(Eigen Eigen_download)
34 |   mark_as_advanced(EIGEN_REPO EIGEN_GIT_TAG)
35 | endif()
36 | 
37 | if(NOT Eigen_FOUND)
38 |   message(FATAL_ERROR
39 |     "Could not find Eigen, consider setting SYCLML_DOWNLOAD_MISSING_DEPS")
40 | endif()
41 | 


--------------------------------------------------------------------------------
/cmake/Modules/ComputeCppCompilerChecks.cmake:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.4.3)
 2 | 
 3 | if(CMAKE_COMPILER_IS_GNUCXX)  # reject host compilers that are too old
 4 |   if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
 5 |     message(FATAL_ERROR "host compiler - gcc version must be >= 4.8")
 6 |   endif()
 7 | elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
 8 |   if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.6)  # if() dereferences the name
 9 |     message(FATAL_ERROR "host compiler - clang version must be >= 3.6")
10 |   endif()
11 | endif()
12 | 
13 | if(MSVC)  # check that the device compiler can consume the host STL headers
14 |   set(ComputeCpp_STL_CHECK_SRC __STL_check)
15 |   file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp
16 |     "#include <ios>\n"
17 |     "int main() { return 0; }\n")
18 |   execute_process(  # first attempt; its output file is deleted by file(REMOVE)
19 |     COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE}
20 |             ${COMPUTECPP_DEVICE_COMPILER_FLAGS}
21 |             -isystem ${ComputeCpp_INCLUDE_DIRS}
22 |             -o ${ComputeCpp_STL_CHECK_SRC}.cpp.sycl
23 |             -c ${ComputeCpp_STL_CHECK_SRC}.cpp
24 |     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
25 |     RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT
26 |     ERROR_QUIET
27 |     OUTPUT_QUIET)
28 |   if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0)
29 |     # Try disabling compiler version checks
30 |     execute_process(
31 |       COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE}
32 |               ${COMPUTECPP_DEVICE_COMPILER_FLAGS}
33 |               -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH
34 |               -isystem ${ComputeCpp_INCLUDE_DIRS}
35 |               -o ${ComputeCpp_STL_CHECK_SRC}.cpp.sycl
36 |               -c ${ComputeCpp_STL_CHECK_SRC}.cpp
37 |       WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
38 |       RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT
39 |       ERROR_QUIET
40 |       OUTPUT_QUIET)
41 |     if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0)
42 |       message(STATUS "Device compiler cannot consume hosted STL headers. Using any parts of the STL will likely result in device compiler errors.")
43 |     else()
44 |       message(STATUS "Device compiler does not meet certain STL version requirements. Disabling version checks and hoping for the best.")
45 |       list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH)
46 |     endif()
47 |   endif()
48 |   file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp
49 |               ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp.sycl)
50 | endif(MSVC)
51 | 


--------------------------------------------------------------------------------
/cmake/Modules/ComputeCppIRMap.cmake:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.4.3)
 2 | 
 3 | # These should match the types of IR output by compute++
 4 | set(IR_MAP_spir bc)
 5 | set(IR_MAP_spir64 bc)
 6 | set(IR_MAP_spir32 bc)
 7 | set(IR_MAP_spirv spv)
 8 | set(IR_MAP_spirv64 spv)
 9 | set(IR_MAP_spirv32 spv)
10 | set(IR_MAP_aorta-x86_64 o)
11 | set(IR_MAP_aorta-aarch64 o)
12 | set(IR_MAP_aorta-rcar-cve o)
13 | set(IR_MAP_custom-spir64 bc)
14 | set(IR_MAP_custom-spir32 bc)
15 | set(IR_MAP_custom-spirv64 spv)
16 | set(IR_MAP_custom-spirv32 spv)
17 | set(IR_MAP_ptx64 s)
18 | set(IR_MAP_amdgcn s)
19 | 


--------------------------------------------------------------------------------
/cmake/Modules/FindEigen.cmake:
--------------------------------------------------------------------------------
 1 | # Copyright (C) Codeplay Software Limited.
 2 | 
 3 | # Try to find the Eigen library and its Tensor module.
 4 | #
 5 | # If the library is found then the `Eigen` interface target is created with
 6 | # the required include directories and compile definitions.
 7 | #
 8 | # Sets the following variables:
 9 | #   Eigen_FOUND        - whether the system has Eigen
10 | #   EIGEN_INCLUDE_DIRS - the Eigen include directory
11 | 
12 | find_path(EIGEN_INCLUDE_DIR
13 |   NAMES unsupported/Eigen/CXX11/Tensor
14 |   PATH_SUFFIXES eigen3 Eigen3
15 |   DOC "The Eigen SYCL Tensor module"
16 | )
17 | include(FindPackageHandleStandardArgs)
18 | find_package_handle_standard_args(Eigen
19 |   FOUND_VAR Eigen_FOUND
20 |   REQUIRED_VARS EIGEN_INCLUDE_DIR
21 | )
22 | mark_as_advanced(EIGEN_INCLUDE_DIR)  # the cache entry created by find_path
23 | if(Eigen_FOUND)
24 |   set(EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR})
25 | endif()
26 | 
27 | if(Eigen_FOUND AND NOT TARGET Eigen)
28 |   add_library(Eigen INTERFACE)
29 |   set_target_properties(Eigen PROPERTIES
30 |     INTERFACE_INCLUDE_DIRECTORIES "${EIGEN_INCLUDE_DIR}"
31 |   )
32 | endif()
33 | if(Eigen_FOUND)
34 |   set(eigen_definitions EIGEN_EXCEPTIONS=1
35 |                         EIGEN_USE_SYCL=1
36 |                         EIGEN_SYCL_USE_DEFAULT_SELECTOR=1)
37 |   find_package(Threads)
38 |   if(Threads_FOUND)
39 |     list(APPEND eigen_definitions EIGEN_SYCL_ASYNC_EXECUTION=1)
40 |     set_property(TARGET Eigen
41 |       APPEND PROPERTY INTERFACE_LINK_LIBRARIES Threads::Threads
42 |     )
43 |   endif()
44 |   if(SYCLML_EIGEN_NO_BARRIER)
45 |     list(APPEND eigen_definitions EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION=1
46 |                                   EIGEN_SYCL_NO_LOCAL_MEM=1)
47 |   else()
48 |     if(SYCLML_EIGEN_LOCAL_MEM)
49 |       list(APPEND eigen_definitions EIGEN_SYCL_LOCAL_MEM=1)
50 |     endif()
51 |     if(SYCLML_EIGEN_NO_LOCAL_MEM)
52 |       list(APPEND eigen_definitions EIGEN_SYCL_NO_LOCAL_MEM=1)
53 |     endif()
54 |   endif()
55 |   set_target_properties(Eigen PROPERTIES
56 |     INTERFACE_COMPILE_DEFINITIONS "${eigen_definitions}"
57 |   )
58 |   if(SYCLML_EIGEN_COMPRESS_NAMES)
59 |     set_target_properties(Eigen PROPERTIES
60 |       INTERFACE_COMPUTECPP_FLAGS "-sycl-compress-name"
61 |     )
62 |   endif()
63 | endif()
64 | 


--------------------------------------------------------------------------------
/example/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) Codeplay Software Limited.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #    http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | 
15 | include_directories("src")
16 | 
17 | # Download and extract MNIST
18 | set(MNIST_FILES "train-images-idx3-ubyte"
19 |                 "train-labels-idx1-ubyte"
20 |                 "t10k-images-idx3-ubyte"
21 |                 "t10k-labels-idx1-ubyte")
22 | set(MNIST_RELATIVE_PATH "data/mnist/")
23 | set(MNIST_FULL_PATH ${CMAKE_CURRENT_BINARY_DIR}/${MNIST_RELATIVE_PATH})
24 | file(MAKE_DIRECTORY ${MNIST_FULL_PATH})
25 | foreach(FILENAME ${MNIST_FILES})
26 |   if(NOT EXISTS ${MNIST_FULL_PATH}/${FILENAME})
27 |     set(FILE "${FILENAME}.gz")
28 |     set(DL_LINK "http://yann.lecun.com/exdb/mnist/${FILE}")
29 |     file(DOWNLOAD ${DL_LINK} ${MNIST_FULL_PATH}/${FILE} SHOW_PROGRESS)
30 |     if(UNIX)
31 |       execute_process(COMMAND gunzip ${FILE}
32 |                       WORKING_DIRECTORY ${MNIST_FULL_PATH}
33 |                       RESULT_VARIABLE PROC_RET)
34 |       if(${PROC_RET})
35 |         message(FATAL_ERROR "Could not extract ${FILE}")
36 |       endif()
37 |     else()
38 |       message(WARNING "Please, extract the file ${MNIST_FULL_PATH}/${FILE}")
39 |     endif()
40 |   endif()
41 | endforeach()
42 | 
43 | # Build each example
44 | file(GLOB_RECURSE SOURCES "src/*.cpp")
45 | foreach(SOURCE ${SOURCES})
46 |   add_sycl_ml_executable(${SOURCE})
47 | endforeach()
48 | 


--------------------------------------------------------------------------------
/example/src/mnist/read_mnist.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef EXAMPLE_SRC_MNIST_READ_MNIST_HPP
 17 | #define EXAMPLE_SRC_MNIST_READ_MNIST_HPP
 18 | 
 19 | #include <algorithm>
 20 | #include <cmath>
 21 | #include <cstring>
 22 | #include <fstream>
 23 | #include <iostream>
 24 | #include <string>
 25 | #include <vector>
 26 | 
 27 | /*
 28 |  * Load the MNIST data set: http://yann.lecun.com/exdb/mnist/
 29 |  * The functions read *ubyte files meaning the .gz files have to be decompressed
 30 |  * (handled by CMake)
 31 |  */
 32 | 
 33 | // Swap the byte order of a 32-bit value (little endian <-> big endian).
 34 | static uint32_t reverse_int(uint32_t i) {
 35 |   const uint32_t byte0 = i & 0xFFu;
 36 |   const uint32_t byte1 = (i >> 8) & 0xFFu;
 37 |   const uint32_t byte2 = (i >> 16) & 0xFFu;
 38 |   const uint32_t byte3 = (i >> 24) & 0xFFu;
 39 | 
 40 |   // The lowest input byte becomes the highest output byte and vice versa.
 41 |   return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
 42 | }
 43 | 
 44 | static void read_int(std::ifstream& file, uint32_t& i) {
 45 |   file.read(reinterpret_cast<char*>(&i), sizeof(i));  // raw bytes into `i`
 46 |   i = reverse_int(i);  // swap the byte order of the value just read
 47 | }
 48 | 
 49 | // Return the smallest power of 2 that is greater than or equal to x
 50 | template <class T>
 51 | static inline T to_pow2(T x) {
 52 |   return static_cast<T>(std::pow(2, std::ceil(std::log2(x))));
 53 | }
 54 | 
 55 | template <class T>
 56 | struct static_cast_func {  // function object wrapping static_cast<T>
 57 |   template <class U>
 58 |   T operator()(const U& value) const {
 59 |     return static_cast<T>(value);  // explicit conversion from U to T
 60 |   }
 61 | };
 62 | 
 63 | static std::ifstream open_mnist_file(const std::string& full_path) {
 64 |   std::ifstream file(full_path, std::ios::in | std::ios::binary);
 65 |   // The gz format does not specify the output filename: if the "-ubyte" file
 66 |   // cannot be opened, retry with ".ubyte" when the path is long enough for it.
 67 |   std::string other_full_path = full_path;
 68 |   if (!file.is_open() && other_full_path.size() >= 6) {
 69 |     other_full_path[other_full_path.size() - 6] = '.';
 70 |     file = std::ifstream(other_full_path, std::ios::in | std::ios::binary);
 71 |   }
 72 |   if (!file.is_open()) {
 73 |     std::cerr << "Cannot open file `" << full_path << "` nor `"
 74 |               << other_full_path << "`" << std::endl;
 75 |   }
 76 |   return file;  // left closed when neither path could be opened
 77 | }
 78 | 
 79 | // Read mnist, cast uchar to type T and transpose it (so that an image is a
 80 | // column)
 81 | template <class T>
 82 | std::vector<T> read_mnist_images(const std::string& full_path,
 83 |                                  unsigned& image_size,
 84 |                                  unsigned& padded_image_size,
 85 |                                  unsigned& nb_images, bool transpose,
 86 |                                  bool round_pow2, T norm_factor = 1) {
 87 |   std::ifstream file = open_mnist_file(full_path);
 88 |   if (!file.is_open()) {
 89 |     std::cerr << "Could not open file: " << full_path << std::endl;
 90 |     return {};
 91 |   }
 92 | 
 93 |   uint32_t magic_number = 0;
 94 |   read_int(file, magic_number);
 95 |   if (magic_number != 2051) {
 96 |     std::cerr << "Invalid MNIST file: " << full_path << std::endl;
 97 |     return {};
 98 |   }
 99 | 
100 |   uint32_t read_nb_images = 0, read_nb_rows = 0, read_nb_cols = 0;
101 |   read_int(file, read_nb_images);
102 |   read_int(file, read_nb_rows);
103 |   read_int(file, read_nb_cols);
104 | 
105 |   uint32_t out_read_nb_rows = read_nb_rows;
106 |   uint32_t out_read_nb_cols = read_nb_cols;
107 |   if (round_pow2) {
108 |     out_read_nb_rows = to_pow2(read_nb_rows);
109 |     out_read_nb_cols = to_pow2(read_nb_cols);
110 |   }
111 | 
112 |   nb_images = read_nb_images;
113 |   image_size = read_nb_rows * read_nb_cols;
114 |   unsigned buffer_total_size = nb_images * image_size;
115 |   std::vector<unsigned char> buffer(buffer_total_size);
116 | 
117 |   padded_image_size = out_read_nb_rows * out_read_nb_cols;
118 |   unsigned dataset_total_size = nb_images * padded_image_size;
119 |   std::vector<T> dataset(dataset_total_size);
120 | 
121 |   file.read(reinterpret_cast<char*>(buffer.data()), buffer_total_size);
122 | 
123 |   if (transpose) {
124 |     for (unsigned c = 0; c < nb_images; ++c) {
125 |       for (unsigned r = 0; r < image_size; ++r) {
126 |         // Cast, normalize and transpose
127 |         dataset[r * nb_images + c] =
128 |             static_cast<T>(buffer[c * image_size + r]) / norm_factor;
129 |       }
130 |     }
131 |     if (round_pow2) {  // Set all zeros in the end
132 |       std::memset(&dataset[image_size * nb_images], 0,
133 |                   (padded_image_size - image_size) * nb_images * sizeof(T));
134 |     }
135 |   } else {
136 |     for (unsigned r = 0; r < nb_images; ++r) {
137 |       for (unsigned c = 0; c < image_size; ++c) {
138 |         // Cast and normalize
139 |         dataset[r * padded_image_size + c] =
140 |             static_cast<T>(buffer[r * image_size + c]) / norm_factor;
141 |       }
142 |       std::memset(&dataset[r * padded_image_size + image_size], 0,
143 |                   (padded_image_size - image_size) * sizeof(T));
144 |     }
145 |   }
146 | 
147 |   return dataset;
148 | }
149 | 
150 | template <class T>
151 | std::vector<T> read_mnist_labels(const std::string& full_path,
152 |                                  unsigned& nb_labels) {
153 |   std::ifstream file = open_mnist_file(full_path);
154 |   if (!file.is_open()) {
155 |     std::cerr << "Could not open file: " << full_path << std::endl;
156 |     return {};
157 |   }
158 | 
159 |   uint32_t magic_number = 0;
160 |   read_int(file, magic_number);
161 |   if (magic_number != 2049) {
162 |     std::cerr << "Invalid MNIST file: " << full_path << std::endl;
163 |     return {};
164 |   }
165 | 
166 |   uint32_t read_nb_labels = 0;
167 |   read_int(file, read_nb_labels);
168 |   nb_labels = read_nb_labels;
169 | 
170 |   std::vector<unsigned char> buffer(nb_labels);
171 |   std::vector<T> labels(nb_labels);
172 | 
173 |   file.read(reinterpret_cast<char*>(buffer.data()), nb_labels);
174 |   std::transform(buffer.begin(), buffer.end(), labels.begin(),
175 |                  static_cast_func<T>());
176 | 
177 |   return labels;
178 | }
179 | 
// Path of the training images file inside the directory `prefix`.
inline std::string mnist_get_train_images_path(const std::string& prefix) {
  std::string path(prefix);
  path += "/train-images-idx3-ubyte";
  return path;
}
183 | 
// Path of the training labels file inside the directory `prefix`.
inline std::string mnist_get_train_labels_path(const std::string& prefix) {
  std::string path(prefix);
  path += "/train-labels-idx1-ubyte";
  return path;
}
187 | 
// Path of the test images file inside the directory `prefix`.
inline std::string mnist_get_test_images_path(const std::string& prefix) {
  std::string path(prefix);
  path += "/t10k-images-idx3-ubyte";
  return path;
}
191 | 
// Path of the test labels file inside the directory `prefix`.
inline std::string mnist_get_test_labels_path(const std::string& prefix) {
  std::string path(prefix);
  path += "/t10k-labels-idx1-ubyte";
  return path;
}
195 | 
196 | #endif  // EXAMPLE_SRC_MNIST_READ_MNIST_HPP
197 | 


--------------------------------------------------------------------------------
/example/src/mnist/run_classifier.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef EXAMPLE_SRC_MNIST_RUN_CLASSIFIER_HPP
 17 | #define EXAMPLE_SRC_MNIST_RUN_CLASSIFIER_HPP
 18 | 
 19 | #include <iostream>
 20 | 
 21 | #include "ml/preprocess/apply_pca.hpp"
 22 | 
 23 | #include "read_mnist.hpp"
 24 | #include "utils/scoped_timer.hpp"
 25 | #include "utils/sycl_utils.hpp"
 26 | 
 27 | /**
 28 |  * @brief Train and test any given classifier.
 29 |  *
 30 |  * @tparam ClassifierT
 31 |  * @param mnist_path
 32 |  * @param pca_args arguments given to the PCA
 33 |  * @param classifier
 34 |  */
 35 | template <class ClassifierT>
 36 | void run_classifier(
 37 |     const std::string& mnist_path,
 38 |     const ml::pca_args<typename ClassifierT::DataType>& pca_args,
 39 |     ClassifierT classifier = ClassifierT()) {
 40 |   // The TIME macro creates an object that will print the time elapsed between
 41 |   // its construction and destruction
 42 |   TIME(run_classifier);
 43 | 
 44 |   using DataType = typename ClassifierT::DataType;
 45 |   using LabelType = typename ClassifierT::LabelType;
 46 | 
 47 |   // MNIST specific
 48 |   std::vector<LabelType> label_set(10);
 49 |   // Create the set of labels here instead of computing it during the training
 50 |   std::iota(label_set.begin(), label_set.end(), 0);
 51 |   // Data will be shifted in the range [0, 1]
 52 |   const DataType normalize_factor = 255;
 53 | 
 54 |   // Load and save options
 55 |   const bool load_classifier = false;
 56 |   const bool save_classifier = false;
 57 | 
 58 |   // What the classifier will compute
 59 |   std::vector<LabelType> host_predicted_test_labels;
 60 | 
 61 |   {  // Scope with a SYCL queue
 62 |     cl::sycl::queue& q = create_queue();
 63 | 
 64 |     ml::apply_pca<DataType> apply_pca;
 65 | 
 66 |     // Load the train data, perform PCA and train the classifier
 67 |     {
 68 |       unsigned obs_size, padded_obs_size, nb_train_obs;
 69 |       // Load train data
 70 |       ml::matrix_t<DataType> sycl_train_data;
 71 |       {
 72 |         auto host_train_data = read_mnist_images<DataType>(
 73 |             mnist_get_train_images_path(mnist_path), obs_size, padded_obs_size,
 74 |             nb_train_obs, false, true, normalize_factor);
 75 |         if (host_train_data.empty()) {
 76 |           return;
 77 |         }
 78 |         ml::matrix_t<DataType> sycl_train_data_raw(
 79 |             host_train_data.data(),
 80 |             cl::sycl::range<2>(nb_train_obs, padded_obs_size));
 81 |         // Specify the real size of an observation
 82 |         sycl_train_data_raw.data_range[1] = obs_size;
 83 |         sycl_train_data_raw.set_final_data(nullptr);
 84 | 
 85 |         sycl_train_data =
 86 |             apply_pca.compute_and_apply(q, sycl_train_data_raw, pca_args);
 87 |       }
 88 | 
 89 |       // Load labels
 90 |       auto host_train_labels = read_mnist_labels<LabelType>(
 91 |           mnist_get_train_labels_path(mnist_path), nb_train_obs);
 92 |       if (host_train_labels.empty()) {
 93 |         return;
 94 |       }
 95 | 
 96 |       if (load_classifier) {
 97 |         classifier.load_from_disk(q);
 98 |       } else {
 99 |         {  // Create a scope to time only the training
100 |           TIME(train_classifier);
101 |           // Give the sets of labels to avoid computing it during the training
102 |           classifier.set_label_set(label_set);
103 |           classifier.train(q, sycl_train_data, host_train_labels);
104 |           // Wait to measure the correct training time
105 |           q.wait_and_throw();
106 |         }
107 |         if (save_classifier) {
108 |           classifier.save_to_disk(q);
109 |         }
110 |       }
111 |     }  // End of train
112 | 
113 |     // Load the test data, apply the PCA using the eigenvectors from the
114 |     // training and test the classifier
115 |     {
116 |       unsigned obs_size, padded_obs_size, nb_test_obs;
117 |       ml::matrix_t<DataType> sycl_test_data;
118 |       {  // Load test data
119 |         auto host_test_data = read_mnist_images<DataType>(
120 |             mnist_get_test_images_path(mnist_path), obs_size, padded_obs_size,
121 |             nb_test_obs, false, true, normalize_factor);
122 |         if (host_test_data.empty()) {
123 |           return;
124 |         }
125 |         ml::matrix_t<DataType> sycl_test_data_raw(
126 |             host_test_data.data(),
127 |             cl::sycl::range<2>(nb_test_obs, padded_obs_size));
128 |         // Specify the real size of an observation
129 |         sycl_test_data_raw.data_range[1] = obs_size;
130 |         sycl_test_data_raw.set_final_data(nullptr);
131 | 
132 |         sycl_test_data = apply_pca.apply(q, sycl_test_data_raw);
133 |       }
134 | 
135 |       // Inference
136 |       TIME(predict_classifier);
137 |       auto sycl_predicted_test_labels = classifier.predict(q, sycl_test_data);
138 |       // Can be rounded up to a power of 2
139 |       auto nb_labels_predicted = sycl_predicted_test_labels.get_kernel_size();
140 |       host_predicted_test_labels.resize(nb_labels_predicted);
141 |       sycl_predicted_test_labels.set_final_data(
142 |           host_predicted_test_labels.data());
143 |       // Wait to measure the correct prediction time
144 |       q.wait_and_throw();
145 |     }  // End of tests
146 | 
147 |     clear_eigen_device();
148 |   }  // SYCL queue is destroyed
149 | 
150 |   // Compare predicted labels and expected labels
151 |   unsigned nb_test_obs;
152 |   auto host_expected_test_labels = read_mnist_labels<LabelType>(
153 |       mnist_get_test_labels_path(mnist_path), nb_test_obs);
154 |   if (host_expected_test_labels.empty()) {
155 |     return;
156 |   }
157 |   classifier.print_score(host_predicted_test_labels.data(),
158 |                          host_expected_test_labels.data(), nb_test_obs);
159 | }
160 | 
161 | #endif  // EXAMPLE_SRC_MNIST_RUN_CLASSIFIER_HPP
162 | 


--------------------------------------------------------------------------------
/example/src/mnist/run_gauss_classifier.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include "ml/classifiers/bayes/bayes_classifier.hpp"
17 | #include "ml/classifiers/bayes/distributions/log_gaussian_distribution.hpp"
18 | #include "run_classifier.hpp"
19 | 
20 | int main(int argc, char** argv) {
21 |   std::string mnist_path = "data/mnist";
22 |   if (argc >= 2) {
23 |     mnist_path = argv[1];
24 |   }
25 | 
26 |   // Runs the gaussian classifier on MNIST with a PCA
27 |   using data_t = float;
28 |   using distribution_t = ml::buffered_log_gaussian_distribution<data_t>;
29 |   ml::pca_args<data_t> pca_args;
30 |   pca_args.min_nb_vecs = 64;    // Keep at least 64 basis vector
31 |   pca_args.keep_percent = 0.8;  // Keep at least 80% of information
32 |   pca_args.scale_factor = 1E2;  // More accurate but slower PCA
33 |   try {
34 |     run_classifier<ml::bayes_classifier<distribution_t, uint8_t>>(mnist_path,
35 |                                                                   pca_args);
36 |   } catch (cl::sycl::exception e) {
37 |     std::cerr << e.what();
38 |   }
39 | 
40 |   return 0;
41 | }
42 | 


--------------------------------------------------------------------------------
/example/src/mnist/run_gmm.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include "ml/classifiers/bayes/distributions/log_gaussian_distribution.hpp"
17 | #include "ml/classifiers/em/em_classifier.hpp"
18 | #include "ml/classifiers/em/log_model_per_label.hpp"
19 | #include "run_classifier.hpp"
20 | 
21 | int main(int argc, char** argv) {
22 |   std::string mnist_path = "data/mnist";
23 |   if (argc >= 2) {
24 |     mnist_path = argv[1];
25 |   }
26 | 
27 |   // Runs the GMM with 8 models per label on MNIST with a PCA
28 |   using data_t = float;
29 |   using label_t = uint8_t;
30 |   using distribution_t = ml::buffered_log_gaussian_distribution<data_t>;
31 |   static constexpr unsigned M = 8;
32 |   ml::pca_args<data_t> pca_args;
33 |   pca_args.min_nb_vecs = 64;    // Keep at least 64 basis vector
34 |   pca_args.keep_percent = 0.8;  // Keep at least 80% of information
35 |   pca_args.scale_factor = 1E2;  // More accurate but slower PCA
36 |   try {
37 |     run_classifier<
38 |         ml::em_classifier<label_t, ml::log_model_per_label<M, distribution_t>>>(
39 |         mnist_path, pca_args);
40 |   } catch (cl::sycl::exception e) {
41 |     std::cerr << e.what();
42 |   }
43 | 
44 |   return 0;
45 | }
46 | 


--------------------------------------------------------------------------------
/example/src/mnist/run_lin_classifier.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include "ml/classifiers/bayes/linear_classifier.hpp"
17 | #include "run_classifier.hpp"
18 | 
19 | int main(int argc, char** argv) {
20 |   std::string mnist_path = "data/mnist";
21 |   if (argc >= 2) {
22 |     mnist_path = argv[1];
23 |   }
24 |   // Runs the linear classifier on MNIST with a PCA
25 |   using data_t = float;
26 |   ml::pca_args<data_t> pca_args;
27 |   pca_args.min_nb_vecs = 128;   // Keep at least 128 basis vector
28 |   pca_args.keep_percent = 0.8;  // Keep at least 80% of information
29 |   pca_args.scale_factor = 1E2;  // More accurate but slower PCA
30 |   try {
31 |     run_classifier<ml::linear_classifier<data_t, uint8_t>>(mnist_path,
32 |                                                            pca_args);
33 |   } catch (cl::sycl::exception e) {
34 |     std::cerr << e.what();
35 |   }
36 | 
37 |   return 0;
38 | }
39 | 


--------------------------------------------------------------------------------
/example/src/mnist/run_svm.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include "ml/classifiers/svm/svm.hpp"
17 | #include "run_classifier.hpp"
18 | 
19 | int main(int argc, char** argv) {
20 |   std::string mnist_path = "data/mnist";
21 |   if (argc >= 2) {
22 |     mnist_path = argv[1];
23 |   }
24 | 
25 |   // Runs the SVM with the RBF kernel on MNIST with a PCA.
26 |   // The SVM will store 2 rows of the kernel matrix and has a tolerance of 0.1
27 |   using data_t = float;
28 |   using label_t = uint8_t;
29 |   using svm_kernel_t = ml::svm_rbf_kernel<data_t>;
30 | 
31 |   const data_t C = 5;            // Parameter of a C-SVM
32 |   const svm_kernel_t ker(0.05);  // Parameter of the RBF kernel
33 | 
34 |   ml::pca_args<data_t> pca_args;
35 |   pca_args.min_nb_vecs = 64;    // Keep at least 64 basis vector
36 |   pca_args.keep_percent = 0.8;  // Keep at least 80% of information
37 |   pca_args.scale_factor = 1E2;  // More accurate but slower PCA
38 | 
39 |   try {
40 |     run_classifier(mnist_path, pca_args,
41 |                    ml::svm<svm_kernel_t, label_t>(C, ker, 2, 0.1, 0.1));
42 |   } catch (cl::sycl::exception e) {
43 |     std::cerr << e.what();
44 |   }
45 | 
46 |   return 0;
47 | }
48 | 


--------------------------------------------------------------------------------
/example/src/utils/scoped_timer.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef EXAMPLE_SRC_UTILS_SCOPED_TIMER_HPP
17 | #define EXAMPLE_SRC_UTILS_SCOPED_TIMER_HPP
18 | 
19 | #include <chrono>
20 | #include <iostream>
21 | #include <string>
22 | 
/**
 * @brief scoped_timer measures and prints the time elapsed between the
 * creation and the destruction of the object.
 *
 * A "Starting <name>" line is printed on construction and "<name>: <t>s" on
 * destruction. Each timer alive at that moment adds 2 spaces of indentation,
 * so nested timers are printed as a tree.
 *
 * The indentation counter is shared by all timers and is not synchronized.
 */
class scoped_timer {
  // A monotonic clock, so that the measured interval cannot be affected by
  // adjustments of the system time
  using sc = std::chrono::steady_clock;

 public:
  /**
   * @brief Print the start message and start measuring.
   * @param name label printed with the start message and with the result
   */
  explicit scoped_timer(const std::string& name) : _name(name) {
    unsigned& indent = indent_level();
    std::cout << std::string(indent, ' ') << "Starting " << _name << std::endl;
    indent += 2;
    _t0 = sc::now();
  }

  // A copy would decrement the shared indentation a second time
  scoped_timer(const scoped_timer&) = delete;
  scoped_timer& operator=(const scoped_timer&) = delete;

  /**
   * @brief Stop measuring and print the elapsed time in seconds.
   */
  ~scoped_timer() {
    const std::chrono::duration<double> diff = sc::now() - _t0;
    unsigned& indent = indent_level();
    indent -= 2;
    std::cout << std::string(indent, ' ') << _name << ": " << diff.count()
              << "s" << std::endl;
  }

 private:
  /**
   * @brief Indentation shared by all the timers.
   *
   * Stored as a function-local static so that this header can be included in
   * several translation units without defining the variable more than once.
   */
  static unsigned& indent_level() {
    static unsigned indent = 0;
    return indent;
  }

  std::string _name;
  sc::time_point _t0;
};
52 | 
/**
 * @brief Declare a scoped_timer variable `_timer_<label>` whose printed name
 * is the stringified `label`, and mark the variable as used.
 */
#define TIME(label)                    \
  scoped_timer _timer_##label(#label); \
  static_cast<void>(_timer_##label)
59 | 
60 | #endif  // EXAMPLE_SRC_UTILS_SCOPED_TIMER_HPP
61 | 


--------------------------------------------------------------------------------
/example/src/utils/sycl_utils.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef EXAMPLE_SRC_UTILS_SYCL_UTILS_HPP
17 | #define EXAMPLE_SRC_UTILS_SYCL_UTILS_HPP
18 | 
19 | #include "ml/utils/common.hpp"
20 | 
21 | class init_first_kernel;
22 | 
23 | /**
24 |  * @brief Used to avoid measuring OpenCL initialization overhead
25 |  * @param q
26 |  */
27 | void launch_first_kernel(cl::sycl::queue& q) {
28 |   q.submit([](cl::sycl::handler& cgh) {
29 |     cgh.single_task<init_first_kernel>([]() {});
30 |   });
31 | }
32 | 
33 | /**
34 |  * @brief Initialize device_constants and return the queue.
35 |  * @return the sycl queue
36 |  */
37 | cl::sycl::queue& create_queue() {
38 |   ml::device_constants<>::instance = new ml::device_constants<>();
39 |   auto& q = ml::get_eigen_device().sycl_queue();
40 |   launch_first_kernel(q);
41 |   return q;
42 | }
43 | 
44 | /**
45 |  * @brief Free the singleton device_constants.
46 |  */
47 | void clear_eigen_device() {
48 |   delete ml::get_device_constants();
49 | }
50 | 
51 | #endif  // EXAMPLE_SRC_UTILS_SYCL_UTILS_HPP
52 | 


--------------------------------------------------------------------------------
/include/ml/classifiers/bayes/bayes_classifier.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef INCLUDE_ML_CLASSIFIERS_BAYES_BAYES_CLASSIFIER_HPP
17 | #define INCLUDE_ML_CLASSIFIERS_BAYES_BAYES_CLASSIFIER_HPP
18 | 
19 | #include "ml/classifiers/data_splitter_extremum_dist.hpp"
20 | 
21 | namespace ml {
22 | 
23 | /**
24 |  * @brief Naive Bayes Classifier
25 |  *
26 |  * Compute the parameters of a distribution during the training.
27 |  * Use the parameters during the inference.
28 |  *
29 |  * @tparam DistributionT
30 |  * @tparam LabelT
31 |  */
32 | template <class DistributionT, class LabelT>
33 | class bayes_classifier
34 |     : public data_splitter_extremum_dist<typename DistributionT::DataType,
35 |                                          LabelT, GREATER> {
36 |  public:
37 |   using DataType = typename DistributionT::DataType;
38 | 
39 |  protected:
40 |   std::vector<DistributionT> _distributions;
41 | 
42 |   virtual void train_setup_for_each_label(queue& q) override {
43 |     data_splitter_extremum_dist<DataType, LabelT,
44 |                                 GREATER>::train_setup_for_each_label(q);
45 | 
46 |     range<1> data_dim_rng(this->_data_dim);
47 |     auto data_dim_pow2_rng = get_optimal_nd_range(this->_data_dim_pow2);
48 |     range<2> data_dim_rng_d2(this->_data_dim, this->_data_dim);
49 |     auto data_dim_pow2_rng_d2 =
50 |         get_optimal_nd_range(this->_data_dim_pow2, this->_data_dim_pow2);
51 | 
52 |     auto nb_labels = this->get_nb_labels();
53 |     for (unsigned l = 0; l < nb_labels; ++l) {
54 |       _distributions.emplace_back();
55 |       _distributions.back().init(data_dim_rng, data_dim_pow2_rng,
56 |                                  data_dim_rng_d2, data_dim_pow2_rng_d2);
57 |     }
58 |   }
59 | 
60 |   virtual void train_for_each_label(queue& q, unsigned label_idx,
61 |                                     matrix_t<DataType>& act_data) override {
62 |     _distributions[label_idx].compute(q, act_data);
63 |   }
64 | 
65 |   virtual void compute_dist(queue& q, matrix_t<DataType>& dataset,
66 |                             matrix_t<DataType>& dist) override {
67 |     auto nb_labels = this->get_nb_labels();
68 |     for (SYCLIndexT l = 0; l < nb_labels; ++l) {
69 |       auto dist_row = dist.get_row(l);
70 |       _distributions[l].compute_dist(q, dataset, dist_row);
71 |     }
72 |   }
73 | };
74 | 
75 | }  // namespace ml
76 | 
77 | #endif  // INCLUDE_ML_CLASSIFIERS_BAYES_BAYES_CLASSIFIER_HPP
78 | 


--------------------------------------------------------------------------------
/include/ml/classifiers/bayes/linear_classifier.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef INCLUDE_ML_CLASSIFIERS_BAYES_LINEAR_CLASSIFIER_HPP
17 | #define INCLUDE_ML_CLASSIFIERS_BAYES_LINEAR_CLASSIFIER_HPP
18 | 
19 | #include "ml/classifiers/data_splitter_extremum_dist.hpp"
20 | 
21 | namespace ml {
22 | 
23 | /**
24 |  * @brief Naive Bayes Classifier with a linear function.
25 |  *
26 |  * During the training, compute the average for each label.
27 |  * The distance is the Euclidean distance between the learned average and the
28 |  * given sample. The index of the smallest distance then gives the predicted
29 |  * label.
30 |  *
31 |  * The linear_classifier could be written using the bayes_classifier but is
32 |  * simpler and faster this way.
33 |  *
34 |  * @tparam DataT
35 |  * @tparam LabelT
36 |  */
37 | template <class DataT, class LabelT>
38 | class linear_classifier : public data_splitter_extremum_dist<DataT, LabelT> {
39 |  protected:
40 |   vector_t<DataT> _act_data_avg;
41 |   matrix_t<DataT> _data_avg_per_label;
42 | 
43 |   virtual void train_setup_for_each_label(queue& q) override {
44 |     data_splitter_extremum_dist<DataT, LabelT>::train_setup_for_each_label(q);
45 | 
46 |     auto nb_labels = this->get_nb_labels();
47 |     _act_data_avg = vector_t<DataT>(range<1>(this->_data_dim),
48 |                                     get_optimal_nd_range(this->_data_dim_pow2));
49 |     _data_avg_per_label =
50 |         matrix_t<DataT>(range<2>(nb_labels, this->_data_dim),
51 |                         get_optimal_nd_range(nb_labels, this->_data_dim_pow2));
52 |   }
53 | 
54 |   virtual void train_for_each_label(queue& q, unsigned label_idx,
55 |                                     matrix_t<DataT>& act_data) override {
56 |     avg(q, act_data, _act_data_avg);
57 |     copy_vec_to_mat<ROW, access::mode::discard_write>(
58 |         q, _data_avg_per_label, _act_data_avg, _act_data_avg.kernel_range,
59 |         static_cast<SYCLIndexT>(label_idx));
60 |   }
61 | 
62 |   virtual void compute_dist(queue&, matrix_t<DataT>& dataset,
63 |                             matrix_t<DataT>& dist) override {
64 |     // Sum squared each pixel
65 |     eig_index_t nb_labels = static_cast<eig_index_t>(access_data_dim(dist, 0));
66 |     eig_index_t nb_obs = static_cast<eig_index_t>(access_data_dim(dataset, 0));
67 |     eig_index_t data_dim_pow2 = static_cast<eig_index_t>(this->_data_dim_pow2);
68 | 
69 |     auto eig_dataset = sycl_to_eigen(dataset);
70 |     auto eig_data_avg_per_label = sycl_to_eigen(_data_avg_per_label);
71 | 
72 |     auto eig_dist = sycl_to_eigen(dist);
73 |     auto dataset_3d = eig_dataset.tensor()
74 |                           .reshape(eig_dims_t<3>{nb_obs, 1, data_dim_pow2})
75 |                           .broadcast(eig_dims_t<3>{1, nb_labels, 1});
76 |     auto data_avg_per_label_3d =
77 |         eig_data_avg_per_label.tensor()
78 |             .reshape(eig_dims_t<3>{1, nb_labels, data_dim_pow2})
79 |             .broadcast(eig_dims_t<3>{nb_obs, 1, 1});
80 |     auto sliced_dist = eig_dist.tensor().slice(
81 |         eig_dsize_t<2>{0, 0}, eig_dsize_t<2>(nb_labels, nb_obs));
82 |     sliced_dist.device(get_eigen_device()) =
83 |         (dataset_3d - data_avg_per_label_3d)
84 |             .square()
85 |             .sum(eig_dims_t<1>{2})
86 |             .shuffle(eig_dims_t<2>{1, 0});
87 |   }
88 | };
89 | 
90 | }  // namespace ml
91 | 
92 | #endif  // INCLUDE_ML_CLASSIFIERS_BAYES_LINEAR_CLASSIFIER_HPP
93 | 


--------------------------------------------------------------------------------
/include/ml/classifiers/classifier.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_CLASSIFIERS_CLASSIFIER_HPP
 17 | #define INCLUDE_ML_CLASSIFIERS_CLASSIFIER_HPP
 18 | 
 19 | #include <iomanip>
 20 | #include <iostream>
 21 | #include <unordered_map>
 22 | #include <vector>
 23 | 
 24 | #include "ml/utils/common.hpp"
 25 | 
 26 | namespace ml {
 27 | 
 28 | /**
 29 |  * @brief Abstract class for all classifiers.
 30 |  *
 31 |  * @tparam DataT type of the dataset
 32 |  * @tparam LabelT type of the labels
 33 |  */
 34 | template <class DataT, class LabelT>
 35 | class classifier {
 36 |  public:
 37 |   using DataType = DataT;
 38 |   using LabelType = LabelT;
 39 | 
 40 |   classifier() = default;
 41 |   classifier(const classifier&) = default;
 42 |   classifier(classifier&&) = default;
 43 |   classifier& operator=(const classifier&) = default;
 44 |   classifier& operator=(classifier&&) = default;
 45 |   virtual ~classifier() = default;  // Allow destruction through the base
 46 | 
 47 |   /**
 48 |    * @brief Train the classifier.
 49 |    *
 50 |    * @param q
 51 |    * @param dataset
 52 |    * @param labels
 53 |    * @param nb_labels number of different labels, must be set if set_label_set
 54 |    * has not been called
 55 |    */
 56 |   virtual void train(queue& q, matrix_t<DataT>& dataset,
 57 |                      std::vector<LabelT>& labels, unsigned nb_labels = 0) = 0;
 58 | 
 59 |   /**
 60 |    * @brief Predict labels with the given observations.
 61 |    *
 62 |    * @param q
 63 |    * @param dataset
 64 |    * @return the predicted labels
 65 |    */
 66 |   virtual vector_t<LabelT> predict(queue& q, matrix_t<DataT>& dataset) = 0;
 67 | 
 68 |   /**
 69 |    * @brief Print statistics about the predicted labels.
 70 |    *
 71 |    * Compute and print the success rate, the precision and recall averaged
 72 |    * over the labels, the F1-score and the confusion matrix (rows: expected
 73 |    * labels, columns: predicted labels). A ratio over 0 counts as 0.
 74 |    *
 75 |    * @param[in] predicted
 76 |    * @param[in] expected
 77 |    * @param nb_obs number of elements in predicted and in expected
 78 |    * @param nb_labels number of different labels
 79 |    * @param[in] label_user_to_label_idx index of each user label
 80 |    * @throw std::out_of_range if a label is not in label_user_to_label_idx
 81 |    */
 82 |   static void print_score(
 83 |       const LabelT* predicted, const LabelT* expected, unsigned nb_obs,
 84 |       unsigned nb_labels,
 85 |       const std::unordered_map<LabelT, unsigned>& label_user_to_label_idx) {
 86 |     std::vector<unsigned> cm(nb_labels * nb_labels, 0);
 87 |     for (unsigned i = 0; i < nb_obs; ++i) {
 88 |       cm[label_user_to_label_idx.at(expected[i]) * nb_labels +
 89 |          label_user_to_label_idx.at(predicted[i])] += 1;
 90 |     }
 91 | 
 92 |     // Avoid NaN when a label is never expected or never predicted
 93 |     auto ratio = [](double a, double b) { return b > 0 ? a / b : 0.0; };
 94 |     double success_rate = 0;
 95 |     double precision = 0;
 96 |     double recall = 0;
 97 |     for (unsigned i = 0; i < nb_labels; ++i) {
 98 |       double nb_expected = 0;   // Sum of row i
 99 |       double nb_predicted = 0;  // Sum of column i
100 |       for (unsigned j = 0; j < nb_labels; ++j) {
101 |         nb_expected += cm[i * nb_labels + j];
102 |         nb_predicted += cm[j * nb_labels + i];
103 |       }
104 |       const double diag_val = cm[i * nb_labels + i];
105 |       success_rate += diag_val;
106 |       precision += ratio(diag_val, nb_predicted);
107 |       recall += ratio(diag_val, nb_expected);
108 |     }
109 |     success_rate = ratio(success_rate, nb_obs);
110 |     precision = ratio(precision, nb_labels);
111 |     recall = ratio(recall, nb_labels);
112 |     const double f1_score = ratio(2 * (precision * recall), precision + recall);
113 | 
114 |     std::cout << "\nSuccess rate: " << success_rate * 100 << "%\n";
115 |     std::cout << "Precision: " << precision * 100 << "%\n";
116 |     std::cout << "Recall: " << recall * 100 << "%\n";
117 |     std::cout << "F1-score: " << f1_score << "\n\n";
118 | 
119 |     std::cout << "Confusion matrix:\n";
120 |     char prev_fill = std::cout.fill(' ');
121 |     for (unsigned i = 0; i < nb_labels; ++i) {
122 |       for (unsigned j = 0; j + 1 < nb_labels; ++j) {
123 |         std::cout << std::left << std::setw(5) << cm[i * nb_labels + j] << ' ';
124 |       }
125 |       std::cout << std::left << cm[i * nb_labels + nb_labels - 1] << '\n';
126 |     }
127 |     std::cout.fill(prev_fill);
128 |   }
129 | 
130 |   inline void print_score(const LabelT* predicted, const LabelT* expected,
131 |                           unsigned nb_obs) {
132 |     classifier<DataT, LabelT>::print_score(
133 |         predicted, expected, nb_obs, get_nb_labels(), _label_user_to_label_idx);
134 |   }
135 | 
136 |   virtual void load_from_disk(queue&) { assert(false); }  // To be overridden
137 |   virtual void save_to_disk(queue&) { assert(false); }    // To be overridden
138 | 
139 |   inline unsigned get_nb_labels() const {
140 |     return static_cast<unsigned>(_host_label_idx_to_label_user.size());
141 |   }
142 | 
143 |   /**
144 |    * @brief Give the set of labels instead of computing it during the training.
145 |    *
146 |    * Optional, to call before the training: the given labels are appended.
147 |    *
148 |    * @tparam LabelSet any type with a begin and end method used for copy
149 |    * @param[in] label_set
150 |    */
151 |   template <class LabelSet>
152 |   void set_label_set(const LabelSet& label_set) {
153 |     std::copy(label_set.begin(), label_set.end(),
154 |               std::back_inserter(_host_label_idx_to_label_user));
155 |     assert(_host_label_idx_to_label_user.size() > 0);
156 |     setup_host_label_idx_to_label_user();
157 |   }
158 | 
159 |   /**
160 |    * @brief Compute the list of observation indices of each label.
161 |    *
162 |    * @tparam HostLabelsT any type with a square bracket accessor
163 |    * @param host_labels user labels, an unknown label is mapped to index 0
164 |    * @param nb_labels number of different labels
165 |    * @param nb_obs number of elements in host_labels
166 |    * @return for each label index, the indices of its observations
167 |    */
168 |   template <class HostLabelsT>
169 |   std::vector<std::vector<SYCLIndexT>> get_labels_indices(
170 |       const HostLabelsT& host_labels, unsigned nb_labels, unsigned nb_obs) {
171 |     std::vector<std::vector<SYCLIndexT>> labels_indices(nb_labels);
172 |     for (unsigned i = 0; i < nb_obs; ++i) {
173 |       labels_indices[this->_label_user_to_label_idx[host_labels[i]]].push_back(
174 |           i);
175 |     }
176 |     return labels_indices;
177 |   }
178 | 
179 |  protected:
180 |   std::vector<LabelT> _host_label_idx_to_label_user;
181 |   vector_t<LabelT> _label_idx_to_label_user;
182 |   std::unordered_map<LabelT, unsigned> _label_user_to_label_idx;
183 | 
184 |   /**
185 |    * @brief Fill the label containers unless the labels were already set.
186 |    *
187 |    * @tparam HostLabelT any type accessible with square brackets
188 |    * @param[in] host_labels must hold at least nb_labels different labels
189 |    * @param nb_labels number of different labels to find
190 |    */
191 |   template <class HostLabelT>
192 |   void process_labels(const HostLabelT& host_labels, unsigned nb_labels) {
193 |     // Labels have been set by the user beforehand
194 |     if (!_label_user_to_label_idx.empty()) {
195 |       return;
196 |     }
197 |     // Find all different labels
198 |     _host_label_idx_to_label_user.reserve(nb_labels);
199 |     for (unsigned i = 0; _host_label_idx_to_label_user.size() < nb_labels;
200 |          ++i) {
201 |       auto user_label = host_labels[i];
202 |       auto it = std::find(_host_label_idx_to_label_user.begin(),
203 |                           _host_label_idx_to_label_user.end(), user_label);
204 |       if (it == _host_label_idx_to_label_user.end()) {
205 |         _host_label_idx_to_label_user.push_back(user_label);
206 |       }
207 |     }
208 |     std::sort(_host_label_idx_to_label_user.begin(),
209 |               _host_label_idx_to_label_user.end());
210 |     setup_host_label_idx_to_label_user();
211 |   }
212 | 
213 |   /**
214 |    * @brief Copy _host_label_idx_to_label_user to the device and to
215 |    * _label_user_to_label_idx.
216 |    */
217 |   void setup_host_label_idx_to_label_user() {
218 |     auto nb_labels = _host_label_idx_to_label_user.size();
219 |     _label_idx_to_label_user = vector_t<LabelT>(
220 |         const_cast<const LabelT*>(_host_label_idx_to_label_user.data()),
221 |         range<1>(nb_labels));
222 |     // Map label user back to label idx
223 |     for (unsigned i = 0; i < nb_labels; ++i) {
224 |       _label_user_to_label_idx[_host_label_idx_to_label_user[i]] = i;
225 |     }
226 |   }
227 | 
228 |   /**
229 |    * @brief Check that nb_labels was given or set_label_set has been called.
230 |    *
231 |    * @param[in, out] nb_labels set to get_nb_labels() if given as 0
232 |    * @return false, after printing an error, if nb_labels is still 0 or is 1
233 |    */
234 |   bool check_nb_labels(unsigned& nb_labels) {
235 |     if (nb_labels == 0) {
236 |       nb_labels = get_nb_labels();
237 |       if (nb_labels == 0) {
238 |         std::cerr << "Error: set_label_set must be called before training if "
239 |                   << "nb_labels is 0." << std::endl;
240 |         return false;
241 |       }
242 |     }
243 |     if (nb_labels == 1) {
244 |       std::cerr << "Error: must have more than one label." << std::endl;
245 |       return false;
246 |     }
247 |     return true;
248 |   }
249 | };
250 | 
251 | }  // namespace ml
252 | 
253 | #endif  // INCLUDE_ML_CLASSIFIERS_CLASSIFIER_HPP
254 | 


--------------------------------------------------------------------------------
/include/ml/classifiers/data_splitter.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_HPP
17 | #define INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_HPP
18 | 
19 | #include <algorithm>
20 | #include <vector>
21 | 
22 | #include "ml/classifiers/classifier.hpp"
23 | #include "ml/math/mat_ops.hpp"
24 | 
25 | namespace ml {
26 | 
27 | /**
28 |  * @brief Abstract class of all classifiers needing to split the data for each
29 |  * label.
30 |  *
31 |  * @tparam DataT
32 |  * @tparam LabelT
33 |  */
34 | template <class DataT, class LabelT>
35 | class data_splitter : public virtual classifier<DataT, LabelT> {
36 |   template <int Index, typename... Details>
37 |   using NameGenDS = NameGen<Index, data_splitter, Details..., DataT, LabelT>;
38 | 
39 |  public:
40 |   /**
41 |    * @brief Call train_for_each_label once per label with its sub-dataset.
42 |    *
43 |    * Nothing is trained if check_nb_labels rejects nb_labels.
44 |    *
45 |    * @param q
46 |    * @param dataset one observation per row
47 |    * @param host_labels label of each observation of dataset
48 |    * @param nb_labels number of different labels
49 |    */
50 |   void train(queue& q, matrix_t<DataT>& dataset,
51 |              std::vector<LabelT>& host_labels,
52 |              unsigned nb_labels = 0) override {
53 |     const bool valid_nb_labels = this->check_nb_labels(nb_labels);
54 |     if (!valid_nb_labels) {
55 |       return;
56 |     }
57 | 
58 |     auto nb_obs = access_data_dim(dataset, 0);
59 |     assert_eq(nb_obs, host_labels.size());
60 | 
61 |     _data_dim = access_data_dim(dataset, 1);
62 |     _data_dim_pow2 = access_ker_dim(dataset, 1);
63 |     this->process_labels(host_labels, nb_labels);
64 | 
65 |     // Group the indices of the observations by label
66 |     auto indices_per_label =
67 |         this->get_labels_indices(host_labels, nb_labels, nb_obs);
68 | 
69 |     // Train on the observations of each label, one label at a time
70 |     train_setup_for_each_label(q);
71 |     for (unsigned label_idx = 0; label_idx < nb_labels; ++label_idx) {
72 |       const auto& obs_indices = indices_per_label[label_idx];
73 |       auto label_data = split_by_index(q, dataset, obs_indices);
74 |       train_for_each_label(q, label_idx, label_data);
75 |     }
76 |   }
77 | 
78 |  protected:
79 |   SYCLIndexT _data_dim;       // Set by train: access_data_dim(dataset, 1)
80 |   SYCLIndexT _data_dim_pow2;  // Set by train: access_ker_dim(dataset, 1)
81 | 
82 |   virtual void train_setup_for_each_label(queue&) {}
83 |   virtual void train_for_each_label(queue& q, unsigned label_idx,
84 |                                     matrix_t<DataT>& act_data) = 0;
85 | };
86 | 
87 | }  // namespace ml
88 | 
89 | #endif  // INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_HPP
90 | 


--------------------------------------------------------------------------------
/include/ml/classifiers/data_splitter_extremum_dist.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_MIN_DIST_HPP
17 | #define INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_MIN_DIST_HPP
18 | 
19 | #include "ml/classifiers/data_splitter.hpp"
20 | #include "ml/classifiers/extremum_dist.hpp"
21 | 
22 | namespace ml {
23 | 
24 | /**
25 |  * @brief Abstract class regrouping the data_splitter and extremum_dist classes.
26 |  *
27 |  * @tparam DataT
28 |  * @tparam LabelT
29 |  * @tparam Compare minimize or maximize the computed distance
30 |  */
31 | template <class DataT, class LabelT, extremum_dist_compare Compare = LESS>
32 | class data_splitter_extremum_dist
33 |     : public data_splitter<DataT, LabelT>,
34 |       public extremum_dist<DataT, LabelT, Compare> {
35 |  protected:
36 |   void train_setup_for_each_label(queue&) override {
37 |     this->_predict_data_dim_assert = this->_data_dim;  // Checked by predict
38 |   }
39 | };
40 | 
41 | }  // namespace ml
42 | 
43 | #endif  // INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_MIN_DIST_HPP
44 | 


--------------------------------------------------------------------------------
/include/ml/classifiers/em/em_classifier.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef INCLUDE_ML_CLASSIFIERS_EM_EM_CLASSIFIER_HPP
17 | #define INCLUDE_ML_CLASSIFIERS_EM_EM_CLASSIFIER_HPP
18 | 
19 | #include <vector>
20 | 
21 | #include "ml/classifiers/data_splitter_extremum_dist.hpp"
22 | 
23 | namespace ml {
24 | 
25 | /**
26 |  * @brief Classifier using the EM algorithm.
27 |  *
28 |  * If used with log_model_per_label and log_gaussian_distribution this
29 |  * implements a GMM. The GMM learn M models per label (instead of M=1 with a
30 |  * GaussClassifier).
31 |  *
32 |  * @see log_model_per_label
33 |  * @tparam LabelT
34 |  * @tparam ModelPerLabelT type of the model to use
35 |  */
36 | template <class LabelT, class ModelPerLabelT>
37 | class em_classifier : public data_splitter_extremum_dist<
38 |                           typename ModelPerLabelT::DataType, LabelT, GREATER> {
39 |  public:
40 |   using DataType = typename ModelPerLabelT::DataType;
41 | 
42 |   em_classifier(ModelPerLabelT model_impl = ModelPerLabelT())
43 |       : Base(), _model_impl(model_impl) {}
44 | 
45 |   void load_from_disk(queue& q) override {
46 |     setup_models();  // The models do not exist yet if train was not called
47 |     for (auto& em : _ems) {
48 |       em.load_from_disk(q);
49 |     }
50 |   }
51 | 
52 |   void save_to_disk(queue& q) override {
53 |     for (auto& em : _ems) {
54 |       em.save_to_disk(q);
55 |     }
56 |   }
57 | 
58 |  protected:
59 |   std::vector<ModelPerLabelT> _ems;
60 | 
61 |   void setup_models() {  // Add the missing models: one model per label
62 |     for (unsigned i = _ems.size(); i < this->get_nb_labels(); ++i) {
63 |       _ems.push_back(_model_impl);  // Copy model parameters
64 |       _ems.back().set_idx(i);
65 |     }
66 |   }
67 | 
68 |   void train_setup_for_each_label(queue& q) override {
69 |     Base::train_setup_for_each_label(q);
70 |     setup_models();
71 |   }
72 | 
73 |   void train_for_each_label(queue& q, unsigned label_idx,
74 |                             matrix_t<DataType>& act_data) override {
75 |     _ems[label_idx].train(q, act_data);
76 |   }
77 | 
78 |   void compute_dist(queue& q, matrix_t<DataType>& dataset,
79 |                     matrix_t<DataType>& dist) override {
80 |     for (unsigned i = 0; i < this->get_nb_labels(); ++i) {
81 |       auto dist_row = dist.get_row(i);
82 |       _ems[i].compute_llk(q, dataset, dist_row);
83 |     }
84 |   }
85 | 
86 |  private:
87 |   using Base = data_splitter_extremum_dist<DataType, LabelT, GREATER>;
88 |   ModelPerLabelT _model_impl;
89 | };
90 | 
91 | }  // namespace ml
92 | 
93 | #endif  // INCLUDE_ML_CLASSIFIERS_EM_EM_CLASSIFIER_HPP
94 | 


--------------------------------------------------------------------------------
/include/ml/classifiers/extremum_dist.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_CLASSIFIERS_MIN_DIST_HPP
 17 | #define INCLUDE_ML_CLASSIFIERS_MIN_DIST_HPP
 18 | 
 19 | #include "ml/classifiers/classifier.hpp"
 20 | #include "ml/math/mat_ops.hpp"
 21 | 
 22 | namespace ml {
 23 | 
 24 | /**
 25 |  * @brief Determine whether the classifier should minimize or maximize the
 26 |  * computed distance.
 27 |  */
 28 | enum extremum_dist_compare { LESS, GREATER };
 29 | 
 30 | namespace detail {
 31 | 
 32 | /// @brief Comparison functor and sign matching an extremum_dist_compare.
 33 | template <extremum_dist_compare Compare>
 34 | struct compare_detail;
 35 | 
 36 | template <>
 37 | struct compare_detail<LESS> {
 38 |   static constexpr int SIGN = -1;
 39 |   template <class T>
 40 |   using Op = std::less<T>;
 41 | };
 42 | 
 43 | template <>
 44 | struct compare_detail<GREATER> {
 45 |   static constexpr int SIGN = 1;
 46 |   template <class T>
 47 |   using Op = std::greater<T>;
 48 | };
 49 | 
 50 | }  // namespace detail
 51 | 
 52 | /**
 53 |  * @brief Base of the classifiers predicting the label of extremum distance.
 54 |  *
 55 |  * @tparam DataT type of the dataset and of the distances
 56 |  * @tparam LabelT type of the labels
 57 |  * @tparam Compare minimize or maximize the computed distance
 58 |  */
 59 | template <class DataT, class LabelT, extremum_dist_compare Compare = LESS>
 60 | class extremum_dist : public virtual classifier<DataT, LabelT> {
 61 |  protected:
 62 |   using Op = typename detail::compare_detail<Compare>::template Op<DataT>;
 63 |   template <int Index, typename... Details>
 64 |   using NameGenED = NameGen<Index, Details..., DataT, LabelT, Op>;
 65 | 
 66 |   static constexpr DataT SIGN =
 67 |       static_cast<DataT>(detail::compare_detail<Compare>::SIGN);
 68 |   SYCLIndexT _predict_data_dim_assert;
 69 | 
 70 |   virtual void compute_dist(queue& q, matrix_t<DataT>& dataset,
 71 |                             matrix_t<DataT>& dist) = 0;
 72 | 
 73 |  public:
 74 |   vector_t<LabelT> predict(queue& q, matrix_t<DataT>& dataset) override {
 75 |     assert_eq(access_data_dim(dataset, 1), this->_predict_data_dim_assert);
 76 | 
 77 |     auto nb_labels = this->get_nb_labels();
 78 |     auto nb_obs = access_data_dim(dataset, 0);
 79 |     auto padded_nb_obs =
 80 |         get_device_constants()->pad_sub_buffer_size<DataT>(nb_obs);
 81 |     // The columns between nb_obs and padded_nb_obs can stay uninitialized.
 82 |     // They only produce random values in the padding of predicted_labels,
 83 |     // which shouldn't be read.
 84 |     matrix_t<DataT> dist(range<2>(nb_labels, nb_obs),
 85 |                          get_optimal_nd_range(nb_labels, padded_nb_obs));
 86 |     compute_dist(q, dataset, dist);
 87 | 
 88 |     // Keep, for each column, the label whose distance is the extremum
 89 |     vector_t<LabelT> predicted_labels(range<1>(nb_obs),
 90 |                                       get_optimal_nd_range(padded_nb_obs));
 91 |     q.submit([this, &dist, &predicted_labels, nb_labels](handler& cgh) {
 92 |       auto dist_acc = dist.template get_access_2d<access::mode::read>(cgh);
 93 |       auto user_label_acc =
 94 |           this->_label_idx_to_label_user
 95 |               .template get_access_1d<access::mode::read>(cgh);
 96 |       auto prediction_acc =
 97 |           predicted_labels.template get_access_1d<access::mode::discard_write>(
 98 |               cgh);
 99 |       cgh.parallel_for<NameGenED<0>>(
100 |           predicted_labels.get_nd_range(), [=](nd_item<1> item) {
101 |             auto col = item.get_global_id(0);
102 |             // Start from the first label then scan the other ones
103 |             int best_idx = 0;
104 |             auto best_dist = dist_acc(best_idx, col);
105 |             for (unsigned i = 1; i < nb_labels; ++i) {  // Loop is small enough
106 |               if (Op()(dist_acc(i, col), best_dist)) {
107 |                 best_dist = dist_acc(i, col);
108 |                 best_idx = i;
109 |               }
110 |             }
111 |             prediction_acc(col) = user_label_acc(best_idx);
112 |           });
113 |     });
114 | 
115 |     return predicted_labels;
116 |   }
117 | };
118 | 
119 | }  // namespace ml
120 | 
121 | #endif  // INCLUDE_ML_CLASSIFIERS_MIN_DIST_HPP
122 | 


--------------------------------------------------------------------------------
/include/ml/classifiers/svm/kernel_cache.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_CLASSIFIERS_SVM_KERNEL_CACHE_HPP
 17 | #define INCLUDE_ML_CLASSIFIERS_SVM_KERNEL_CACHE_HPP
 18 | 
 19 | #include <list>
 20 | #include <set>
 21 | #include <unordered_map>
 22 | #include <vector>
 23 | 
 24 | #include "ml/classifiers/svm/svm_kernels.hpp"
 25 | 
 26 | namespace ml {
 27 | 
 28 | namespace detail {
 29 | 
 30 | /**
 31 |  * @brief Cache either the whole kernel matrix or only the last row used.
 32 |  *
 33 |  * @tparam KerFun
 34 |  * @tparam T
 35 |  */
 36 | template <class KerFun, class T>
 37 | class kernel_cache {
 38 |  public:
 39 |   kernel_cache(queue& q, const KerFun& ker, matrix_t<T>& x,
 40 |                const range<1>& data_rng, const nd_range<1>& ker_rng)
 41 |       : _q(q), _ker(ker), _x(x), _ker_diag_buf(data_rng, ker_rng) {
 42 |     // Compute the diagonal values of ker only once
 43 |     ker(q, x, _ker_diag_buf);
 44 |     auto m = access_ker_dim(x, 0);
 45 |     auto padded_m = to_pow2(m);
 46 |     auto pad_size_rng = get_optimal_nd_range(range<1>(padded_m - m), id<1>(m));
 47 |     if (pad_size_rng.get_global_linear_range() > 0) {  // Padding not empty
 48 |       sycl_memset(q, _ker_diag_buf, pad_size_rng);
 49 |     }
 50 |   }
 51 |   kernel_cache(const kernel_cache&) = default;
 52 |   kernel_cache(kernel_cache&&) = default;
 53 |   virtual ~kernel_cache() = default;  // Polymorphic base class
 54 | 
 55 |   virtual vector_t<T> get_ker_row(SYCLIndexT row) = 0;
 56 | 
 57 |   vector_t<T>& get_ker_diag() { return _ker_diag_buf; }
 58 |   T get_ker_diag(SYCLIndexT row) { return _ker_diag_buf.read_to_host(row); }
 59 | 
 60 |  protected:
 61 |   queue& _q;  // q, ker and x are only referenced: they must outlive the cache
 62 |   const KerFun& _ker;
 63 |   matrix_t<T>& _x;
 64 |   vector_t<T> _ker_diag_buf;  // diagonal of kernel matrix
 65 | };
 66 | 
 67 | /**
 68 |  * @brief Compute the whole kernel matrix once
 69 |  *
 70 |  * If resulting matrix is too big, use kernel_cache_row instead.
 71 |  *
 72 |  * @tparam KerFun
 73 |  * @tparam T
 74 |  */
 75 | template <class KerFun, class T>
 76 | class kernel_cache_matrix : public kernel_cache<KerFun, T> {
 77 |  public:
 78 |   kernel_cache_matrix(queue& q, const KerFun& ker, matrix_t<T>& x,
 79 |                       const range<1>& data_rng, const nd_range<1>& ker_rng)
 80 |       : kernel_cache<KerFun, T>(q, ker, x, data_rng, ker_rng), _ker_mat() {
 81 |     auto n = access_ker_dim(x, 0);
 82 |     auto padded_n = get_device_constants()->pad_sub_buffer_size<T>(n);
 83 |     _ker_mat =
 84 |         matrix_t<T>(range<2>(n, n), get_optimal_nd_range(n, padded_n));
 85 |     ker(q, x, x, _ker_mat);  // Fill the whole kernel matrix once
 86 |   }
 87 | 
 88 |   vector_t<T> get_ker_row(SYCLIndexT row) override {
 89 |     return _ker_mat.get_row(row);  // Every row was computed up front
 90 |   }
 91 | 
 92 |  private:
 93 |   matrix_t<T> _ker_mat;  // Whole kernel matrix
 94 | };
 95 | 
 96 | /**
 97 |  * @brief Map a row index with its corresponding row in the kernel matrix.
 98 |  *
 99 |  * Should be used if the kernel matrix is too large.
100 |  *
101 |  * nb_cache_line is the maximum number of kernel line to cache.
102 |  * It should be 2 for simple kernel (linear or polynomial) and grow bigger for
103 |  * more complex kernels. The maximum size of the cache in byte is sizeof(T) * n
104 |  * * nb_cache_line.
105 |  *
106 |  * @tparam KerFun
107 |  * @tparam T
108 |  */
109 | template <class KerFun, class T>
110 | class kernel_cache_row : public kernel_cache<KerFun, T> {
111 |  public:
112 |   kernel_cache_row(queue& q, const KerFun& ker, matrix_t<T>& x,
113 |                    const range<1>& data_rng, const nd_range<1>& ker_rng,
114 |                    SYCLIndexT nb_cache_line)
115 |       : kernel_cache<KerFun, T>(q, ker, x, data_rng, ker_rng),
116 |         _nb_cache_line(nb_cache_line),
117 |         _ker_cache(), _cache_last_access() {}
118 | 
119 |   vector_t<T> get_ker_row(SYCLIndexT row) override {
120 |     auto cached_it = _ker_cache.find(row);
121 |     if (cached_it != _ker_cache.end()) {
122 |       // Cache hit: row becomes the most recently used one
123 |       auto row_it =
124 |           std::find(_cache_last_access.begin(), _cache_last_access.end(), row);
125 |       _cache_last_access.splice(_cache_last_access.end(), _cache_last_access,
126 |                                 row_it);
127 |       return cached_it->second;
128 |     }
129 | 
130 |     _cache_last_access.push_back(row);
131 |     if (_ker_cache.size() < _nb_cache_line) {
132 |       // Cache not full: compute the row in a new vector
133 |       auto inserted = _ker_cache.emplace(
134 |           std::piecewise_construct, std::forward_as_tuple(row),
135 |           std::forward_as_tuple(this->_ker_diag_buf.data_range,
136 |                                 this->_ker_diag_buf.kernel_range));
137 |       auto& new_row = inserted.first->second;
138 |       this->_ker(this->_q, this->_x, row, new_row);
139 |       return new_row;
140 |     }
141 | 
142 |     // Cache full: reuse the vector of the least recently used row
143 |     auto oldest_row = _cache_last_access.front();
144 |     _cache_last_access.pop_front();
145 |     auto recycled_row = std::move(_ker_cache[oldest_row]);
146 |     _ker_cache.erase(oldest_row);
147 |     this->_ker(this->_q, this->_x, row, recycled_row);
148 |     auto inserted = _ker_cache.insert(std::make_pair(row, recycled_row));
149 |     return inserted.first->second;
150 |   }
151 | 
152 |  private:
153 |   SYCLIndexT _nb_cache_line;
154 |   std::unordered_map<SYCLIndexT, ml::vector_t<T>> _ker_cache;  // Cached rows
155 |   std::list<SYCLIndexT> _cache_last_access;  // Rows ordered by last access
156 | };
157 | 
158 | }  // namespace detail
159 | 
160 | }  // namespace ml
161 | 
162 | #endif  // INCLUDE_ML_CLASSIFIERS_SVM_KERNEL_CACHE_HPP
163 | 


--------------------------------------------------------------------------------
/include/ml/eigen/eigen.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | /**
17 |  * @file
18 |  * @brief Include the Tensor module of Eigen using SYCL and define useful
19 |  * aliases.
20 |  */
21 | 
22 | #ifndef INCLUDE_ML_EIGEN_MY_EIGEN_HPP
23 | #define INCLUDE_ML_EIGEN_MY_EIGEN_HPP
24 | 
25 | #include <unsupported/Eigen/CXX11/Tensor>
26 | 
27 | namespace ml {
28 | 
29 | using Eigen::Dynamic;
30 | /// @brief TensorMap over an Eigen Tensor of T with DIM dimensions.
31 | template <class T, int DIM, Eigen::StorageOptions DataLayout = Eigen::RowMajor>
32 | using tensor_map_t = Eigen::TensorMap<Eigen::Tensor<T, DIM, DataLayout>>;
33 | /// @brief Define eig_NAME_t (Tensor) and eig_NAME_map_t (TensorMap) aliases.
34 | #define DEFINE_EIGEN_ALIAS(NAME, DIM)                       \
35 |   template <class T, int DataLayout = Eigen::RowMajor>      \
36 |   using eig_##NAME##_t = Eigen::Tensor<T, DIM, DataLayout>; \
37 |   template <class T, int DataLayout = Eigen::RowMajor>      \
38 |   using eig_##NAME##_map_t = Eigen::TensorMap<eig_##NAME##_t<T, DataLayout>>
39 | 
40 | /// @brief Generate \p eig_scalar_t and \p eig_scalar_map_t (0 dimension)
41 | DEFINE_EIGEN_ALIAS(scalar, 0);
42 | /// @brief Generate \p eig_vec_t and \p eig_vec_map_t (1 dimension)
43 | DEFINE_EIGEN_ALIAS(vec, 1);
44 | /// @brief Generate \p eig_mat_t and \p eig_mat_map_t (2 dimensions)
45 | DEFINE_EIGEN_ALIAS(mat, 2);
46 | /// @brief Generate \p eig_mats_t and \p eig_mats_map_t (3 dimensions)
47 | DEFINE_EIGEN_ALIAS(mats, 3);
48 | 
49 | using eig_index_t = typename eig_mat_t<float>::Index;  // Index type of Eigen
50 | using eig_dim_pair_t = typename eig_mat_t<float>::DimensionPair;
51 | template <int DIM>
52 | using eig_dsize_t = Eigen::DSizes<eig_index_t, DIM>;  // Sizes of DIM dimensions
53 | template <int DIM>
54 | using eig_dims_t = Eigen::array<eig_index_t, DIM>;  // DIM indices
55 | 
56 | }  // namespace ml
57 | 
58 | #endif  // INCLUDE_ML_EIGEN_MY_EIGEN_HPP
59 | 


--------------------------------------------------------------------------------
/include/ml/eigen/sycl_to_eigen.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_EIGEN_SYCL_TO_EIGEN_HPP
 17 | #define INCLUDE_ML_EIGEN_SYCL_TO_EIGEN_HPP
 18 | 
 19 | #include <memory>
 20 | 
 21 | #include "ml/utils/access.hpp"
 22 | #include "ml/utils/buffer_t.hpp"
 23 | 
 24 | namespace ml {
 25 | 
 26 | namespace detail {
 27 | 
 28 | template <int IN_DIM, int OUT_DIM = IN_DIM>
 29 | eig_dsize_t<OUT_DIM> range_to_dsize(const range<IN_DIM>& r) {
 30 |   static_assert(IN_DIM <= OUT_DIM, "");
 31 | 
 32 |   eig_dsize_t<OUT_DIM> dim;
 33 |   int i = 0;
 34 |   for (; i < IN_DIM; ++i) {
 35 |     dim[i] = static_cast<eig_index_t>(r[i]);
 36 |   }
 37 |   for (; i < OUT_DIM; ++i) {
 38 |     dim[i] = 1;
 39 |   }
 40 |   return dim;
 41 | }
 42 | 
 43 | template <>
 44 | eig_dsize_t<0> range_to_dsize<1, 0>(const range<1>&) {
 45 |   return {};
 46 | }
 47 | 
 48 | }  // namespace detail
 49 | 
 50 | /**
 51 |  * @brief Convert a SYCL buffer to an Eigen Tensor.
 52 |  *
 53 |  * The class holds the host pointer and makes sure that the Tensor is destroyed
 54 |  * at the end.\n Thus this object must stay alive as long as the Tensor is used.
 55 |  *
 56 |  * @todo Because of the way Eigen works if 2 \p sycl_to_eigen_t objects are
 57 |  * created with the same buffer and one is destroyed, the 2 Tensors become
 58 |  * invalid. The fix would require to either count the number of references for
 59 |  * each buffer or to create a different pointer if one already exist.
 60 |  *
 61 |  * @tparam T
 62 |  * @tparam IN_DIM dimension of the SYCL buffer
 63 |  * @tparam OUT_DIM dimension of the Eigen Tensor
 64 |  * @tparam DataLayout Eigen::RowMajor or Eigen::ColMajor
 65 |  */
 66 | template <class T, int IN_DIM, int OUT_DIM = IN_DIM,
 67 |           Eigen::StorageOptions DataLayout = Eigen::RowMajor>
 68 | class sycl_to_eigen_t {
 69 |  private:
 70 |   using Self = sycl_to_eigen_t<T, IN_DIM, OUT_DIM, DataLayout>;
 71 | 
 72 |  public:
 73 |   sycl_to_eigen_t() = default;
 74 | 
 75 |   sycl_to_eigen_t(buffer_t<T, IN_DIM>& b, const eig_dsize_t<OUT_DIM>& sizes) {
 76 |     auto reinterpret_buffer =
 77 |         b.template reinterpret<Eigen::TensorSycl::internal::buffer_data_type_t>(
 78 |             cl::sycl::range<1>(b.get_count() * sizeof(T)));
 79 |     _host_ptr =
 80 |         static_cast<T*>(get_eigen_device().attach_buffer(reinterpret_buffer)) +
 81 |         b.sub_buffer_offset.get(0);
 82 |     _tensor = std::make_unique<tensor_map_t<T, OUT_DIM, DataLayout>>(_host_ptr,
 83 |                                                                      sizes);
 84 |   }
 85 | 
 86 |   ~sycl_to_eigen_t() {
 87 |     if (_host_ptr) {
 88 |       get_eigen_device().detach_buffer(_host_ptr);
 89 |     }
 90 |   }
 91 | 
 92 |   /**
 93 |    * @return the Eigen Tensor
 94 |    */
 95 |   inline auto& tensor() { return *_tensor; }
 96 | 
 97 |   /**
 98 |    * @return the Eigen TensorDevice (for assignment)
 99 |    */
100 |   inline auto device() { return tensor().device(get_eigen_device()); }
101 | 
102 |   inline const T* ptr() const { return _host_ptr; }
103 | 
104 |   // No copy, only move
105 |   sycl_to_eigen_t(const Self&) = delete;
106 |   sycl_to_eigen_t(Self&&) = default;
107 |   Self& operator=(const Self&) = delete;
108 |   Self& operator=(Self&&) = default;
109 | 
110 |  private:
111 |   T* _host_ptr;
112 |   std::unique_ptr<tensor_map_t<T, OUT_DIM, DataLayout>> _tensor;
113 | };
114 | 
115 | /**
116 |  * @brief Create a Tensor of dimension 0 from a SYCL buffer.
117 |  *
118 |  * Only the first value of the buffer is used.
119 |  *
120 |  * @tparam IN_DIM
121 |  * @tparam DataLayout
122 |  * @tparam T
123 |  * @param b
124 |  * @return the \p sycl_to_eigen_t associated to \p b
125 |  */
126 | template <int IN_DIM, Eigen::StorageOptions DataLayout = Eigen::RowMajor,
127 |           class T>
128 | inline auto sycl_to_scalar_eigen(buffer_t<T, IN_DIM>& b) {
129 |   assert_less_or_eq(1LU, b.get_kernel_size());
130 |   return sycl_to_eigen_t<T, IN_DIM, 0, DataLayout>(b, eig_dsize_t<0>());
131 | }
132 | 
133 | /**
134 |  * @brief Create a Tensor of any dimensions from a SYCL buffer.
135 |  *
136 |  * @tparam IN_DIM dimension of the input buffer
137 |  * @tparam OUT_DIM dimension of the output Tensor
138 |  * @tparam R_DIM dimension of the range
139 |  * @tparam DataLayout
140 |  * @tparam T
141 |  * @param b
142 |  * @param r range defining the size of the tensor
143 |  * @return the \p sycl_to_eigen_t associated to \p b
144 |  */
145 | template <int IN_DIM, int OUT_DIM = IN_DIM,
146 |           Eigen::StorageOptions DataLayout = Eigen::RowMajor, int R_DIM,
147 |           class T>
148 | inline auto sycl_to_eigen(buffer_t<T, IN_DIM>& b, const range<R_DIM>& r) {
149 |   static_assert(
150 |       R_DIM >= IN_DIM && (R_DIM <= OUT_DIM || (R_DIM == 1 && OUT_DIM == 0)),
151 |       "");
152 |   assert_less_or_eq(r.size(), b.get_kernel_size());
153 |   return sycl_to_eigen_t<T, IN_DIM, OUT_DIM, DataLayout>(
154 |       b, detail::range_to_dsize<R_DIM, OUT_DIM>(r));
155 | }
156 | 
157 | /// @see sycl_to_eigen(buffer_t<T, IN_DIM>&, const range<IN_DIM>&)
158 | template <int IN_DIM, int OUT_DIM = IN_DIM,
159 |           Eigen::StorageOptions DataLayout = Eigen::RowMajor, class T>
160 | inline auto sycl_to_eigen(buffer_t<T, IN_DIM>& b) {
161 |   return sycl_to_eigen<IN_DIM, OUT_DIM, DataLayout>(b, b.get_kernel_range());
162 | }
163 | 
164 | /**
165 |  * @brief Force a buffer of dimension 1 to be converted to a Tensor of
166 |  * dimension 2.
167 |  *
168 |  * @tparam D whether to build the Tensor as a column (by default) or a row.
169 |  * @tparam DataLayout
170 |  * @tparam T
171 |  * @param v
172 |  * @return the \p sycl_to_eigen_t associated to \p b
173 |  */
174 | template <data_dim D = COL, Eigen::StorageOptions DataLayout = Eigen::RowMajor,
175 |           class T>
176 | inline auto sycl_to_eigen_2d(vector_t<T>& v) {
177 |   return sycl_to_eigen<1, 2, DataLayout>(
178 |       v, build_lin_or_tr<opp<D>(), range<2>>(v.get_kernel_range()[0], 1));
179 | }
180 | 
181 | }  // namespace ml
182 | 
183 | #endif  // INCLUDE_ML_EIGEN_SYCL_TO_EIGEN_HPP
184 | 


--------------------------------------------------------------------------------
/include/ml/math/cov.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef INCLUDE_ML_MATH_COV_HPP
17 | #define INCLUDE_ML_MATH_COV_HPP
18 | 
19 | #include "ml/math/mat_mul.hpp"
20 | 
21 | namespace ml {
22 | 
23 | /**
24 |  * @brief Compute the covariance matrix of \p dataset
25 |  *
26 |  * Assumes the data has been centered already.
27 |  * It is normalized by the number of observation N (instead of the usual N-1).
28 |  * Formula for D=ROW is \f$ (dataset' * dataset) / N \f$
29 |  *
30 |  * @tparam D specifies which dimension represents the number of observations
31 |  * @tparam T
32 |  * @param q
33 |  * @param[in] dataset
34 |  * @param[out] cov_mat
35 |  */
36 | template <data_dim D = ROW, class T>
37 | void cov(queue& q, matrix_t<T>& dataset, matrix_t<T>& cov_mat) {
38 |   auto nb_obs = access_data_dim<D>(dataset, 0);
39 |   auto data_dim = access_data_dim<D>(dataset, 1);
40 |   assert_rng_eq(cov_mat.data_range, range<2>(data_dim, data_dim));
41 | 
42 |   mat_mul<opp<D>(), D>(q, dataset, dataset, cov_mat);
43 |   sycl_normalize(q, cov_mat, static_cast<T>(nb_obs));
44 | }
45 | 
46 | }  // namespace ml
47 | 
48 | #endif  // INCLUDE_ML_MATH_COV_HPP
49 | 


--------------------------------------------------------------------------------
/include/ml/math/functors.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef INCLUDE_ML_MATH_FUNCTORS_HPP
17 | #define INCLUDE_ML_MATH_FUNCTORS_HPP
18 | 
19 | #include "ml/utils/common.hpp"
20 | 
21 | namespace ml {
22 | 
23 | namespace functors {
24 | 
/// Unary functor returning the result of (x > 0) converted to \p T, i.e. 1 for
/// a strictly positive value and 0 otherwise.
template <class T>
struct positive {
  constexpr T operator()(T value) const { return static_cast<T>(value > 0); }
};
29 | 
/// Unary functor returning the result of (x < 0) converted to \p T, i.e. 1 for
/// a strictly negative value and 0 otherwise.
template <class T>
struct negative {
  constexpr T operator()(T value) const { return static_cast<T>(value < 0); }
};
34 | 
/// Unary functor returning its argument unchanged.
template <class T>
struct identity {
  constexpr T operator()(T value) const {
    return value;
  }
};
39 | 
40 | template <class T>
41 | struct sqrt {
42 |   constexpr T operator()(T x) const { return cl::sycl::sqrt(x); }
43 | };
44 | 
/// Unary functor built from a binary operation whose first argument is fixed:
/// calling it with x evaluates binary_op(c, x).
template <class T, class BinaryOp>
class partial_binary_op {
 public:
  /// @param c value bound as the first argument of the operation
  /// @param binary_op the operation, default constructed if omitted
  partial_binary_op(T c, BinaryOp binary_op = BinaryOp())
      : _bound_lhs(c), _op(binary_op) {}

  /// @return the operation applied to the bound value and \p rhs, in this
  /// order
  inline constexpr T operator()(T rhs) const {
    return _op(_bound_lhs, rhs);
  }

 private:
  T _bound_lhs;
  BinaryOp _op;
};
57 | 
58 | template <class T>
59 | struct sum_log_abs {
60 |   inline constexpr T operator()(T x1, T x2) const {
61 |     return x1 + cl::sycl::log(cl::sycl::fabs(x2));
62 |   }
63 | };
64 | 
65 | template <class T>
66 | struct exp_diff {
67 |   template <class T1, class T2>
68 |   constexpr T operator()(T1 x1, T2 x2) const {
69 |     return cl::sycl::exp(x1 - x2);
70 |   }
71 | };
72 | 
/// Binary functor subtracting a scaled previous value from the actual one:
/// act - prev * factor.
template <class T>
struct amortize {
  /// @param factor scale applied to the previous value
  amortize(T factor) : _scale(factor) {}

  constexpr T operator()(T act, T prev) const {
    return act - (prev * _scale);
  }

 private:
  T _scale;
};
81 | 
82 | }  // namespace functors
83 | 
84 | }  // namespace ml
85 | 
86 | #endif  // INCLUDE_ML_MATH_FUNCTORS_HPP
87 | 


--------------------------------------------------------------------------------
/include/ml/math/helper.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef INCLUDE_ML_MATH_HELPER_HPP
17 | #define INCLUDE_ML_MATH_HELPER_HPP
18 | 
19 | #include <cmath>
20 | 
namespace ml {

/**
 * @brief The constant pi converted to \p T.
 *
 * Defined from a long double literal so that the value is a compile-time
 * constant (no dynamic initialization) and keeps the full precision of \p T,
 * including long double. For float and double the value is the closest
 * representable value to pi.
 *
 * @tparam T arithmetic type of the constant
 */
template <class T>
static constexpr T PI =
    static_cast<T>(3.141592653589793238462643383279502884L);

}  // namespace ml
27 | 
28 | #endif  // INCLUDE_ML_MATH_HELPER_HPP
29 | 


--------------------------------------------------------------------------------
/include/ml/math/mat_inv.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_MATH_MAT_INV_HPP
 17 | #define INCLUDE_ML_MATH_MAT_INV_HPP
 18 | 
 19 | #include "ml/math/mat_mul.hpp"
 20 | #include "ml/math/mat_ops.hpp"
 21 | 
 22 | namespace ml {
 23 | 
// Tag used to name the kernels of mat_inv
class ml_mat_inv;

/**
 * @brief Invert the given matrix of size nxn.
 *
 * Uses the Gauss-Jordan method on the augmented matrix C = [A|I]: after n
 * elimination rounds C = [I|A^-1] and the right part is copied to \p inv.
 * No pivoting is performed: round r divides by C(r,r) without checking it, so
 * a zero on the diagonal during the elimination produces non-finite values.
 *
 * The kernels deduce n from the number of rows of the range they are launched
 * on.
 *
 * @see tri_solve(queue&, matrix_t<T>&, matrix_t<T>&) for a more numerically
 * stable solution
 * @tparam T
 * @param q
 * @param[in] mat
 * @param[out] inv
 * @param c_buffer temporary buffer must be at least of size nx(2*n)
 * @param block_buffer temporary buffer must be at least of size nx(n+1)
 * @return A SYCL event corresponding to the last submitted operation
 */
template <class T>
event mat_inv(queue& q, matrix_t<T>& mat, matrix_t<T>& inv,
              matrix_t<T>& c_buffer, matrix_t<T>& block_buffer) {
  auto data_dim = mat.data_range[1];
  mat.assert_square();
  assert_rng_less_or_eq(mat.get_kernel_range(), inv.data_range);
  // NOTE(review): the documentation asks for temporaries of *at least* these
  // sizes whereas the buffer range is given as the first argument here;
  // confirm the argument order expected by assert_rng_less_or_eq.
  assert_rng_less_or_eq(c_buffer.data_range, data_dim, 2 * data_dim);
  assert_rng_less_or_eq(block_buffer.data_range, data_dim, data_dim + 1);

  // C = [A|I]
  q.submit([&mat, &c_buffer](handler& cgh) {
    auto mat_acc = mat.template get_access_2d<access::mode::read>(cgh);
    auto c_acc =
        c_buffer.template get_access_2d<access::mode::discard_write>(cgh);
    cgh.parallel_for<NameGen<0, ml_mat_inv, T>>(
        c_buffer.get_nd_range(), [=](nd_item<2> item) {
          auto global_nb_rows = item.get_global_range()[0];
          auto row = item.get_global_id(0);
          auto col = item.get_global_id(1);
          // Copy A if left part, set identity otherwise
          // (the comparison yields 1 on the diagonal of the right part)
          c_acc(row, col) = (col < global_nb_rows)
                                ? mat_acc(row, col)
                                : ((row + global_nb_rows) == col);
        });
  });

  // Compute C so that C = [I|A^-1]
  // Round r only updates the columns [r, r + n] of C. The update is first
  // written to block_buffer because every element of the window depends on
  // row r and column r of the previous C.
  for (decltype(data_dim) r = 0; r < data_dim; ++r) {
    // Write update in block_buffer
    q.submit([&c_buffer, &block_buffer, r](handler& cgh) {
      auto c_acc = c_buffer.template get_access_2d<access::mode::read>(cgh);
      auto block_acc =
          block_buffer.template get_access_2d<access::mode::discard_write>(cgh);
      cgh.parallel_for<NameGen<1, ml_mat_inv, T>>(
          block_buffer.get_nd_range(), [=](nd_item<2> item) {
            auto row = item.get_global_id(0);
            auto col = item.get_global_id(1);
            // 0/1 flag used to select one of the two formulas without
            // branching (both are evaluated)
            int is_row_eq_r = row == r;
            // if row == r: C(i,j) = C(i,j) / C(r,r)
            // else:        C(i,j) = C(i,j) - (C(i,r) / C(r,r)) * C(r, j)
            block_acc(row, col) =
                is_row_eq_r * (c_acc(row, col + r) / c_acc(r, r)) +
                !is_row_eq_r *
                    (c_acc(row, col + r) -
                     (c_acc(row, r) / c_acc(r, r)) * c_acc(r, col + r));
          });
    });

    // Copy block_buffer in c_buffer
    // (back to the window starting at column r)
    q.submit([&c_buffer, &block_buffer, r](handler& cgh) {
      auto c_acc = c_buffer.template get_access_2d<access::mode::write>(cgh);
      auto block_acc =
          block_buffer.template get_access_2d<access::mode::read>(cgh);
      cgh.parallel_for<NameGen<2, ml_mat_inv, T>>(
          block_buffer.get_nd_range(), [=](nd_item<2> item) {
            auto row = item.get_global_id(0);
            auto col = item.get_global_id(1);
            c_acc(row, col + r) = block_acc(row, col);
          });
    });
  }

  // Copy the right part of C to inv
  return q.submit([&c_buffer, &inv](handler& cgh) {
    auto c_acc = c_buffer.template get_access_2d<access::mode::read>(cgh);
    auto inv_acc = inv.template get_access_2d<access::mode::discard_write>(cgh);
    cgh.parallel_for<NameGen<3, ml_mat_inv, T>>(
        inv.get_nd_range(), [=](nd_item<2> item) {
          auto global_nb_rows = item.get_global_range()[0];
          auto row = item.get_global_id(0);
          auto col = item.get_global_id(1);
          // The right part starts at the column equal to the number of rows
          inv_acc(row, col) = c_acc(row, global_nb_rows + col);
        });
  });
}
116 | 
117 | /**
118 |  * @brief Invert the given matrix and create any necessary temporary buffers.
119 |  *
120 |  * @see mat_inv(queue&, matrix_t<T>&, matrix_t<T>&, matrix_t<T>&, matrix_t<T>&)
121 |  * @tparam T
122 |  * @param q
123 |  * @param[in] mat
124 |  * @param[out] inv
125 |  * @return A SYCL event corresponding to the last submitted operation
126 |  */
127 | template <class T>
128 | event mat_inv(queue& q, matrix_t<T>& mat, matrix_t<T>& inv) {
129 |   auto data_dim = mat.data_range[1];
130 |   matrix_t<T> c_buffer(range<2>(data_dim, 2 * data_dim));
131 |   matrix_t<T> block_buffer(range<2>(data_dim, data_dim + 1));
132 |   return mat_inv(q, mat, inv, c_buffer, block_buffer);
133 | }
134 | 
135 | }  // namespace ml
136 | 
137 | #endif  // INCLUDE_ML_MATH_MAT_INV_HPP
138 | 


--------------------------------------------------------------------------------
/include/ml/math/mat_mul.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef INCLUDE_ML_MATH_MAT_MUL_HPP
17 | #define INCLUDE_ML_MATH_MAT_MUL_HPP
18 | 
19 | #include "ml/math/vec_ops.hpp"
20 | 
21 | namespace ml {
22 | 
23 | /**
24 |  * @brief Matrix multiplication using Eigen.
25 |  *
26 |  * The tensors are sliced to their data_range first in case their kernel_range
27 |  * is bigger.
28 |  *
29 |  * @tparam D1 whether to transpose \p b1
30 |  * @tparam D2 whether to transpose \p b2
31 |  * @tparam T
32 |  * @tparam DIM1 Tensor dimension of \p b1
33 |  * @tparam DIM2 Tensor dimension of \p b2
34 |  * @tparam DIM3 Tensor dimension of \p b3
35 |  * @param[in] b1 mxk
36 |  * @param[in] b2 kxn
37 |  * @param[out] b3 mxn
38 |  */
39 | template <data_dim D1 = LIN, data_dim D2 = LIN, class T, int DIM1, int DIM2,
40 |           int DIM3>
41 | void mat_mul(queue&, buffer_t<T, DIM1>& b1, buffer_t<T, DIM2>& b2,
42 |              buffer_t<T, DIM3>& b3) {
43 |   STATIC_ASSERT_DATA_DIM_FOR_DIM_2(DIM1, D1);
44 |   STATIC_ASSERT_DATA_DIM_FOR_DIM_2(DIM2, D2);
45 |   static_assert(1 <= DIM1 && DIM1 <= 2, "");
46 |   static_assert(1 <= DIM1 && DIM2 <= 2, "");
47 |   static_assert(DIM3 == std::min(DIM1, DIM2), "");
48 | 
49 |   // Act as if data_dim were LIN because the transpose is handled by dims
50 |   // Reshape inputs and outputs to be 2D
51 |   auto eig_t1 = sycl_to_eigen<DIM1, 2>(b1);
52 |   auto eig_t2 = sycl_to_eigen<DIM2, 2>(b2);
53 |   auto eig_t3 = sycl_to_eigen<DIM3, 2>(b3);
54 | 
55 |   auto sliced_t1 = eig_t1.tensor().slice(
56 |       eig_dsize_t<2>{0, 0}, detail::range_to_dsize<DIM1, 2>(b1.data_range));
57 |   auto sliced_t2 = eig_t2.tensor().slice(
58 |       eig_dsize_t<2>{0, 0}, detail::range_to_dsize<DIM2, 2>(b2.data_range));
59 |   auto sliced_t3 = eig_t3.tensor().slice(
60 |       eig_dsize_t<2>{0, 0}, detail::range_to_dsize<DIM3, 2>(b3.data_range));
61 | 
62 |   sliced_t3.device(get_eigen_device()) =
63 |       sliced_t1.contract(sliced_t2, get_contract_dim<opp<D1>(), D2>());
64 | }
65 | 
66 | }  // namespace ml
67 | 
68 | #endif  // INCLUDE_ML_MATH_MAT_MUL_HPP
69 | 


--------------------------------------------------------------------------------
/include/ml/math/qr.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_MATH_QR_HPP
 17 | #define INCLUDE_ML_MATH_QR_HPP
 18 | 
 19 | #include "ml/math/vec_ops.hpp"
 20 | 
 21 | namespace ml {
 22 | 
// Tag used to name the kernels of qr
class ml_qr;

/**
 * @brief QR decomposition of the given matrix of size mxn.
 *
 * Uses the Householder transformations algorithm.
 * Note: A blocked Householder would be more performant.
 *
 * qr(A) computes Q and R such that A = Q * R where Q is an orthogonal matrix
 * and R an upper triangular matrix. This implementation assumes that m is
 * greater than or equal to n and only writes R in the upper triangular part of
 * A. The lower triangular part of R should be set to 0 if needed. Note that for
 * each row of R a sign can be chosen, this implementation always chooses 1.
 *
 * The function alternates operations on the device with single element reads
 * and writes from the host (the diagonal element and the norm of each column).
 *
 * @tparam T
 * @param q
 * @param[in, out] mat
 * @param w temporary buffer must be of size m at least.
 * @param vec_buf temporary buffer must be of size n at least.
 * @param eps threshold below which the division by u1 is avoided.
 */
template <class T>
void qr(queue& q, matrix_t<T>& mat, vector_t<T>& w, vector_t<T>& vec_buf,
        T eps = 1E-5) {
  auto m = access_ker_dim(mat, 0);
  auto n = access_ker_dim(mat, 1);
  using IndexT = decltype(n);

  assert_less_or_eq(n, m);
  assert_less_or_eq(m, w.data_range[0]);
  assert_less_or_eq(n, vec_buf.data_range[0]);

  // Values of the current column, updated by compute_acts
  static constexpr T ACT_SIGN = 1;
  SYCLIndexT jj_offset;
  T host_mat_jj;
  T act_norm;
  T act_u1;
  T act_tau;

  auto eig_mat = sycl_to_eigen(mat);
  // Force tensor dim 2 for matrix multiplication
  auto eig_w = sycl_to_eigen_2d(w);
  auto eig_vec_buf = sycl_to_eigen_2d(vec_buf);
  // Single element buffer receiving the norm computed on the device
  vector_t<T> norm_buf((range<1>(1)));
  auto eig_norm = sycl_to_scalar_eigen(norm_buf);
  // Offsets and extents of the slices, updated for each column
  eig_dsize_t<1> slice_offsets_d1;
  eig_dsize_t<1> slice_extents_d1;
  eig_dsize_t<2> slice_offsets_mat;
  eig_dsize_t<2> slice_extents_mat;
  eig_dsize_t<2> slice_offsets_w{0, 0};
  eig_dsize_t<2> slice_extents_w{1, 1};
  eig_dsize_t<2> slice_offsets_vec_buf{0, 0};
  eig_dsize_t<2> slice_extents_vec_buf{1, 1};

  // Compute the norm, u1 and tau of column j and write the norm in mat(j, j)
  auto compute_acts = [&](IndexT j) {
    // Linear offset of mat(j, j), with rows of n elements
    jj_offset = j * (n + 1);
    // Get elements with indices [j, m] of the jth column and take the norm
    slice_offsets_d1[0] = j;
    slice_extents_d1[0] = m - j;
    eig_norm.device() = eig_mat.tensor()
                            .chip(j, 1)
                            .slice(slice_offsets_d1, slice_extents_d1)
                            .square()
                            .sum()
                            .sqrt();
    host_mat_jj = mat.read_to_host(jj_offset);
    // At each iteration the sign can be chosen to be different.
    // Choosing it to be -sign(mat(j,j)) maximizes the value of u1 but is more
    // likely to cause division by zero
    // act_sign = -cl::sycl::sign(host_mat_jj);
    act_norm = ACT_SIGN * norm_buf.read_to_host(0);
    act_u1 = host_mat_jj - act_norm;
    // Note: act_norm is not checked against zero before this division
    act_tau = -act_u1 / act_norm;
    mat.write_from_host(jj_offset, act_norm);
  };

  auto w_rng = w.kernel_range;
  auto mat_rng = mat.kernel_range;
  SYCLIndexT nb_rows_ker;
  IndexT j = 0;
  // The last column is handled after the loop: only its diagonal is updated
  for (; j < n - 1; ++j) {
    compute_acts(j);

    if (std::abs(act_u1) < eps) {
      // Note: matrix Q would be inaccurate if this is reached
      continue;
    }

    // Number of rows involved in this iteration. The kernel ranges are only
    // recomputed when this number is even and either it is a power of 2 or
    // the current range is not one.
    nb_rows_ker = m - j;
    if (nb_rows_ker % 2 == 0) {
      bool nb_rows_ker_is_pow2 = is_pow2(nb_rows_ker);
      if (nb_rows_ker_is_pow2 || !is_pow2(w_rng.get_global_range()[0])) {
        w_rng = get_optimal_nd_range(nb_rows_ker);
      }
      if (nb_rows_ker_is_pow2 || !is_pow2(mat_rng.get_global_range()[0])) {
        mat_rng = get_optimal_nd_range(nb_rows_ker, access_ker_dim(mat, 1));
      }
    }

    // Compute w and update R
    // The elements below the diagonal of column j are divided by u1 and
    // stored both in mat and in w. The first element of w is set to 1 from
    // the host afterwards.
    q.submit([&mat, &w, w_rng, nb_rows_ker, act_u1, j](handler& cgh) {
      auto mat_acc = mat.template get_access_2d<access::mode::read_write>(cgh);
      auto w_acc = w.template get_access_1d<access::mode::discard_write>(cgh);
      cgh.parallel_for<NameGen<0, ml_qr, T>>(w_rng, [=](nd_item<1> item) {
        // Start at 1 to skip the diagonal element
        auto row = item.get_global_id(0) + 1;
        if (row < nb_rows_ker) {
          auto val = mat_acc(row + j, j) / act_u1;
          mat_acc(row + j, j) = val;
          w_acc(row) = val;
        }
      });
    });
    w.write_from_host(0, T(1));

    // Compute vec_buf
    // Contraction of the block of mat starting at (j, j + 1) with the first
    // nb_rows_ker elements of w
    slice_extents_w[0] = nb_rows_ker;
    slice_extents_vec_buf[0] = n - j - 1;
    slice_offsets_mat[0] = j;
    slice_offsets_mat[1] = j + 1;
    slice_extents_mat[0] = nb_rows_ker;
    slice_extents_mat[1] = n - j - 1;
    auto sliced_w = eig_w.tensor().slice(slice_offsets_w, slice_extents_w);
    auto sliced_vec_buf = eig_vec_buf.tensor().slice(slice_offsets_vec_buf,
                                                     slice_extents_vec_buf);
    auto sliced_mat =
        eig_mat.tensor().slice(slice_offsets_mat, slice_extents_mat);
    sliced_vec_buf.device(get_eigen_device()) =
        sliced_mat.contract(sliced_w, get_contract_dim<ROW, ROW>());

    // Update R
    // Subtract tau * w(row) * vec_buf(col) from the block starting at
    // (j, j + 1)
    q.submit([&vec_buf, &w, &mat, mat_rng, act_tau, j, m, n](handler& cgh) {
      auto vec_acc = vec_buf.template get_access_1d<access::mode::read>(cgh);
      auto w_acc = w.template get_access_1d<access::mode::read>(cgh);
      auto mat_acc = mat.template get_access_2d<access::mode::read_write>(cgh);
      cgh.parallel_for<NameGen<1, ml_qr, T>>(mat_rng, [=](nd_item<2> item) {
        auto row = item.get_global_id(0);
        auto col = item.get_global_id(1);
        // The kernel range can be bigger than the block to update
        if (row < m - j && col < n - j - 1) {
          mat_acc(j + row, j + 1 + col) -=
              (act_tau * w_acc(row)) * vec_acc(col);
        }
      });
    });
  }

  // Last column: only the diagonal element is replaced by the norm
  compute_acts(j);
}
170 | 
171 | /**
172 |  * @brief QR decomposition of the given matrix.
173 |  *
174 |  * @tparam T
175 |  * @param q
176 |  * @param[in, out] mat
177 |  * @param data_dim_rng 1d range of the size of an observation
178 |  * @param data_dim_pow2_rng 1d kernel range of the size of an observation (can
179 |  * be padded to a bigger power of 2)
180 |  */
181 | template <class T>
182 | void qr(queue& q, matrix_t<T>& mat, const range<1>& data_dim_rng,
183 |         const nd_range<1>& data_dim_pow2_rng) {
184 |   range<1> nb_obs_rng(access_ker_dim(mat, 0));
185 |   auto nb_obs_pow2_rng = get_optimal_nd_range(nb_obs_rng);
186 |   vector_t<T> w_buf(nb_obs_rng, nb_obs_pow2_rng);
187 |   vector_t<T> vec_buf(data_dim_rng, data_dim_pow2_rng);
188 | 
189 |   qr(q, mat, w_buf, vec_buf);
190 | }
191 | 
192 | /**
193 |  * @brief QR decomposition of the given matrix.
194 |  *
195 |  * @see qr(queue&, matrix_t<T>&, const range<1>&, const nd_range<1>&, const
196 |  * range<1>&, const nd_range<1>&)
197 |  * @tparam T
198 |  * @param q
199 |  * @param[in, out] mat
200 |  */
201 | template <class T>
202 | void qr(queue& q, matrix_t<T>& mat) {
203 |   range<1> data_dim_rng(access_ker_dim(mat, 1));
204 |   auto data_dim_pow2_rng = get_optimal_nd_range(data_dim_rng);
205 |   qr(q, mat, data_dim_rng, data_dim_pow2_rng);
206 | }
207 | 
208 | }  // namespace ml
209 | 
210 | #endif  // INCLUDE_ML_MATH_QR_HPP
211 | 


--------------------------------------------------------------------------------
/include/ml/math/tri_inv.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_MATH_TRI_INV_HPP
 17 | #define INCLUDE_ML_MATH_TRI_INV_HPP
 18 | 
 19 | #include "ml/utils/common.hpp"
 20 | 
 21 | namespace ml {
 22 | 
 23 | class ml_try_inv;
 24 | 
 25 | /**
 26 |  * @brief Invert the given upper triangular matrix of size nxn.
 27 |  *
 28 |  * Uses the Gauss-Jordan method.
 29 |  *
 30 |  * @see tri_solve(queue&, matrix_t<T>&, matrix_t<T>&) for a more numerically
 31 |  * stable solution
 32 |  * @tparam T
 33 |  * @param q
 34 |  * @param[in] tri
 35 |  * @param[out] inv
 36 |  * @param t_buffer temporary buffer must be of size nxn at least.
 37 |  * @param t_pow_buffer temporary buffer must be of size nxn at least.
 38 |  * @param data_dim_1_nd_rng 1d kernel range of size n.
 39 |  * @return A SYCL event corresponding to the last submitted operation
 40 |  */
 41 | template <class T>
 42 | event tri_inv(queue& q, matrix_t<T>& tri, matrix_t<T>& inv,
 43 |               matrix_t<T>& t_buffer, matrix_t<T>& t_pow_buffer,
 44 |               const nd_range<1>& data_dim_1_nd_rng) {
 45 |   assert(&tri != &inv);
 46 |   assert(&tri != &t_buffer);
 47 |   assert(&tri != &t_pow_buffer);
 48 |   assert(&inv != &t_buffer);
 49 |   assert(&inv != &t_pow_buffer);
 50 |   assert(&t_buffer != &t_pow_buffer);
 51 | 
 52 |   auto data_dim_2_rng = tri.kernel_range.get_global_range();
 53 |   assert_eq(data_dim_1_nd_rng.get_global_range()[0], data_dim_2_rng[0]);
 54 |   assert_rng_square(data_dim_2_rng);
 55 |   auto data_dim = data_dim_2_rng[0];
 56 |   using IndexT = decltype(data_dim);
 57 |   assert_rng_less_or_eq(data_dim_2_rng, tri.data_range);
 58 |   assert_rng_less_or_eq(data_dim_2_rng, inv.data_range);
 59 |   assert_rng_less_or_eq(data_dim_2_rng, t_buffer.data_range);
 60 |   assert_rng_less_or_eq(data_dim_2_rng, t_pow_buffer.data_range);
 61 | 
 62 |   q.submit([&tri, &t_buffer, &t_pow_buffer, &inv](handler& cgh) {
 63 |     auto tri_acc = tri.template get_access_2d<access::mode::read>(cgh);
 64 |     auto t_acc =
 65 |         t_buffer.template get_access_2d<access::mode::discard_write>(cgh);
 66 |     auto t_pow_acc =
 67 |         t_pow_buffer.template get_access_2d<access::mode::discard_write>(cgh);
 68 |     auto inv_acc = inv.template get_access_2d<access::mode::discard_write>(cgh);
 69 |     cgh.parallel_for<NameGen<0, ml_try_inv, T>>(
 70 |         tri.get_nd_range(), [=](nd_item<2> item) {
 71 |           auto row = item.get_global_id(0);
 72 |           auto col = item.get_global_id(1);
 73 |           T val = (col > row) ? (-tri_acc(row, col) / tri_acc(row, row)) : 0;
 74 |           t_acc(row, col) = val;
 75 |           t_pow_acc(row, col) = val;
 76 |           inv_acc(row, col) = (row == col) ? 1 : val;
 77 |         });
 78 |   });
 79 | 
 80 |   auto tri_nd_range = tri.get_nd_range();
 81 |   for (IndexT i = 2; i < data_dim; ++i) {  // i = 0 -> id; i = 1 -> t_acc
 82 |     // mat_mul where we know some zeros
 83 |     q.submit([&t_pow_buffer, &t_buffer, &inv, tri_nd_range, data_dim,
 84 |               i](handler& cgh) {
 85 |       auto t_pow_acc =
 86 |           t_pow_buffer.template get_access_2d<access::mode::read_write>(cgh);
 87 |       auto t_acc = t_buffer.template get_access_2d<access::mode::read>(cgh);
 88 |       auto inv_acc = inv.template get_access_2d<access::mode::read_write>(cgh);
 89 |       cgh.parallel_for<NameGen<2, ml_try_inv, T>>(
 90 |           tri_nd_range, [=](nd_item<2> item) {
 91 |             auto row = item.get_global_id(0);
 92 |             auto col = item.get_global_id(1);
 93 |             if (row < data_dim - i && col < data_dim - i && col >= row) {
 94 |               auto diag_idx = col - row;
 95 |               col += i;
 96 |               T sum = 0;
 97 |               // don't use the full line or column because of zeros
 98 |               for (size_t j = 0; j <= diag_idx; ++j) {
 99 |                 sum += t_pow_acc(row, row + i + j - 1) *
100 |                        t_acc(row + i + j - 1, col);
101 |               }
102 |               // Store the result in the lower triangle part and transpose it
103 |               // later
104 |               t_pow_acc(col, row) = sum;
105 |               inv_acc(row, col) += sum;
106 |             }
107 |           });
108 |     });
109 | 
110 |     // Transpose lower part of t_pow_acc to upper part
111 |     q.submit([&t_pow_buffer, tri_nd_range, data_dim, i](handler& cgh) {
112 |       auto t_pow_acc =
113 |           t_pow_buffer.template get_access_2d<access::mode::read_write>(cgh);
114 |       cgh.parallel_for<NameGen<3, ml_try_inv, T>>(
115 |           tri_nd_range, [=](nd_item<2> item) {
116 |             auto row = item.get_global_id(0);
117 |             auto col = item.get_global_id(1);
118 |             if (row < data_dim - i && col < data_dim - i && col >= row) {
119 |               col += i;
120 |               t_pow_acc(row, col) = t_pow_acc(col, row);
121 |             }
122 |           });
123 |     });
124 |   }
125 | 
126 |   return q.submit([&tri, &inv](handler& cgh) {
127 |     auto tri_acc = tri.template get_access_2d<access::mode::read>(cgh);
128 |     auto inv_acc = inv.template get_access_2d<access::mode::read_write>(cgh);
129 |     cgh.parallel_for<NameGen<4, ml_try_inv, T>>(
130 |         tri.get_nd_range(), [=](nd_item<2> item) {
131 |           auto row = item.get_global_id(0);
132 |           auto col = item.get_global_id(1);
133 |           inv_acc(row, col) /= tri_acc(col, col);
134 |         });
135 |   });
136 | }
137 | 
138 | /**
139 |  * @brief Invert the given upper triangular matrix and create any necessary
140 |  * temporary buffers.
141 |  *
142 |  * @see tri_inv(queue&, matrix_t<T>&, matrix_t<T>&, matrix_t<T>&, matrix_t<T>&,
143 |  * const nd_range<1>&)
144 |  * @tparam T
145 |  * @param q
146 |  * @param[in] tri
147 |  * @param[out] inv
148 |  * @return A SYCL event corresponding to the last submitted operation
149 |  */
150 | template <class T>
151 | event tri_inv(queue& q, matrix_t<T>& tri, matrix_t<T>& inv) {
152 |   tri.assert_square();
153 |   assert_rng_eq(tri.get_kernel_range(), inv.get_kernel_range());
154 | 
155 |   matrix_t<T> t_buffer{tri.data_range, tri.kernel_range};
156 |   matrix_t<T> t_pow_buffer{tri.data_range, tri.kernel_range};
157 | 
158 |   return tri_inv(q, tri, inv, t_buffer, t_pow_buffer,
159 |                  get_optimal_nd_range(tri.data_range[0]));
160 | }
161 | 
162 | }  // namespace ml
163 | 
164 | #endif  // INCLUDE_ML_MATH_TRI_INV_HPP
165 | 


--------------------------------------------------------------------------------
/include/ml/math/tri_solve.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_MATH_TRI_SOLVE_HPP
 17 | #define INCLUDE_ML_MATH_TRI_SOLVE_HPP
 18 | 
 19 | #include "ml/math/mat_ops.hpp"
 20 | 
 21 | namespace ml {
 22 | 
 23 | class ml_mat_tri_solve;
 24 | class ml_mat_tri_solve_div_row;
 25 | 
 26 | namespace detail {
 27 | 
 28 | template <data_dim D>
 29 | struct tri_solve_data_dim;
 30 | 
 31 | // Upper specific case
 32 | template <>
 33 | struct tri_solve_data_dim<LIN> {
 34 |   static inline SYCLIndexT get_row_idx(SYCLIndexT n, SYCLIndexT i) {
 35 |     return n - i - 1;
 36 |   }
 37 |   using get_next_row_idx_op = std::minus<SYCLIndexT>;
 38 |   using apply_subtract_condition_op = std::less<SYCLIndexT>;
 39 | };
 40 | 
 41 | // Lower specific case
 42 | template <>
 43 | struct tri_solve_data_dim<TR> {
 44 |   static inline SYCLIndexT get_row_idx(SYCLIndexT, SYCLIndexT i) { return i; }
 45 |   using get_next_row_idx_op = std::plus<SYCLIndexT>;
 46 |   using apply_subtract_condition_op = std::greater<SYCLIndexT>;
 47 | };
 48 | 
 49 | template <data_dim DX, class T>
 50 | event div_row(queue& q, matrix_t<T>& A, matrix_t<T>& X, SYCLIndexT row_idx,
 51 |               const nd_range<1>& col_ker_rng) {
 52 |   return q.submit([&A, &X, row_idx, col_ker_rng](handler& cgh) {
 53 |     // Don't need DA because we only access the diagonal
 54 |     auto a_acc = A.template get_access_2d<access::mode::read>(cgh);
 55 |     auto x_acc = X.template get_access_2d<access::mode::read_write, DX>(cgh);
 56 |     cgh.parallel_for<NameGen<DX, ml_mat_tri_solve_div_row, T>>(
 57 |         col_ker_rng, [=](nd_item<1> item) {
 58 |           auto col = item.get_global_id(0);
 59 |           x_acc(row_idx, col) /= a_acc(row_idx, row_idx);
 60 |         });
 61 |   });
 62 | }
 63 | 
 64 | template <data_dim DA, data_dim DX, class T>
 65 | event compute_x(queue& q, matrix_t<T>& A, matrix_t<T>& X, SYCLIndexT row_idx) {
 66 |   return q.submit([&A, &X, row_idx](handler& cgh) {
 67 |     auto a_acc = A.template get_access_2d<access::mode::read, DA>(cgh);
 68 |     auto x_acc = X.template get_access_2d<access::mode::read_write, DX>(cgh);
 69 |     const auto apply_subtract_condition =
 70 |         typename detail::tri_solve_data_dim<DA>::apply_subtract_condition_op();
 71 |     cgh.parallel_for<NameGen<DA * 2 + DX, ml_mat_tri_solve, T>>(
 72 |         X.get_nd_range(), [=](nd_item<2> item) {
 73 |           auto row = item.get_global_id(DX);
 74 |           auto col = item.get_global_id(opp<DX>());
 75 |           if (apply_subtract_condition(row, row_idx)) {
 76 |             x_acc(row, col) -= x_acc(row_idx, col) * a_acc(row, row_idx);
 77 |           }
 78 |         });
 79 |   });
 80 | }
 81 | 
 82 | }  // namespace detail
 83 | 
 84 | /**
 85 |  * @brief Compute X = A \ X = inv(A) * X without explicitly inverting A.
 86 |  *
 87 |  * Assumes that A is upper triangular.
 88 |  * X (resp. X') must have the same number of rows than A if DX=LIN (resp.
 89 |  * DX=COL)
 90 |  *
 91 |  * @tparam DX whether to transpose \p X
 92 |  * @tparam DA whether to transpose \p A
 93 |  * @tparam T
 94 |  * @param q
 95 |  * @param[in, out] X
 96 |  * @param[in] A
 97 |  * @return A SYCL event corresponding to the last submitted operation
 98 |  */
 99 | template <data_dim DX = LIN, data_dim DA = LIN, class T>
100 | event tri_solve(queue& q, matrix_t<T>& X, matrix_t<T>& A) {
101 |   const auto n = access_ker_dim(A, 0);
102 |   A.assert_square();
103 |   assert_eq(access_ker_dim<DX>(X, 0), n);
104 | 
105 |   const auto nb_cols = access_ker_dim<DX>(X, 1);
106 |   const auto col_ker_rng = get_optimal_nd_range(nb_cols);
107 |   const auto get_next_row_idx =
108 |       typename detail::tri_solve_data_dim<DA>::get_next_row_idx_op();
109 | 
110 |   // First iteration can be computed directly
111 |   event event;
112 |   SYCLIndexT row_idx = detail::tri_solve_data_dim<DA>::get_row_idx(n, 0);
113 |   SYCLIndexT next_row_idx = get_next_row_idx(row_idx, 1);
114 |   event = detail::div_row<DX>(q, A, X, row_idx, col_ker_rng);
115 | 
116 |   // Each result found must be subtracted for the next iterations
117 |   for (SYCLIndexT i = 1; i < n; ++i) {
118 |     detail::compute_x<DA, DX>(q, A, X, row_idx);
119 |     row_idx = next_row_idx;
120 |     next_row_idx = get_next_row_idx(row_idx, 1);
121 |     event = detail::div_row<DX>(q, A, X, row_idx, col_ker_rng);
122 |   }
123 |   return event;
124 | }
125 | 
126 | /**
127 |  * @brief Compute X = A \ B = inv(A) * B without explicitly inverting A.
128 |  *
129 |  * @see tri_solve(queue&, matrix_t<T>&, matrix_t<T>&)
130 |  * @tparam DX whether to transpose \p X
131 |  * @tparam DA whether to transpose \p A
132 |  * @tparam T
133 |  * @param q
134 |  * @param[out] X
135 |  * @param[in] A
136 |  * @param[in] B
137 |  * @return A SYCL event corresponding to the last submitted operation
138 |  */
139 | template <data_dim DX = LIN, data_dim DA = LIN, class T>
140 | inline event tri_solve(queue& q, matrix_t<T>& X, matrix_t<T>& A,
141 |                        matrix_t<T>& B) {
142 |   sycl_copy(q, B, X);
143 |   return tri_solve<DX, DA>(q, X, A);
144 | }
145 | 
146 | /**
147 |  * @brief Compute X = C \ B = inv(C) * B with C = A'*A.
148 |  *
149 |  * @tparam DX whether to transpose \p X
150 |  * @tparam T
151 |  * @param q
152 |  * @param[out] X
153 |  * @param[in] A
154 |  * @param[in] B
155 |  * @return A SYCL event corresponding to the last submitted operation
156 |  */
157 | template <data_dim DX = LIN, class T>
158 | event chol_solve(queue& q, matrix_t<T>& X, matrix_t<T>& A, matrix_t<T>& B) {
159 |   tri_solve<DX, TR>(q, X, A, B);
160 |   return tri_solve<DX, LIN>(q, X, A);
161 | }
162 | 
163 | }  // namespace ml
164 | 
165 | #endif  // INCLUDE_ML_MATH_TRI_SOLVE_HPP
166 | 


--------------------------------------------------------------------------------
/include/ml/preprocess/apply_pca.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_PREPROCESS_APPLY_PCA_HPP
 17 | #define INCLUDE_ML_PREPROCESS_APPLY_PCA_HPP
 18 | 
 19 | #include <cassert>
 20 | #include <fstream>
 21 | 
 22 | #include "pca.hpp"
 23 | 
 24 | namespace ml {
 25 | 
 26 | /**
 27 |  * @brief Helper to compute and apply the PCA from a training set and applying
 28 |  * it on a test set.
 29 |  *
 30 |  * @see pca_svd
 31 |  * @tparam T
 32 |  */
 33 | template <class T>
 34 | class apply_pca {
 35 |  public:
 36 |   apply_pca()
 37 |       : _enable_pca(true),
 38 |         _nb_vec_computed(0),
 39 |         _data_avg(range<1>()),
 40 |         _eigenvectors(range<2>()) {}
 41 | 
 42 |   /**
 43 |    * @brief Either load the eigenvectors or compute them and apply the PCA to
 44 |    * the given data.
 45 |    *
 46 |    * @param q
 47 |    * @param[in, out] data this matrix has been centered after this call
 48 |    * @param pca_args @see struct pca_args
 49 |    * @return the new data
 50 |    */
 51 |   matrix_t<T> compute_and_apply(queue& q, matrix_t<T>& data,
 52 |                                 const pca_args<T>& pca_args) {
 53 |     _enable_pca = pca_args.keep_percent > 0;
 54 |     if (!_enable_pca) {
 55 |       return data;
 56 |     }
 57 | 
 58 |     auto data_dim = access_data_dim(data, 1);
 59 |     auto data_dim_pow2 = access_ker_dim(data, 1);
 60 | 
 61 |     _data_avg =
 62 |         vector_t<T>(range<1>(data_dim), get_optimal_nd_range(data_dim_pow2));
 63 | 
 64 |     std::string load_filename = get_filename(
 65 |         data_dim_pow2, pca_args.min_nb_vecs, pca_args.scale_factor);
 66 |     if (pca_args.auto_load && file_exists(load_filename)) {
 67 |       // avg and center_data would have been called by pca_svd otherwise
 68 |       avg(q, data, _data_avg);
 69 |       center_data<COL>(q, data, _data_avg);
 70 | 
 71 |       _nb_vec_computed = pca_args.min_nb_vecs;
 72 |       _eigenvectors = matrix_t<T>(
 73 |           range<2>(pca_args.min_nb_vecs, data_dim),
 74 |           get_optimal_nd_range(pca_args.min_nb_vecs, data_dim_pow2));
 75 |       load_array(q, _eigenvectors, load_filename);
 76 |     } else {
 77 |       std::cout << "Computing PCA..." << std::endl;
 78 |       _eigenvectors = pca_svd(q, data, _data_avg, pca_args);
 79 |       _nb_vec_computed = access_data_dim(_eigenvectors, 0);
 80 |       if (pca_args.save) {
 81 |         save_array(q, _eigenvectors,
 82 |                    get_filename(data_dim_pow2, _nb_vec_computed,
 83 |                                 pca_args.scale_factor));
 84 |       }
 85 |     }
 86 | 
 87 |     matrix_t<T> new_data =
 88 |         matrix_t<T>(range<2>(access_data_dim(data, 0), _nb_vec_computed));
 89 |     mat_mul<LIN, TR>(q, data, _eigenvectors, new_data);
 90 |     return new_data;
 91 |   }
 92 | 
 93 |   /**
 94 |    * @brief Apply the PCA to a dataset from previously computed eigenvectors and
 95 |    * data_avg.
 96 |    *
 97 |    * @param q
 98 |    * @param[in, out] data this matrix has been centered after this call
 99 |    * @return the new data
100 |    */
101 |   matrix_t<T> apply(queue& q, matrix_t<T>& data) {
102 |     if (!_enable_pca) {
103 |       return data;
104 |     }
105 | 
106 |     assert(_nb_vec_computed != 0);
107 |     matrix_t<T> new_data(range<2>(access_data_dim(data, 0), _nb_vec_computed));
108 |     center_data<COL>(q, data, _data_avg);
109 |     mat_mul<LIN, TR>(q, data, _eigenvectors, new_data);
110 |     return new_data;
111 |   }
112 | 
113 |  private:
114 |   bool _enable_pca;
115 |   SYCLIndexT _nb_vec_computed;
116 |   vector_t<T> _data_avg;
117 |   matrix_t<T> _eigenvectors;
118 | 
119 |   /**
120 |    * @brief Get the filename used for saving and loading eigenvectors.
121 |    *
122 |    * @param data_dim_pow2
123 |    * @param nb_vec
124 |    * @return the filename
125 |    */
126 |   inline std::string get_filename(SYCLIndexT data_dim_pow2, SYCLIndexT nb_vec,
127 |                                   T svd_factor) {
128 |     std::stringstream ss;
129 |     ss << "pca_" << nb_vec << "_" << data_dim_pow2 << "_" << svd_factor << "_"
130 |        << typeid(T).name();
131 |     return ss.str();
132 |   }
133 | 
134 |   /**
135 |    * @param filename
136 |    * @return true if filename exists (and is not locked)
137 |    */
138 |   inline bool file_exists(const std::string& filename) {
139 |     std::ifstream ifs(filename);
140 |     return ifs.good();
141 |   }
142 | };
143 | 
144 | }  // namespace ml
145 | 
146 | #endif  // INCLUDE_ML_PREPROCESS_APPLY_PCA_HPP
147 | 


--------------------------------------------------------------------------------
/include/ml/preprocess/pca.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_PREPROCESS_PCA_HPP
 17 | #define INCLUDE_ML_PREPROCESS_PCA_HPP
 18 | 
 19 | #include "ml/math/cov.hpp"
 20 | #include "ml/math/mat_ops.hpp"
 21 | #include "ml/math/svd.hpp"
 22 | 
 23 | namespace ml {
 24 | 
 25 | namespace detail {
 26 | 
 27 | class ml_pca_svd_copy_v;
 28 | 
 29 | template <class T>
 30 | event copy_eigenvectors(queue& q, vector_t<SYCLIndexT>& indices,
 31 |                         matrix_t<T>& in_v, matrix_t<T>& out_v) {
 32 |   return q.submit([&indices, &in_v, &out_v](handler& cgh) {
 33 |     auto in_acc = in_v.template get_access_2d<access::mode::read>(cgh);
 34 |     auto indices_acc = indices.template get_access_1d<access::mode::read>(cgh);
 35 |     auto out_acc =
 36 |         out_v.template get_access_2d<access::mode::discard_write>(cgh);
 37 |     cgh.parallel_for<NameGen<0, ml_pca_svd_copy_v, T>>(
 38 |         out_v.get_nd_range(), [=](nd_item<2> item) {
 39 |           auto row = item.get_global_id(0);
 40 |           auto col = item.get_global_id(1);
 41 |           out_acc(row, col) = in_acc(indices_acc(row), col);
 42 |         });
 43 |   });
 44 | }
 45 | 
 46 | }  // namespace detail
 47 | 
 48 | /**
 49 |  * @brief Arguments given to PCA
 50 |  *
 51 |  * auto_load: whether to load the basis vectors from the disk if the expected
 52 |  * file is present, defaults to true save: whether to save the basis vectors to
 53 |  * the disk if the PCA was not loaded, defaults to true min_nb_vecs: minimum
 54 |  * number of vectors to use, defaults to 0 which disable this constraint
 55 |  * keep_percent: minimum "amount of information" to keep in range [0; 1]. 0
 56 |  * disables the PCA and 1 keeps as many vectors as possible. Defaults to 1
 57 |  * scale_factor: factor applied when computing the PCA, a higher value yields
 58 |  * more precision but is slower. Defaults to 1
 59 |  *
 60 |  */
 61 | template <class T>
 62 | struct pca_args {
 63 |   pca_args()
 64 |       : auto_load(true),
 65 |         save(true),
 66 |         min_nb_vecs(0),
 67 |         keep_percent(1.f),
 68 |         scale_factor(T(1)) {}
 69 | 
 70 |   bool auto_load;
 71 |   bool save;
 72 |   SYCLIndexT min_nb_vecs;
 73 |   float keep_percent;
 74 |   T scale_factor;
 75 | };
 76 | 
 77 | /**
 78 |  * @brief Center the data and compute the principal components.
 79 |  *
 80 |  * Assumes the number of rows is the number of observations and the size of an
 81 |  * observation is a power of 2. Uses the svd to compute the eigenpairs. V =
 82 |  * pca(X) gives the eigenvectors so that Y = cX * V' where cX is the data
 83 |  * centered and Y is the new data with a smaller size of observation.
 84 |  *
 85 |  * @see apply_pca_svd
 86 |  * @tparam T
 87 |  * @param q
 88 |  * @param[in] data
 89 |  * @param[out] data_avg
 90 |  * @param pca_args @see struct pca_args
 91 |  * @return the eigenvectors V
 92 |  */
 93 | template <class T>
 94 | matrix_t<T> pca_svd(queue& q, matrix_t<T>& data, vector_t<T>& data_avg,
 95 |                     const pca_args<T>& pca_args) {
 96 |   avg(q, data, data_avg);
 97 |   center_data<COL>(q, data, data_avg);
 98 |   auto data_dim = access_data_dim(data, 1);
 99 |   auto data_dim_pow2 = access_ker_dim(data, 1);
100 | 
101 |   // For precision, scale data to change the eigenvalues but not the
102 |   // eigenvectors
103 |   auto scaled_data = matrix_t<T>(data.data_range, data.kernel_range);
104 |   if (pca_args.scale_factor != T(1)) {
105 |     vec_unary_op(q, data, scaled_data,
106 |                  functors::partial_binary_op<T, std::multiplies<T>>(
107 |                      pca_args.scale_factor));
108 |   }
109 | 
110 |   matrix_t<T> cov_matrix(range<2>(data_dim, data_dim),
111 |                          get_optimal_nd_range(data_dim_pow2, data_dim_pow2));
112 |   cov(q, scaled_data, cov_matrix);
113 |   SYCLIndexT estimated_nb_vecs = data_dim;
114 |   auto svd_out = svd<false, true, true>(q, cov_matrix, estimated_nb_vecs);
115 | 
116 |   if (pca_args.keep_percent >= 1) {
117 |     return svd_out.V;
118 |   }
119 | 
120 |   // Sort indices of l in descending order
121 |   std::vector<SYCLIndexT> host_indices(estimated_nb_vecs);
122 |   std::iota(begin(host_indices), end(host_indices), 0);
123 |   auto& host_l = svd_out.L;
124 |   std::sort(
125 |       begin(host_indices), end(host_indices),
126 |       [&](SYCLIndexT i1, SYCLIndexT i2) { return host_l[i1] > host_l[i2]; });
127 | 
128 |   // Compute nb_vecs needed to reach keep_percent
129 |   SYCLIndexT nb_vecs = 0;
130 |   float act_percent = 0;
131 |   for (; nb_vecs < estimated_nb_vecs && act_percent < pca_args.keep_percent;
132 |        ++nb_vecs) {
133 |     act_percent += host_l[host_indices[nb_vecs]] / svd_out.eig_vals_sum;
134 |   }
135 |   nb_vecs = std::max(nb_vecs, pca_args.min_nb_vecs);
136 |   std::cout << "Keeping " << nb_vecs << " vectors" << std::endl;
137 |   assert(nb_vecs > 0);
138 | 
139 |   // Copy the eigenvectors with the highest eigenvalue
140 |   vector_t<SYCLIndexT> sycl_indices(host_indices.data(), range<1>(nb_vecs));
141 |   matrix_t<T> V(range<2>(nb_vecs, data_dim),
142 |                 get_optimal_nd_range(nb_vecs, data_dim_pow2));
143 |   detail::copy_eigenvectors(q, sycl_indices, svd_out.V, V);
144 | 
145 |   return V;
146 | }
147 | 
148 | }  // namespace ml
149 | 
150 | #endif  // INCLUDE_ML_PREPROCESS_PCA_HPP
151 | 


--------------------------------------------------------------------------------
/include/ml/utils/access.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | /**
 17 |  * @file
 18 |  * @brief Define data_dim and some common functions related to it.
 19 |  */
 20 | 
 21 | #ifndef INCLUDE_ML_UTILS_ACCESS_HPP
 22 | #define INCLUDE_ML_UTILS_ACCESS_HPP
 23 | 
 24 | #include "ml/utils/sycl_types.hpp"
 25 | 
 26 | namespace ml {
 27 | 
 28 | /**
 29 |  * @brief Represent either a choice of dimension or of transposing.
 30 |  *
 31 |  * A choice of dimension means whether to use a row or a column.\n
 32 |  * A choice of transposing means whether to access the matrix as if it were
 33 |  * transposed or not.\n
 34 |  *
 35 |  */
 36 | enum data_dim {
 37 |   /// 0
 38 |   ROW = 0,
 39 |   /// Alias for ROW
 40 |   LIN = ROW,
 41 |   /// 1
 42 |   COL = 1,
 43 |   /// Alias for COL
 44 |   TR = COL
 45 | };
 46 | 
 47 | namespace detail {
 48 | 
 49 | template <class T, data_dim D>
 50 | struct lin_or_tr {
 51 |   static inline T apply(T lin, T) { return lin; }
 52 | };
 53 | 
 54 | template <class T>
 55 | struct lin_or_tr<T, TR> {
 56 |   static inline T apply(T, T tr) { return tr; }
 57 | };
 58 | 
 59 | }  // namespace detail
 60 | 
 61 | /**
 62 |  * @brief Return the first value if LIN, the second otherwise.
 63 |  *
 64 |  * @tparam D
 65 |  * @tparam T
 66 |  * @param lin
 67 |  * @param tr
 68 |  * @return \p lin if D=LIN, \p tr otherwise
 69 |  */
 70 | template <data_dim D, class T>
 71 | inline constexpr T lin_or_tr(T lin, T tr) {
 72 |   return detail::lin_or_tr<T, D>::apply(lin, tr);
 73 | }
 74 | 
 75 | /**
 76 |  * @brief Return the opposite value of D.
 77 |  *
 78 |  * @tparam D
 79 |  * @return TR if D=LIN, LIN otherwise
 80 |  */
 81 | template <data_dim D>
 82 | inline constexpr data_dim opp() {
 83 |   return static_cast<data_dim>((D + 1) % 2);
 84 | }
 85 | 
 86 | /**
 87 |  * @brief Access an index of a \p range<2> that may be swapped according to \p
 88 |  * D.
 89 |  *
 90 |  * @tparam D
 91 |  * @param r
 92 |  * @param i
 93 |  * @return the ith element if D=LIN, the other element otherwise
 94 |  */
 95 | template <data_dim D>
 96 | inline SYCLIndexT access_rng(const range<2>& r, SYCLIndexT i) {
 97 |   assert(i == 0 || i == 1);
 98 |   return r[lin_or_tr<D>(i, (i + 1) % 2)];
 99 | }
100 | 
101 | /**
102 |  * @brief Construct an object \p B with the 2 given parameters that may be
103 |  * swapped according to \p D.
104 |  *
105 |  * @tparam D
106 |  * @tparam B class to build, must have a constructor with 2 @ref SYCLIndexT
107 |  * @param x1
108 |  * @param x2
109 |  * @return the built object
110 |  */
111 | template <data_dim D, class B>
112 | inline constexpr B build_lin_or_tr(SYCLIndexT x1, SYCLIndexT x2) {
113 |   return B(lin_or_tr<D>(x1, x2), lin_or_tr<D>(x2, x1));
114 | }
115 | 
116 | /**
117 |  * @brief Construct another object \p B with the 2 parameters extracted from \p
118 |  * b that may be swapped according to \p D.
119 |  *
120 |  * @see build_lin_or_tr(SYCLIndexT, SYCLIndexT)
121 |  * @tparam D
122 |  * @tparam B class to build, must have a constructor with 2 arguments and a
123 |  * squared bracket accessor
124 |  * @param b
125 |  * @return the built object
126 |  */
127 | template <data_dim D, class B>
128 | inline constexpr B build_lin_or_tr(const B& b) {
129 |   return build_lin_or_tr<D, B>(b[0], b[1]);
130 | }
131 | 
132 | template <data_dim D1, data_dim D2>
133 | inline constexpr std::array<eig_dim_pair_t, 1> get_contract_dim() {
134 |   return {eig_dim_pair_t(D1, D2)};
135 | }
136 | 
137 | }  // namespace ml
138 | 
139 | #endif  // INCLUDE_ML_UTILS_ACCESS_HPP
140 | 


--------------------------------------------------------------------------------
/include/ml/utils/buffer_acc.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_UTILS_BUFFER_ACC_HPP
 17 | #define INCLUDE_ML_UTILS_BUFFER_ACC_HPP
 18 | 
 19 | #include "ml/utils/access.hpp"
 20 | 
 21 | #ifndef ML_DEBUG_BOUND_CHECK
 22 | /**
 23 |  * @brief Set to 1 for buffer initialization with nan and boundaries access
 24 |  * check.
 25 |  *
 26 |  * For debug only.
 27 |  * @warning Very slow.
 28 |  */
 29 | #define ML_DEBUG_BOUND_CHECK 0
 30 | #endif  // ML_DEBUG_BOUND_CHECK
 31 | 
 32 | namespace ml {
 33 | 
 34 | template <class T, int DIM>
 35 | class buffer_t;
 36 | 
 37 | namespace detail {
 38 | 
 39 | template <data_dim D>
 40 | struct get_index_2d;
 41 | 
 42 | template <>
 43 | struct get_index_2d<LIN> {
 44 |   static inline SYCLIndexT compute(SYCLIndexT r, SYCLIndexT c,
 45 |                                    SYCLIndexT nb_cols) {
 46 |     return r * nb_cols + c;
 47 |   }
 48 | };
 49 | 
 50 | template <>
 51 | struct get_index_2d<TR> {
 52 |   static inline SYCLIndexT compute(SYCLIndexT r, SYCLIndexT c,
 53 |                                    SYCLIndexT nb_cols) {
 54 |     return c * nb_cols + r;
 55 |   }
 56 | };
 57 | 
 58 | template <class T, access::mode>
 59 | struct is_reference_access {
 60 |   using value = T&;
 61 | };
 62 | 
 63 | template <class T>
 64 | struct is_reference_access<T, access::mode::read> {
 65 |   using value = T;
 66 | };
 67 | 
 68 | template <class T, int DIM, access::mode acc_mode, access::target acc_target>
 69 | class buffer_1d_acc_t {
 70 |  public:
 71 |   buffer_1d_acc_t(handler& cgh, buffer_t<T, DIM>* b)
 72 |       :
 73 | #if ML_DEBUG_BOUND_CHECK
 74 |         _range(b->get_kernel_size()),
 75 | #endif
 76 |         _offset(b->sub_buffer_offset),
 77 |         _acc(b->template get_access<acc_mode>(cgh, b->sub_buffer_range,
 78 |                                               b->sub_buffer_offset)) {
 79 |   }
 80 | 
 81 |   inline typename is_reference_access<T, acc_mode>::value operator()(
 82 |       SYCLIndexT x) const {
 83 |     x += _offset.get(0);
 84 | #if ML_DEBUG_BOUND_CHECK
 85 |     if (x >= _range[0]) {
 86 |       printf("Warning accessing at (%lu) from buffer of size (%lu)\n", x,
 87 |              _range[0]);
 88 |     }
 89 | #endif
 90 |     return _acc[x];
 91 |   }
 92 | 
 93 |   inline accessor<T, 1, acc_mode, acc_target> get() { return _acc; }
 94 | 
 95 |  private:
 96 | #if ML_DEBUG_BOUND_CHECK
 97 |   range<1> _range;
 98 | #endif
 99 |   id<1> _offset;
100 |   accessor<T, 1, acc_mode, acc_target> _acc;
101 | };
102 | 
103 | template <class T, access::mode acc_mode, data_dim D, access::target acc_target>
104 | class buffer_2d_acc_t {
105 |  public:
106 |   buffer_2d_acc_t(handler& cgh, buffer_t<T, 2>* b)
107 |       : _range(b->get_kernel_range()),
108 |         _offset(b->sub_buffer_offset),
109 |         _acc(b->template get_access<acc_mode>(cgh, b->sub_buffer_range,
110 |                                               b->sub_buffer_offset)) {}
111 | 
112 |   inline typename is_reference_access<T, acc_mode>::value operator()(
113 |       SYCLIndexT r, SYCLIndexT c) const {
114 |     auto idx =
115 |         _offset.get(0) + detail::get_index_2d<D>::compute(r, c, _range[1]);
116 | #if ML_DEBUG_BOUND_CHECK
117 |     if (idx >= _range.size()) {
118 |       printf(
119 |           "Warning accessing at (%lu, %lu)+%lu from buffer of size (%lu, "
120 |           "%lu)\n",
121 |           r, c, _offset.get(0), access_rng<D>(_range, 0),
122 |           access_rng<D>(_range, 1));
123 |     }
124 | #endif
125 |     return _acc[idx];
126 |   }
127 | 
128 |   inline accessor<T, 1, acc_mode, acc_target> get() { return _acc; }
129 | 
130 |  private:
131 |   range<2> _range;
132 |   id<1> _offset;
133 |   accessor<T, 1, acc_mode, acc_target> _acc;
134 | };
135 | 
136 | template <class T, access::mode acc_mode, access::target acc_target>
137 | class buffer_3d_acc_t {
138 |  public:
139 |   buffer_3d_acc_t(handler& cgh, buffer_t<T, 3>* b)
140 |       : _range(b->get_kernel_range()),
141 |         _acc(b->template get_access<acc_mode>(cgh, b->sub_buffer_range,
142 |                                               b->sub_buffer_offset)) {}
143 | 
144 |   inline typename is_reference_access<T, acc_mode>::value operator()(
145 |       SYCLIndexT x, SYCLIndexT y, SYCLIndexT z) const {
146 |     auto idx = _offset.get(0) + x + _range[1] * (y + _range[2] * z);
147 | #if ML_DEBUG_BOUND_CHECK
148 |     if (idx >= _range.size()) {
149 |       printf(
150 |           "Warning accessing at (%lu, %lu, %lu)+%lu from buffer of size (%lu, "
151 |           "%lu, "
152 |           "%lu)\n",
153 |           x, y, z, _offset.get(0), _range[0], _range[1], _range[2]);
154 |     }
155 | #endif
156 |     return _acc[idx];
157 |   }
158 | 
159 |   inline accessor<T, 1, acc_mode, acc_target> get() { return _acc; }
160 | 
161 |  private:
162 |   range<3> _range;
163 |   id<1> _offset;
164 |   accessor<T, 1, acc_mode, acc_target> _acc;
165 | };
166 | 
167 | }  // namespace detail
168 | 
169 | }  // namespace ml
170 | 
171 | #endif  // INCLUDE_ML_UTILS_BUFFER_ACC_HPP
172 | 


--------------------------------------------------------------------------------
/include/ml/utils/common.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | /**
17 |  * @file
18 |  * @brief Regroup common headers to all files that submit SYCL kernels.
19 |  */
20 | 
21 | #ifndef INCLUDE_ML_UTILS_COMMON_HPP
22 | #define INCLUDE_ML_UTILS_COMMON_HPP
23 | 
24 | #include <cassert>
25 | 
26 | #include "ml/eigen/sycl_to_eigen.hpp"
27 | #include "ml/utils/save_utils.hpp"
28 | 
29 | // Debug
30 | #include "ml/utils/debug/assert.hpp"
31 | #include "ml/utils/debug/print_utils.hpp"
32 | #include "ml/utils/debug/write_bmp.hpp"
33 | 
34 | #endif  // INCLUDE_ML_UTILS_COMMON_HPP
35 | 


--------------------------------------------------------------------------------
/include/ml/utils/debug/assert.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | /**
 17 |  * @file
 18 |  * @brief Common assert functions, only active in debug mode.
 19 |  */
 20 | 
 21 | #ifndef INCLUDE_ML_UTILS_DEBUG_ASSERT_HPP
 22 | #define INCLUDE_ML_UTILS_DEBUG_ASSERT_HPP
 23 | 
 24 | #include <cassert>
 25 | #include <cmath>
 26 | #include <iostream>
 27 | 
 28 | #include "ml/utils/access.hpp"
 29 | 
 30 | namespace ml {
 31 | 
 32 | #define STATIC_ASSERT_A_IMPLIES_B(a, b) static_assert(((a) && (b)) || !(a), "")
 33 | #define STATIC_ASSERT_DATA_DIM_FOR_DIM_2(dim, d) \
 34 |   STATIC_ASSERT_A_IMPLIES_B(dim != 2, d == LIN)
 35 | 
 36 | #ifndef NDEBUG
 37 | template <class T>
 38 | void assert_eq(T actual, T expected) {
 39 |   if (actual != expected) {
 40 |     std::cerr << "Error: got " << actual << " expected " << expected
 41 |               << std::endl;
 42 |     assert(false);
 43 |   }
 44 | }
 45 | 
 46 | template <class T>
 47 | void assert_vec_eq(const T& actual, const T& expected, size_t size) {
 48 |   for (size_t i = 0; i < size; ++i) {
 49 |     assert_eq(actual[i], expected[i]);
 50 |   }
 51 | }
 52 | 
 53 | template <int DIM>
 54 | void assert_rng_eq(const range<DIM>& actual, const range<DIM>& expected) {
 55 |   assert_vec_eq(actual, expected, DIM);
 56 | }
 57 | 
 58 | template <class T>
 59 | void assert_less_or_eq(T x, T high) {
 60 |   if (x > high) {
 61 |     std::stringstream ss;
 62 |     ss << "Error: " << x << " larger than " << high;
 63 |     std::cerr << ss.str() << std::endl;
 64 |     assert(false);
 65 |   }
 66 | }
 67 | 
 68 | template <int DIM>
 69 | inline void assert_rng_size_less_or_eq(const range<DIM>& r,
 70 |                                        SYCLIndexT high_size) {
 71 |   assert_less_or_eq(r.size(), high_size);
 72 | }
 73 | 
 74 | template <data_dim D = LIN, int DIM>
 75 | void assert_rng_less_or_eq(const range<DIM>& r, const range<DIM>& high_r) {
 76 |   for (int i = 0; i < DIM; ++i) {
 77 |     assert_less_or_eq(r[i], high_r[i]);
 78 |   }
 79 | }
 80 | 
 81 | template <>
 82 | inline void assert_rng_less_or_eq<TR, 2>(const range<2>& r,
 83 |                                          const range<2>& high_r) {
 84 |   assert_less_or_eq(r[1], high_r[0]);
 85 |   assert_less_or_eq(r[0], high_r[1]);
 86 | }
 87 | 
 88 | template <data_dim = LIN>
 89 | inline void assert_rng_less_or_eq(const range<1>& r, SYCLIndexT high0) {
 90 |   assert_rng_less_or_eq(r, range<1>(high0));
 91 | }
 92 | 
 93 | template <data_dim D = LIN>
 94 | inline void assert_rng_less_or_eq(const range<2>& r, SYCLIndexT high0,
 95 |                                   SYCLIndexT high1) {
 96 |   assert_rng_less_or_eq(range<2>(access_rng<D>(r, 0), access_rng<D>(r, 1)),
 97 |                         range<2>(high0, high1));
 98 | }
 99 | 
100 | template <data_dim = LIN>
101 | inline void assert_rng_less_or_eq(const range<3>& r, SYCLIndexT high0,
102 |                                   SYCLIndexT high1, SYCLIndexT high2) {
103 |   assert_rng_less_or_eq(r, range<3>(high0, high1, high2));
104 | }
105 | 
/**
 * @brief Report on std::cerr and trigger an assertion failure when \p x is
 * not finite.
 *
 * A non-finite value is either nan or inf, so the message names one of the
 * two.
 *
 * @tparam T type accepted by std::isfinite and std::isnan
 * @param x value to check
 */
template <class T>
void assert_real(T x) {
  if (!std::isfinite(x)) {
    std::cerr << "Error: value is " << (std::isnan(x) ? "nan" : "inf")
              << std::endl;
    assert(false);
  }
}
122 | 
123 | inline void assert_rng_square(const range<2>& r) {
124 |   assert_eq(r[0], r[1]);
125 | }
126 | 
127 | #else   // NDEBUG
// Release builds (NDEBUG defined): every assert helper below has an empty
// body, so calls to them perform no check.

/// No-op counterpart of assert_eq.
template <class T>
inline void assert_eq(T, T) {}
/// No-op counterpart of assert_vec_eq.
template <class T>
inline void assert_vec_eq(const T&, const T&, size_t) {}
/// No-op counterpart of assert_rng_eq.
template <int DIM>
inline void assert_rng_eq(const range<DIM>&, const range<DIM>&) {}
/// No-op counterpart of assert_less_or_eq.
template <class T>
inline void assert_less_or_eq(T, T) {}
/// No-op counterpart of assert_rng_size_less_or_eq.
template <int DIM>
inline void assert_rng_size_less_or_eq(range<DIM>, SYCLIndexT) {}
/// No-op counterpart of assert_rng_less_or_eq taking two ranges.
template <data_dim = LIN, int DIM>
inline void assert_rng_less_or_eq(const range<DIM>&, const range<DIM>&) {}
/// No-op counterpart of assert_rng_less_or_eq for a 1D range.
template <data_dim = LIN>
inline void assert_rng_less_or_eq(const range<1>&, SYCLIndexT) {}
/// No-op counterpart of assert_rng_less_or_eq for a 2D range.
template <data_dim = LIN>
inline void assert_rng_less_or_eq(const range<2>&, SYCLIndexT, SYCLIndexT) {}
/// No-op counterpart of assert_rng_less_or_eq for a 3D range.
template <data_dim = LIN>
inline void assert_rng_less_or_eq(const range<3>&, SYCLIndexT, SYCLIndexT,
                                  SYCLIndexT) {}
/// No-op counterpart of assert_real.
template <class T>
inline void assert_real(T) {}
/// No-op counterpart of assert_rng_square.
inline void assert_rng_square(const range<2>&) {}
150 | #endif  // end NDEBUG
151 | 
152 | }  // namespace ml
153 | 
154 | #endif  // INCLUDE_ML_UTILS_DEBUG_ASSERT_HPP
155 | 


--------------------------------------------------------------------------------
/include/ml/utils/debug/print_utils.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | /**
 17 |  * @file
 18 |  * @brief Allow to print generic array, std pair as well as sycl id, range and
 19 |  * nd_range
 20 |  */
 21 | 
 22 | #ifndef INCLUDE_ML_UTILS_DEBUG_PRINT_UTILS_HPP
 23 | #define INCLUDE_ML_UTILS_DEBUG_PRINT_UTILS_HPP
 24 | 
 25 | #include <iostream>
 26 | #include <sstream>
 27 | 
 28 | #include "ml/utils/sycl_types.hpp"
 29 | 
 30 | namespace ml {
 31 | 
 32 | /**
 33 |  * @brief Print std::pair
 34 |  *
 35 |  * @tparam T1
 36 |  * @tparam T2
 37 |  * @param os
 38 |  * @param p
 39 |  * @return os
 40 |  */
 41 | template <class T1, class T2>
 42 | std::ostream& operator<<(std::ostream& os, const std::pair<T1, T2>& p) {
 43 |   os << "(" << p.first << "," << p.second << ")";
 44 |   return os;
 45 | }
 46 | 
 47 | /**
 48 |  * @brief Print cl::sycl::id
 49 |  *
 50 |  * @tparam DIM
 51 |  * @param os
 52 |  * @param id_
 53 |  * @return os
 54 |  */
 55 | template <int DIM>
 56 | std::ostream& operator<<(std::ostream& os, const cl::sycl::id<DIM>& id_) {
 57 |   os << "[" << id_[0];
 58 |   for (int i = 1; i < DIM; ++i) {
 59 |     os << ", " << id_[i];
 60 |   }
 61 |   os << "]";
 62 |   return os;
 63 | }
 64 | 
 65 | /**
 66 |  * @brief Print cl::sycl::range
 67 |  *
 68 |  * @tparam DIM
 69 |  * @param os
 70 |  * @param r
 71 |  * @return os
 72 |  */
 73 | template <int DIM>
 74 | std::ostream& operator<<(std::ostream& os, const cl::sycl::range<DIM>& r) {
 75 |   os << "[" << r[0];
 76 |   for (int i = 1; i < DIM; ++i) {
 77 |     os << ", " << r[i];
 78 |   }
 79 |   os << "]";
 80 |   return os;
 81 | }
 82 | 
 83 | /**
 84 |  * @brief Print cl::sycl::nd_range
 85 |  *
 86 |  * @tparam DIM
 87 |  * @param os
 88 |  * @param r
 89 |  * @return os
 90 |  */
 91 | template <int DIM>
 92 | std::ostream& operator<<(std::ostream& os, const cl::sycl::nd_range<DIM>& r) {
 93 |   return os << r.get_global_range() << "@" << r.get_local_range() << "@"
 94 |             << r.get_offset();
 95 | }
 96 | 
 97 | /**
 98 |  * @brief Print any data array as a matrix
 99 |  *
100 |  * @tparam T data type with a [] accessor
101 |  * @param os
102 |  * @param data
103 |  * @param nrows
104 |  * @param ncols
105 |  * @param off
106 |  * @return os
107 |  */
108 | template <class T>
109 | std::ostream& print(std::ostream& os, const T& data, size_t nrows, size_t ncols,
110 |                     size_t off = 0) {
111 |   for (size_t r = 0; r < nrows; ++r) {
112 |     for (size_t c = 0; c < ncols; ++c) {
113 |       os << data[r * ncols + c + off] << ' ';
114 |     }
115 |     os << std::endl;
116 |   }
117 |   return os;
118 | }
119 | 
/**
 * @brief Print any data array as a matrix on std::cout.
 *
 * Forwards to the overload taking an output stream.
 *
 * @tparam T data type with a [] accessor
 * @param data
 * @param nrows
 * @param ncols
 * @param off
 * @return std::cout
 */
template <class T>
std::ostream& print(const T& data, size_t nrows, size_t ncols, size_t off = 0) {
  std::ostream& out = std::cout;
  return print(out, data, nrows, ncols, off);
}
135 | 
136 | }  // namespace ml
137 | 
138 | #endif  // INCLUDE_ML_UTILS_DEBUG_PRINT_UTILS_HPP
139 | 


--------------------------------------------------------------------------------
/include/ml/utils/device_constants.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_UTILS_DEVICE_CONSTANTS_HPP
 17 | #define INCLUDE_ML_UTILS_DEVICE_CONSTANTS_HPP
 18 | 
 19 | #include <iostream>
 20 | #include <string>
 21 | 
 22 | #include "ml/utils/sycl_types.hpp"
 23 | 
 24 | namespace ml {
 25 | 
 26 | /**
 27 |  * @brief Singleton that holds device specific constant.
 28 |  *
 29 |  * The user must initialize the instance before using it.\n
 30 |  * This will create a \p sycl::queue which can be retrieved with the \p
 31 |  * Eigen::SyclDevice. Note that the library assumes that only one device is used
 32 |  * for now.
 33 |  * @tparam Void Do not use, only here to avoid the use of a source file
 34 |  */
 35 | template <class Void = void>
 36 | class device_constants {
 37 |  public:
 38 |   static device_constants<Void>* instance;
 39 | 
 40 |   device_constants()
 41 |       : _eigen_queue(default_selector()), _eigen_device(&_eigen_queue) {
 42 |     const cl::sycl::device& sycl_device =
 43 |         _eigen_queue.sycl_queue().get_device();
 44 |     const cl::sycl::platform& platform = sycl_device.get_platform();
 45 |     using namespace cl::sycl::info;
 46 |     std::cout << "Selected device: "
 47 |               << sycl_device.get_info<info::device::name>() << ", ";
 48 |     std::cout << "type: "
 49 |               << device_type_to_str(
 50 |                      sycl_device.get_info<info::device::device_type>())
 51 |               << ", ";
 52 |     std::cout << "platform: " << platform.get_info<info::platform::name>();
 53 |     std::cout << " [" << platform.get_info<info::platform::vendor>() << "]\n";
 54 |     std::cout << std::endl;
 55 | 
 56 |     MAX_WORK_GROUP_SIZE =
 57 |         sycl_device.get_info<info::device::max_work_group_size>();
 58 |     MEM_BASE_ADDR_ALIGN =
 59 |         sycl_device.get_info<info::device::mem_base_addr_align>();
 60 |     MAX_WORK_ITEM_SIZES =
 61 |         sycl_device.get_info<info::device::max_work_item_sizes>();
 62 |   }
 63 | 
 64 |   inline size_t get_max_work_group_size() { return MAX_WORK_GROUP_SIZE; }
 65 |   inline size_t get_mem_base_addr_align() { return MEM_BASE_ADDR_ALIGN; }
 66 |   inline id<3> get_max_work_item_sizes() { return MAX_WORK_ITEM_SIZES; }
 67 | 
 68 |   /**
 69 |    * @tparam T
 70 |    * @return Return the value by which the size of a sub-buffer of type T must
 71 |    * be divisible.
 72 |    */
 73 |   template <class T>
 74 |   inline size_t get_sub_buffer_range_divisor() {
 75 |     return get_mem_base_addr_align() / (sizeof(T) * CHAR_BIT);
 76 |   }
 77 | 
 78 |   /**
 79 |    * @brief Round size up to be used by a sub-buffer.
 80 |    *
 81 |    * @see get_sub_buffer_range_divisor
 82 |    * @tparam T
 83 |    * @param size
 84 |    * @return a size usable by a sub-buffer
 85 |    */
 86 |   template <class T>
 87 |   inline size_t pad_sub_buffer_size(size_t size) {
 88 |     auto divisor = get_sub_buffer_range_divisor<T>();
 89 |     return static_cast<size_t>((size / divisor + (size % divisor > 0)) *
 90 |                                divisor);
 91 |   }
 92 | 
 93 |   inline Eigen::SyclDevice& get_eigen_device() { return _eigen_device; }
 94 | 
 95 |  private:
 96 |   size_t MAX_WORK_GROUP_SIZE;
 97 |   size_t MEM_BASE_ADDR_ALIGN;
 98 |   id<3> MAX_WORK_ITEM_SIZES;
 99 | 
100 |   Eigen::QueueInterface _eigen_queue;
101 |   Eigen::SyclDevice _eigen_device;
102 | 
103 |   inline std::string device_type_to_str(cl::sycl::info::device_type type) {
104 |     using namespace cl::sycl::info;
105 |     switch (type) {
106 |       case info::device_type::cpu:
107 |         return "CPU";
108 |       case info::device_type::gpu:
109 |         return "GPU";
110 |       case info::device_type::accelerator:
111 |         return "accelerator";
112 |       case info::device_type::custom:
113 |         return "custom";
114 |       case info::device_type::automatic:
115 |         return "automatic";
116 |       case info::device_type::host:
117 |         return "host";
118 |       default:
119 |         return "NONE";
120 |     }
121 |   }
122 | };
123 | 
124 | template <>
125 | device_constants<>* device_constants<>::instance = nullptr;
126 | 
127 | /// @brief Return the device_constants instance.
128 | inline device_constants<>* get_device_constants() {
129 |   return device_constants<>::instance;
130 | }
131 | 
132 | /// @brief Return the \p Eigen::SyclDevice.
133 | inline Eigen::SyclDevice& get_eigen_device() {
134 |   return get_device_constants()->get_eigen_device();
135 | }
136 | 
137 | }  // namespace ml
138 | 
139 | #endif  // INCLUDE_ML_UTILS_DEVICE_CONSTANTS_HPP
140 | 


--------------------------------------------------------------------------------
/include/ml/utils/optimal_range.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #ifndef INCLUDE_ML_UTILS_OPTIMAL_RANGE_HPP
 17 | #define INCLUDE_ML_UTILS_OPTIMAL_RANGE_HPP
 18 | 
 19 | #include <array>
 20 | 
 21 | #include "ml/utils/access.hpp"
 22 | #include "ml/utils/device_constants.hpp"
 23 | 
 24 | namespace ml {
 25 | 
 26 | /**
 27 |  * @tparam T
 28 |  * @param x
 29 |  * @return true if x is a power of 2
 30 |  */
 31 | template <class T>
 32 | inline bool is_pow2(T x) {
 33 |   return (x & (x - 1)) == 0;
 34 | }
 35 | 
 36 | /**
 37 |  * @tparam T
 38 |  * @param x
 39 |  * @return the closest power of 2 higher or equal to x
 40 |  */
 41 | template <class T>
 42 | inline T to_pow2(T x) {
 43 |   return std::pow(2, std::ceil(std::log2(x)));
 44 | }
 45 | 
 46 | /**
 47 |  * @brief Compute the best suitable local_range associated to global_range.
 48 |  *
 49 |  * The function is trivial if the global range is smaller or equal to the max
 50 |  * work group size.\n If not the function only tries to find divisor that are
 51 |  * power of 2. Finding all possible divisors would be too costly otherwise.
 52 |  *
 53 |  * @tparam DIM
 54 |  * @param global_range
 55 |  * @return local_range
 56 |  */
 57 | template <int DIM>
 58 | range<DIM> get_optimal_local_range(const range<DIM>& global_range) {
 59 |   auto max_work_group_size = get_device_constants()->get_max_work_group_size();
 60 |   range<DIM> local_range;
 61 |   if (global_range.size() <= max_work_group_size) {
 62 |     local_range = global_range;
 63 |   } else {
 64 |     auto max_work_group_item_sizes =
 65 |         get_device_constants()->get_max_work_item_sizes();
 66 |     for (int i = 0; i < DIM; ++i) {
 67 |       local_range[i] = max_work_group_item_sizes[i];
 68 |       while (global_range[i] % local_range[i]) {
 69 |         local_range[i] >>= 1;
 70 |       }
 71 |     }
 72 | 
 73 |     // Make sure the local size does not exceed the maximum
 74 |     for (int i = 0; i < DIM && local_range.size() > max_work_group_size; ++i) {
 75 |       // Try to divide the ith local size to reach a size of max_work_group_size
 76 |       auto divide_by = local_range.size() / max_work_group_size;
 77 |       local_range[i] /= std::min(local_range[i], divide_by);
 78 |     }
 79 |   }
 80 | 
 81 |   return local_range;
 82 | }
 83 | 
 84 | /**
 85 |  * @see get_optimal_local_range
 86 |  * @tparam DIM
 87 |  * @param global_range
 88 |  * @param offset
 89 |  * @return the nd_range built from \p global_range with a local range as big as
 90 |  * possible
 91 |  */
 92 | template <int DIM>
 93 | inline nd_range<DIM> get_optimal_nd_range(const range<DIM>& global_range,
 94 |                                           const id<DIM>& offset = id<DIM>()) {
 95 |   return nd_range<DIM>(global_range, get_optimal_local_range(global_range),
 96 |                        offset);
 97 | }
 98 | 
 99 | /**
100 |  * @see get_optimal_nd_range(const range<DIM>&, const id<DIM>&)
101 |  * @tparam Args
102 |  * @param args
103 |  * @return the nd_range built from \p args with a local range as big as possible
104 |  */
105 | template <class... Args>
106 | inline nd_range<sizeof...(Args)> get_optimal_nd_range(Args... args) {
107 |   return get_optimal_nd_range(range<sizeof...(Args)>(args...));
108 | }
109 | 
110 | }  // namespace ml
111 | 
112 | #endif  // INCLUDE_ML_UTILS_OPTIMAL_RANGE_HPP
113 | 


--------------------------------------------------------------------------------
/include/ml/utils/save_utils.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | /**
17 |  * @file
18 |  * @brief Allow the loading and saving of generic arrays and SYCL buffers to and
19 |  * from disk
20 |  */
21 | 
22 | #ifndef INCLUDE_ML_UTILS_SAVE_UTILS_HPP
23 | #define INCLUDE_ML_UTILS_SAVE_UTILS_HPP
24 | 
25 | #include <fstream>
26 | #include <iostream>
27 | #include <string>
28 | 
29 | #include "ml/utils/copy.hpp"
30 | 
31 | namespace ml {
32 | 
/**
 * @brief Write \p length elements of \p data to a binary file.
 *
 * Progress is announced on std::cout. Failure to open the file or to write
 * its content is reported on std::cerr; the function returns normally in both
 * cases.
 *
 * @tparam T element type, written as raw bytes
 * @param data first element to write
 * @param length number of elements to write
 * @param file_path destination file
 */
template <class T>
void save_array(const T* data, size_t length, const std::string& file_path) {
  std::cout << "Saving to " << file_path << "..." << std::endl;
  std::ofstream os(file_path.c_str(), std::ios::binary | std::ios::out);
  if (!os.is_open()) {
    std::cerr << "Could not open " << file_path << std::endl;
    return;
  }
  os.write(reinterpret_cast<const char*>(data), length * sizeof(T));
  // Close before testing the stream so that flush errors are seen as well.
  os.close();
  if (!os) {
    std::cerr << "Could not write to " << file_path << std::endl;
  }
}
44 | 
/**
 * @brief Read \p length elements from a binary file into \p data.
 *
 * Progress is announced on std::cout. Failure to open the file, or a file
 * holding fewer bytes than requested, is reported on std::cerr; the function
 * returns normally in both cases. After a short read only the leading bytes
 * of \p data have been written.
 *
 * @tparam T element type, read as raw bytes
 * @param data destination, must have room for \p length elements
 * @param length number of elements to read
 * @param file_path source file
 */
template <class T>
void load_array(T* data, size_t length, const std::string& file_path) {
  std::cout << "Loading from " << file_path << "..." << std::endl;
  std::ifstream is(file_path.c_str(), std::ios::binary | std::ios::in);
  if (!is.is_open()) {
    std::cerr << "Could not open " << file_path << std::endl;
    return;
  }
  const std::streamsize nb_bytes =
      static_cast<std::streamsize>(length * sizeof(T));
  is.read(reinterpret_cast<char*>(data), nb_bytes);
  if (is.gcount() != nb_bytes) {
    std::cerr << "Could only read " << is.gcount() << " of " << nb_bytes
              << " bytes from " << file_path << std::endl;
  }
  is.close();
}
56 | 
57 | template <class T, int DIM>
58 | void save_array(queue& q, buffer_t<T, DIM>& buf, const std::string& file_path) {
59 |   std::vector<T> host_buf(buf.get_kernel_size());
60 |   auto event = sycl_copy_device_to_host(q, buf, host_buf.data());
61 |   event.wait_and_throw();
62 |   save_array(host_buf.data(), host_buf.size(), file_path);
63 | }
64 | 
65 | template <class T, int DIM>
66 | void load_array(queue& q, buffer_t<T, DIM>& buf, const std::string& file_path) {
67 |   std::vector<T> host_buf(buf.get_kernel_size());
68 |   load_array(host_buf.data(), host_buf.size(), file_path);
69 |   auto event = sycl_copy_host_to_device(q, host_buf.data(), buf);
70 |   event.wait_and_throw();
71 | }
72 | 
73 | }  // namespace ml
74 | 
75 | #endif  // INCLUDE_ML_UTILS_SAVE_UTILS_HPP
76 | 


--------------------------------------------------------------------------------
/include/ml/utils/sycl_types.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | /**
17 |  * @file
18 |  * @brief Common SYCL aliases
19 |  */
20 | 
21 | #ifndef INCLUDE_ML_UTILS_SYCL_TYPES_HPP
22 | #define INCLUDE_ML_UTILS_SYCL_TYPES_HPP
23 | 
24 | #include <CL/sycl.hpp>
25 | 
26 | #include "ml/eigen/eigen.hpp"
27 | 
28 | namespace ml {
29 | 
// Makes every cl::sycl name usable unqualified inside namespace ml.
using namespace cl::sycl;

/// Index type used throughout the library.
using SYCLIndexT = size_t;

/// 1D SYCL buffer of \p T using allocator \p Alloc.
template <class T, class Alloc = cl::sycl::default_allocator>
using sycl_vec_t = buffer<T, 1, Alloc>;

/// Empty class template distinguished only by its template arguments.
/// NOTE(review): presumably used to generate unique kernel names — confirm
/// against its users.
template <int Index, typename... Details>
class NameGen {};
39 | 
40 | }  // namespace ml
41 | 
42 | #endif  // INCLUDE_ML_UTILS_SYCL_TYPES_HPP
43 | 


--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) Codeplay Software Limited.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #    http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | 
# Add src/ to the include path (test sources include e.g. "utils/utils.hpp").
include_directories("src")

# Build each test
# Every .cpp file found recursively under src/ becomes its own executable and
# its own CTest entry.
file(GLOB_RECURSE SOURCES "src/*.cpp")
foreach(SOURCE ${SOURCES})
  # NOTE(review): TARGET_NAME is not set in this file; presumably
  # add_sycl_ml_executable defines it for the source just added - confirm.
  add_sycl_ml_executable(${SOURCE})
  # Forward the SYCLML_TEST_DOUBLE option to the test as a compile definition.
  if(SYCLML_TEST_DOUBLE)
    target_compile_definitions(${TARGET_NAME} PUBLIC SYCLML_TEST_DOUBLE)
  endif()
  # The test binary receives MNIST_RELATIVE_PATH as its only argument.
  add_test(NAME ${TARGET_NAME} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} ${MNIST_RELATIVE_PATH})
  # Label the test with OUTPUT_SYCL_DIR.
  set_tests_properties(${TARGET_NAME} PROPERTIES LABELS "${OUTPUT_SYCL_DIR}")
endforeach()
27 | 


--------------------------------------------------------------------------------
/tests/src/math/test_center.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include <iostream>
17 | 
18 | #include "ml/math/mat_ops.hpp"
19 | #include "utils/utils.hpp"
20 | 
/**
 * @brief Check ml::avg followed by ml::center_data on a 5x3 matrix.
 *
 * @tparam T element type of the matrix
 * @tparam D data_dim given to ml::avg; ml::opp<D>() is given to
 * ml::center_data
 */
template <class T, ml::data_dim D>
void test_center() {
  constexpr auto NB_OBS = 5LU;
  constexpr auto ACT_SIZE_OBS = 3LU;
  // 5 rows of 3 values each.
  std::array<T, NB_OBS * ACT_SIZE_OBS> host_data{1.0,  4.0, 7.0,  2.0,  0.0,
                                                 -8.0, 1.0, 2.0,  1.0,  0.0,
                                                 0.0,  1.0, -5.0, -4.0, -3.0};

  std::array<T, ACT_SIZE_OBS> host_avg_data;
  std::array<T, NB_OBS * ACT_SIZE_OBS> host_center_data;
  {
    // The SYCL objects live in this scope only; the host arrays registered
    // with set_final_data are read after the scope ends.
    cl::sycl::queue& q = create_queue();
    ml::matrix_t<T> sycl_data(host_data.data(),
                              cl::sycl::range<2>(NB_OBS, ACT_SIZE_OBS));
    ml::vector_t<T> sycl_data_avg{cl::sycl::range<1>(ACT_SIZE_OBS)};

    ml::avg<D>(q, sycl_data, sycl_data_avg);
    ml::center_data<ml::opp<D>()>(q, sycl_data, sycl_data_avg);

    sycl_data.set_final_data(host_center_data.data());
    sycl_data_avg.set_final_data(host_avg_data.data());
    clear_eigen_device();
  }

  /*
  std::cout << "host data:\n";
  ml::print(host_data, NB_OBS, ACT_SIZE_OBS);
  std::cout << "\navg data:\n";
  ml::print(host_avg_data, 1, ACT_SIZE_OBS);
  std::cout << "\ncenter data:\n";
  ml::print(host_center_data, NB_OBS, ACT_SIZE_OBS);
  */

  // avg data
  // Expected values are the means of the three columns of host_data.
  assert_vec_almost_eq(host_avg_data, {-0.2, 0.4, -0.4});

  // center data
  // Expected values are host_data with the column mean subtracted.
  assert_vec_almost_eq(host_center_data,
                       {1.2, 3.6, 7.4, 2.2, -0.4, -7.6, 1.2, 1.6, 1.4, 0.2,
                        -0.4, 1.4, -4.8, -4.4, -2.6});
}
62 | 
63 | int main() {
64 |   try {
65 |     test_center<float, ml::ROW>();
66 | #ifdef SYCLML_TEST_DOUBLE
67 |     test_center<double, ml::ROW>();
68 | #endif
69 |   } catch (cl::sycl::exception e) {
70 |     std::cerr << e.what();
71 |   }
72 | 
73 |   return 0;
74 | }
75 | 


--------------------------------------------------------------------------------
/tests/src/math/test_cov.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #include <iostream>
 17 | 
 18 | #include "ml/math/cov.hpp"
 19 | #include "ml/math/mat_ops.hpp"
 20 | #include "utils/utils.hpp"
 21 | 
 22 | template <class T>
 23 | void test_cov_square() {
 24 |   static constexpr ml::data_dim D = ml::LIN;
 25 |   std::array<T, 9> host_data{1.0, 4.0, 7.0, 2.0, 0.0, -8.0, 1.0, 2.0, 1.0};
 26 | 
 27 |   std::array<T, 9> host_cov;
 28 |   {
 29 |     cl::sycl::queue& q = create_queue();
 30 |     ml::matrix_t<T> sycl_data(host_data.data(), cl::sycl::range<2>(3, 3));
 31 |     sycl_data.set_final_data(nullptr);
 32 |     ml::vector_t<T> sycl_data_avg{cl::sycl::range<1>(3)};
 33 | 
 34 |     ml::avg<D>(q, sycl_data, sycl_data_avg);
 35 |     ml::center_data<ml::opp<D>()>(q, sycl_data, sycl_data_avg);
 36 | 
 37 |     ml::matrix_t<T> sycl_cov(cl::sycl::range<2>(3, 3));
 38 |     ml::cov<D>(q, sycl_data, sycl_cov);
 39 |     sycl_cov.set_final_data(host_cov.data());
 40 |     clear_eigen_device();
 41 |   }
 42 | 
 43 |   /*
 44 |   std::cout << "host data:\n";
 45 |   ml::print(host_data, 3, 3);
 46 |   std::cout << "\ncov:\n";
 47 |   ml::print(host_cov, 3, 3);
 48 |   */
 49 | 
 50 |   std::array<T, 9> expected{2.0 / 9.0,   -2.0 / 3.0,  -8.0 / 3.0,
 51 |                             host_cov[1], 8.0 / 3.0,   10.0,
 52 |                             host_cov[2], host_cov[5], 38.0};
 53 |   assert_vec_almost_eq(host_cov, expected);
 54 | }
 55 | 
 56 | template <class T>
 57 | void test_cov_general() {
 58 |   static constexpr ml::data_dim D = ml::TR;
 59 |   // 3 observations that have 2 variables each (transposed)
 60 |   std::array<T, 6> host_data{1.0, 2.0, 3.0, 2.0, 2.0, 11.0};
 61 | 
 62 |   std::array<T, 4> host_cov;
 63 |   {
 64 |     cl::sycl::queue& q = create_queue();
 65 |     ml::matrix_t<T> sycl_data(host_data.data(), cl::sycl::range<2>(2, 3));
 66 |     ml::vector_t<T> sycl_data_avg(cl::sycl::range<1>(2));
 67 | 
 68 |     ml::avg<D>(q, sycl_data, sycl_data_avg);
 69 |     ml::center_data<ml::opp<D>()>(q, sycl_data, sycl_data_avg);
 70 | 
 71 |     ml::matrix_t<T> sycl_cov(cl::sycl::range<2>(2, 2));
 72 |     ml::cov<D>(q, sycl_data, sycl_cov);
 73 |     sycl_cov.set_final_data(host_cov.data());
 74 |     clear_eigen_device();
 75 |   }
 76 | 
 77 |   /*
 78 |   std::cout << "data:\n";
 79 |   ml::print(host_data, 3, 2);
 80 |   std::cout << "\ncov:\n";
 81 |   ml::print(host_cov, 2, 2);
 82 |   */
 83 | 
 84 |   std::array<T, 4> expected{2.0 / 3.0, 3.0, host_cov[1], 18.0};
 85 |   assert_vec_almost_eq(host_cov, expected);
 86 | }
 87 | 
 88 | template <class T>
 89 | void test_all() {
 90 |   test_cov_square<T>();
 91 |   test_cov_general<T>();
 92 | }
 93 | 
 94 | int main(void) {
 95 |   try {
 96 |     test_all<float>();
 97 | #ifdef SYCLML_TEST_DOUBLE
 98 |     test_all<double>();
 99 | #endif
100 |   } catch (cl::sycl::exception e) {
101 |     std::cerr << e.what();
102 |   }
103 | 
104 |   return 0;
105 | }
106 | 


--------------------------------------------------------------------------------
/tests/src/math/test_dot_product.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include <iostream>
17 | 
18 | #include "ml/math/vec_ops.hpp"
19 | #include "utils/utils.hpp"
20 | 
21 | template <class T>
22 | void test_dot_product_self() {
23 |   constexpr size_t SIZE = 4;
24 |   std::array<T, SIZE> in{1, 0.5, -1, 0};
25 |   T res;
26 | 
27 |   {
28 |     cl::sycl::queue& q = create_queue();
29 |     ml::vector_t<T> sycl_vec(in.data(), cl::sycl::range<1>(in.size()));
30 |     res = ml::sycl_dot_product(q, sycl_vec);
31 |     clear_eigen_device();
32 |   }
33 | 
34 |   /*
35 |   for (unsigned i = 0; i < SIZE; ++i) {
36 |     std::cout << in[i] << " ";
37 |   }
38 |   std::cout << "\nres=" << res << std::endl;
39 |   */
40 | 
41 |   assert_almost_eq(res, T(2.25));
42 | }
43 | 
44 | template <class T>
45 | void test_dot_product_other() {
46 |   constexpr size_t SIZE = 4;
47 |   std::array<T, SIZE> in1{1, 2, 3, 4};
48 |   std::array<T, SIZE> in2{2, 2, 1, 0.5};
49 |   T res;
50 | 
51 |   {
52 |     cl::sycl::queue& q = create_queue();
53 |     ml::vector_t<T> sycl_vec1(in1.data(), cl::sycl::range<1>(in1.size()));
54 |     sycl_vec1.set_final_data(nullptr);
55 |     ml::vector_t<T> sycl_vec2(in2.data(), cl::sycl::range<1>(in2.size()));
56 |     sycl_vec2.set_final_data(nullptr);
57 |     res = ml::sycl_dot_product(q, sycl_vec1, sycl_vec2);
58 |     clear_eigen_device();
59 |   }
60 | 
61 |   /*
62 |   for (unsigned i = 0; i < SIZE; ++i) {
63 |     std::cout << in1[i] << " ";
64 |   }
65 |   std::cout << std::endl;
66 |   for (unsigned i = 0; i < SIZE; ++i) {
67 |     std::cout << in2[i] << " ";
68 |   }
69 |   std::cout << "\nres=" << res << std::endl;
70 |   */
71 | 
72 |   assert_almost_eq(res, T(11));
73 | }
74 | 
75 | template <class T>
76 | void test_all() {
77 |   test_dot_product_self<T>();
78 |   test_dot_product_other<T>();
79 | }
80 | 
81 | int main() {
82 |   try {
83 |     test_all<float>();
84 | #ifdef SYCLML_TEST_DOUBLE
85 |     test_all<double>();
86 | #endif
87 |   } catch (cl::sycl::exception e) {
88 |     std::cerr << e.what();
89 |   }
90 | 
91 |   return 0;
92 | }
93 | 


--------------------------------------------------------------------------------
/tests/src/math/test_inv.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #include <iostream>
 17 | #include <random>
 18 | 
 19 | #include "ml/math/mat_inv.hpp"
 20 | #include "ml/math/tri_inv.hpp"
 21 | #include "utils/utils.hpp"
 22 | 
 23 | template <class T>
 24 | void test_inv() {
 25 |   std::array<T, 9> host_data{1.0, 4.0, 6.0, 0.0, -1.0, 2.0, 5.0, 3.0, 4.0};
 26 | 
 27 |   std::array<T, 9> host_inv;
 28 |   {
 29 |     cl::sycl::queue& q = create_queue();
 30 |     ml::matrix_t<T> sycl_data(host_data.data(), cl::sycl::range<2>(3, 3));
 31 |     sycl_data.set_final_data(nullptr);
 32 | 
 33 |     ml::matrix_t<T> sycl_inv{cl::sycl::range<2>(3, 3)};
 34 |     ml::mat_inv(q, sycl_data, sycl_inv);
 35 | 
 36 |     sycl_inv.set_final_data(host_inv.data());
 37 |     clear_eigen_device();
 38 |   }
 39 | 
 40 |   /*
 41 |   std::cout << "data:\n";
 42 |   ml::print(host_data, 3, 3);
 43 |   std::cout << "\ninv:\n";
 44 |   ml::print(host_inv, 3, 3);
 45 |   */
 46 | 
 47 |   std::array<T, 9> expected{-0.166667, 0.0333335, 0.233333,
 48 |                             0.166667,  -0.433333, -0.0333333,
 49 |                             0.0833333, 0.283333,  -0.0166667};
 50 |   assert_vec_almost_eq(host_inv, expected);
 51 | }
 52 | 
 53 | template <class T>
 54 | void test_inv_big() {
 55 |   static constexpr unsigned SIDE = 100;
 56 |   static constexpr unsigned SIZE = SIDE * SIDE;
 57 |   static constexpr T MAX = 1E2;
 58 |   std::array<T, SIZE> host_data;
 59 |   srand(time(0));
 60 |   std::generate(std::begin(host_data), std::end(host_data), [=]() {
 61 |     return MAX * ((2 * (static_cast<T>(rand()) / RAND_MAX)) - 1);
 62 |   });
 63 | 
 64 |   // Make the input matrix diagonally dominant to ensure that it is invertible
 65 |   for (unsigned r = 0; r < SIDE; ++r) {
 66 |     T abs_max = host_data[r];
 67 |     for (unsigned c = 0; c < SIDE; ++c) {
 68 |       T abs_rc = std::abs(host_data[r * SIDE + c]);
 69 |       if (abs_rc > abs_max) {
 70 |         abs_max = abs_rc;
 71 |       }
 72 |     }
 73 |     auto& x = host_data[r * SIDE + r];
 74 |     x = cl::sycl::sign(x) * (std::abs(x) + abs_max);
 75 |   }
 76 | 
 77 |   std::array<T, SIZE> host_diff;
 78 |   {
 79 |     cl::sycl::queue& q = create_queue();
 80 |     cl::sycl::range<2> rng(SIDE, SIDE);
 81 |     ml::matrix_t<T> sycl_data(host_data.data(), rng);
 82 |     sycl_data.set_final_data(nullptr);
 83 | 
 84 |     ml::matrix_t<T> sycl_inv{rng};
 85 |     ml::mat_inv(q, sycl_data, sycl_inv);
 86 |     // ml::write_bmp_grayscale("inv_" + std::to_string(SIDE), sycl_inv, true,
 87 |     // true);
 88 | 
 89 |     ml::matrix_t<T> multiplication{rng};
 90 |     ml::mat_mul(q, sycl_data, sycl_inv, multiplication);
 91 |     // ml::write_bmp_grayscale("inv_multiplication_" + std::to_string(SIDE),
 92 |     // multiplication, true, true);
 93 | 
 94 |     ml::matrix_t<T> identity{rng};
 95 |     ml::eye(q, identity);
 96 |     ml::matrix_t<T> diff{rng};
 97 |     ml::sycl_copy(q, identity, diff);
 98 |     ml::mat_inplace_binary_op(q, diff, multiplication, std::minus<T>());
 99 |     // ml::write_bmp_grayscale("inv_diff_" + std::to_string(SIDE), diff, true,
100 |     // true);
101 |     diff.set_final_data(host_diff.data());
102 |     clear_eigen_device();
103 |   }
104 | 
105 |   for (unsigned i = 0; i < SIZE; ++i) {
106 |     assert_almost_eq(host_diff[i], T(0), T(1E-3));
107 |   }
108 | }
109 | 
110 | template <class T>
111 | void test_tri_inv() {
112 |   std::array<T, 16> host_data{1.0, 2.0, 3.0, 4.0, 0.0, 5.0, 6.0, 7.0,
113 |                               0.0, 0.0, 8.0, 9.0, 0.0, 0.0, 0.0, 10.0};
114 | 
115 |   std::array<T, 16> host_inv;
116 |   {
117 |     cl::sycl::queue& q = create_queue();
118 |     ml::matrix_t<T> sycl_data(host_data.data(), cl::sycl::range<2>(4, 4));
119 |     sycl_data.set_final_data(nullptr);
120 |     ml::matrix_t<T> sycl_inv{cl::sycl::range<2>(4, 4)};
121 |     ml::tri_inv(q, sycl_data, sycl_inv);
122 |     sycl_inv.set_final_data(host_inv.data());
123 |     clear_eigen_device();
124 |   }
125 | 
126 |   /*
127 |   std::cout << "data:\n";
128 |   ml::print(host_data, 4, 4);
129 |   std::cout << "\ninv:\n";
130 |   ml::print(host_inv, 4, 4);
131 |   */
132 | 
133 |   std::array<T, 16> expected{1.0,   -0.4,   -0.075, -0.0525, 0.0,   0.2,
134 |                              -0.15, -0.005, 0.0,    0.0,     0.125, -0.1125,
135 |                              0.0,   0.0,    0.0,    0.1};
136 |   assert_vec_almost_eq(host_inv, expected);
137 | }
138 | 
139 | template <class T>
140 | void test_tri_inv_big() {
141 |   static constexpr unsigned SIDE = 64;
142 |   static constexpr unsigned SIZE = SIDE * SIDE;
143 |   std::array<T, SIZE> host_data;
144 |   for (unsigned r = 0; r < SIDE; ++r) {
145 |     for (unsigned c = 0; c < SIDE; ++c) {
146 |       host_data[r * SIDE + c] = r > c ? 0 : r * SIDE + c + 1;
147 |     }
148 |   }
149 | 
150 |   std::array<T, SIZE> host_diff;
151 |   {
152 |     cl::sycl::queue& q = create_queue();
153 |     cl::sycl::range<2> rng(SIDE, SIDE);
154 |     ml::matrix_t<T> sycl_data(host_data.data(), rng);
155 |     sycl_data.set_final_data(nullptr);
156 | 
157 |     ml::matrix_t<T> sycl_tri_inv{rng};
158 |     ml::tri_inv(q, sycl_data, sycl_tri_inv);
159 |     // ml::write_bmp_grayscale("tri_inv_" + std::to_string(SIDE), sycl_tri_inv,
160 |     // true, true);
161 | 
162 |     ml::matrix_t<T> multiplication{rng};
163 |     ml::mat_mul(q, sycl_data, sycl_tri_inv, multiplication);
164 |     // ml::write_bmp_grayscale("tri_inv_multiplication_" + std::to_string(SIDE),
165 |     // multiplication, true, true);
166 | 
167 |     ml::matrix_t<T> identity{rng};
168 |     ml::eye(q, identity);
169 |     ml::matrix_t<T> diff{rng};
170 |     ml::sycl_copy(q, identity, diff);
171 |     ml::mat_inplace_binary_op(q, diff, multiplication, std::minus<T>());
172 |     // ml::write_bmp_grayscale("tri_inv_diff_" + std::to_string(SIDE), diff,
173 |     // true, true);
174 | 
175 |     diff.set_final_data(host_diff.data());
176 |     clear_eigen_device();
177 |   }
178 | 
179 |   for (unsigned i = 0; i < SIZE; ++i) {
180 |     assert_almost_eq(host_diff[i], T(0), T(1E-2));
181 |   }
182 | }
183 | 
184 | template <class T>
185 | void test_all() {
186 |   test_inv<T>();
187 |   // test_inv_big<T>();
188 |   test_tri_inv<T>();
189 |   // test_tri_inv_big<T>();
190 | }
191 | 
192 | int main(void) {
193 |   try {
194 |     test_all<float>();
195 | #ifdef SYCLML_TEST_DOUBLE
196 |     test_all<double>();
197 | #endif
198 |   } catch (cl::sycl::exception e) {
199 |     std::cerr << e.what();
200 |   }
201 | 
202 |   return 0;
203 | }
204 | 


--------------------------------------------------------------------------------
/tests/src/math/test_mat_mul.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #include <iostream>
 17 | #include <random>
 18 | 
 19 | #include "ml/math/mat_mul.hpp"
 20 | #include "utils/utils.hpp"
 21 | 
 22 | template <class T>
 23 | void test_square() {
 24 |   std::array<T, 4> m1{1.0, 2.0, 3.0, 4.0};
 25 |   std::array<T, 4> m2{-1.0, 1.0, 5.0, -2.0};
 26 |   std::array<T, 4> m3;
 27 | 
 28 |   {
 29 |     cl::sycl::queue& q = create_queue();
 30 |     ml::matrix_t<T> m1_buffer(m1.data(), cl::sycl::range<2>(2, 2));
 31 |     m1_buffer.set_final_data(nullptr);
 32 |     ml::matrix_t<T> m2_buffer(m2.data(), cl::sycl::range<2>(2, 2));
 33 |     m2_buffer.set_final_data(nullptr);
 34 |     ml::matrix_t<T> out_buffer(cl::sycl::range<2>(2, 2));
 35 |     ml::mat_mul(q, m1_buffer, m2_buffer, out_buffer);
 36 |     out_buffer.set_final_data(m3.data());
 37 |     clear_eigen_device();
 38 |   }
 39 | 
 40 |   /*
 41 |   std::cout << "m1:\n";
 42 |   ml::print(m1, 2, 2);
 43 |   std::cout << "\nm2:\n";
 44 |   ml::print(m2, 2, 2);
 45 |   std::cout << "\nm3:\n";
 46 |   ml::print(m3, 2, 2);
 47 |   */
 48 | 
 49 |   assert_vec_almost_eq(m3, {9.0, -3.0, 17.0, -5.0});
 50 | }
 51 | 
 52 | template <class T>
 53 | void test_general() {
 54 |   std::array<T, 6> m1{1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
 55 |   std::array<T, 3> m2{-1.0, 5.0, 2.0};
 56 |   std::array<T, 2> m3;
 57 | 
 58 |   {
 59 |     cl::sycl::queue& q = create_queue();
 60 |     ml::matrix_t<T> m1_buffer(m1.data(), cl::sycl::range<2>(2, 3));
 61 |     m1_buffer.set_final_data(nullptr);
 62 |     ml::matrix_t<T> m2_buffer(m2.data(), cl::sycl::range<2>(3, 1));
 63 |     m2_buffer.set_final_data(nullptr);
 64 |     ml::matrix_t<T> out_buffer(cl::sycl::range<2>(2, 1));
 65 |     ml::mat_mul(q, m1_buffer, m2_buffer, out_buffer);
 66 |     out_buffer.set_final_data(m3.data());
 67 |     clear_eigen_device();
 68 |   }
 69 | 
 70 |   /*
 71 |   std::cout << "m1:\n";
 72 |   ml::print(m1, 2, 3);
 73 |   std::cout << "\nm2:\n";
 74 |   ml::print(m2, 3, 1);
 75 |   std::cout << "\nm3:\n";
 76 |   ml::print(m3, 2, 1);
 77 |   */
 78 | 
 79 |   assert_vec_almost_eq(m3, {15.0, 33.0});
 80 | }
 81 | 
 82 | template <class T>
 83 | void test_all() {
 84 |   test_square<T>();
 85 |   test_general<T>();
 86 | }
 87 | 
 88 | int main() {
 89 |   try {
 90 |     test_all<float>();
 91 | #ifdef SYCLML_TEST_DOUBLE
 92 |     test_all<double>();
 93 | #endif
 94 |   } catch (cl::sycl::exception e) {
 95 |     std::cerr << e.what();
 96 |   }
 97 | 
 98 |   return 0;
 99 | }
100 | 


--------------------------------------------------------------------------------
/tests/src/math/test_qr.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #include <iostream>
 17 | 
 18 | #include "ml/math/cov.hpp"
 19 | #include "ml/math/mat_mul.hpp"
 20 | #include "ml/math/mat_ops.hpp"
 21 | #include "ml/math/qr.hpp"
 22 | #include "utils/utils.hpp"
 23 | 
 24 | template <class T>
 25 | void test_small_qr() {
 26 |   static constexpr unsigned NB_OBS = 5;
 27 |   static constexpr unsigned DATA_DIM = 3;
 28 |   std::array<T, NB_OBS * DATA_DIM> host_data{1.0,  4.0,  7.0, 2.0,  0.0,
 29 |                                              -8.0, 1.0,  2.0, 1.0,  -3.0,
 30 |                                              -1.0, -1.0, 0.0, -9.0, 6.0};
 31 | 
 32 |   std::array<T, NB_OBS * DATA_DIM> host_qr;
 33 |   {
 34 |     cl::sycl::queue& q = create_queue();
 35 |     ml::matrix_t<T> sycl_data(host_data.data(),
 36 |                               cl::sycl::range<2>(NB_OBS, DATA_DIM));
 37 |     qr(q, sycl_data);
 38 |     sycl_data.set_final_data(host_qr.data());
 39 |     clear_eigen_device();
 40 |   }
 41 | 
 42 |   /*
 43 |   std::cout << "host data:\n";
 44 |   ml::print(host_data, NB_OBS, DATA_DIM);
 45 |   std::cout << "\nhost R:\n";
 46 |   ml::print(host_qr, NB_OBS, DATA_DIM);
 47 |   */
 48 | 
 49 |   // Multiple correct results are possible. Each row can be multiplied by -1.
 50 |   // In the current implementation all values on the diagonal are positive.
 51 |   // Only test the upper triangle matrix as the rest can have any value.
 52 |   assert_almost_eq(host_qr[0], T(3.87298));
 53 |   assert_almost_eq(host_qr[1], T(2.32379));
 54 |   assert_almost_eq(host_qr[2], T(-1.29099));
 55 |   assert_almost_eq(host_qr[4], T(9.82853));
 56 |   assert_almost_eq(host_qr[5], T(-2.03489));
 57 |   assert_almost_eq(host_qr[8], T(12.04959));
 58 | }
 59 | 
 60 | template <class T>
 61 | void test_qr_square() {
 62 |   static constexpr unsigned N = 2;
 63 |   static constexpr T DET_SIGN = -((N % 2) * 2) + 1;
 64 |   std::array<T, N * N> host_data;
 65 | 
 66 |   // Generate a random matrix with determinant 1
 67 |   fill_random(host_data, T(0.0), T(1.0));
 68 |   T det_data = compute_det(host_data);
 69 |   if (det_data < 0) {
 70 |     det_data *= -1;
 71 |     std::transform(begin(host_data), begin(host_data) + N, begin(host_data),
 72 |                    [](T x) { return -x; });
 73 |   }
 74 |   T factor = std::pow(det_data, -T(1.0) / N);
 75 |   std::transform(begin(host_data), end(host_data), begin(host_data),
 76 |                  [factor](T x) { return factor * x; });
 77 |   det_data = compute_det(host_data);
 78 |   assert_almost_eq(det_data, T(1));
 79 | 
 80 |   std::array<T, N * N> host_qr;
 81 |   T det_r;
 82 |   {
 83 |     cl::sycl::queue& q = create_queue();
 84 |     ml::matrix_t<T> sycl_data(host_data.data(), cl::sycl::range<2>(N, N));
 85 |     ml::qr(q, sycl_data);
 86 |     det_r = DET_SIGN * reduce_diag(q, sycl_data, 0, T(1), std::multiplies<T>());
 87 |     sycl_data.set_final_data(host_qr.data());
 88 |     clear_eigen_device();
 89 |   }
 90 | 
 91 |   /*
 92 |   std::cout << "host data:\n";
 93 |   ml::print(host_data, N, N);
 94 |   std::cout << "\nhost R:\n";
 95 |   ml::print(host_qr, N, N);
 96 |   std::cout << "\ndeterminant: " << det_r << std::endl;
 97 |   */
 98 | 
 99 |   assert_almost_eq(det_r, DET_SIGN * host_qr[0] * host_qr[3]);
100 |   assert_almost_eq(det_r, det_data);
101 | }
102 | 
103 | class MLNormalizeR;
104 | template <class T>
105 | void test_qr() {
106 |   static constexpr unsigned NB_OBS = 103;
107 |   static constexpr unsigned DATA_DIM = 64;
108 |   std::array<T, NB_OBS * DATA_DIM> host_data;
109 |   fill_random(host_data, T(-10), T(10));
110 | 
111 |   std::array<T, DATA_DIM * DATA_DIM> host_cov;
112 |   std::array<T, DATA_DIM * DATA_DIM> host_r2;
113 |   {
114 |     cl::sycl::queue& q = create_queue();
115 |     ml::matrix_t<T> sycl_data(host_data.data(),
116 |                               cl::sycl::range<2>(NB_OBS, DATA_DIM));
117 |     sycl_data.set_final_data(nullptr);
118 | 
119 |     // Center data
120 |     ml::vector_t<T> sycl_data_avg((cl::sycl::range<1>(DATA_DIM)));
121 |     ml::avg(q, sycl_data, sycl_data_avg);
122 |     ml::center_data<ml::COL>(q, sycl_data, sycl_data_avg);
123 | 
124 |     // Expected cov
125 |     ml::matrix_t<T> sycl_cov(cl::sycl::range<2>(DATA_DIM, DATA_DIM));
126 |     ml::cov(q, sycl_data, sycl_cov);
127 | 
128 |     // QR
129 |     ml::qr(q, sycl_data);
130 |     ml::matrix_t<T> sycl_r(cl::sycl::range<2>(DATA_DIM, DATA_DIM));
131 |     q.submit([&sycl_data, &sycl_r](cl::sycl::handler& cgh) {
132 |       auto old_r_acc =
133 |           sycl_data.template get_access_2d<cl::sycl::access::mode::read>(cgh);
134 |       auto new_r_acc =
135 |           sycl_r.template get_access_2d<cl::sycl::access::mode::discard_write>(
136 |               cgh);
137 |       cgh.parallel_for<ml::NameGen<0, MLNormalizeR, T>>(
138 |           sycl_r.get_nd_range(), [=](cl::sycl::nd_item<2> item) {
139 |             auto row = item.get_global_id(0);
140 |             auto col = item.get_global_id(1);
141 |             new_r_acc(row, col) =
142 |                 col >= row ? old_r_acc(row, col) / cl::sycl::sqrt(T(NB_OBS))
143 |                            : 0;
144 |           });
145 |     });
146 | 
147 |     // Reconstructed cov
148 |     ml::matrix_t<T> sycl_r2(cl::sycl::range<2>(DATA_DIM, DATA_DIM));
149 |     ml::mat_mul<ml::TR, ml::LIN>(q, sycl_r, sycl_r, sycl_r2);
150 | 
151 |     sycl_cov.set_final_data(host_cov.data());
152 |     sycl_r2.set_final_data(host_r2.data());
153 |     clear_eigen_device();
154 |   }
155 | 
156 |   assert_vec_almost_eq(host_r2, host_cov, T(1E-3));
157 | }
158 | 
159 | template <class T>
160 | void test_all() {
161 |   test_small_qr<T>();
162 |   test_qr_square<T>();
163 |   test_qr<T>();
164 | }
165 | 
166 | int main(void) {
167 |   try {
168 |     test_all<float>();
169 | #ifdef SYCLML_TEST_DOUBLE
170 |     test_all<double>();
171 | #endif
172 |   } catch (cl::sycl::exception e) {
173 |     std::cerr << e.what();
174 |   }
175 | 
176 |   return 0;
177 | }
178 | 


--------------------------------------------------------------------------------
/tests/src/math/test_svd.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #include <iostream>
 17 | 
 18 | #include "ml/math/mat_mul.hpp"
 19 | #include "ml/math/mat_ops.hpp"
 20 | #include "ml/math/svd.hpp"
 21 | #include "utils/utils.hpp"
 22 | 
 23 | template <class T, ml::data_dim D>
 24 | void test_svd_general() {
 25 |   constexpr auto NB_OBS = 4LU;
 26 |   constexpr auto ACT_SIZE_OBS = NB_OBS;
 27 |   constexpr auto SIZE_OBS_POW2 = ACT_SIZE_OBS;
 28 |   std::array<T, NB_OBS * SIZE_OBS_POW2> host_data{
 29 |       1.0, 2.0, 0.0,  -3.0, 2.0,  -5.0, 2.0,  1.0,
 30 |       0.0, 2.0, -1.0, -1.0, -3.0, 1.0,  -1.0, 3.0};
 31 | 
 32 |   constexpr auto NB_VEC = ACT_SIZE_OBS;
 33 | 
 34 |   std::array<T, NB_OBS * NB_VEC> host_V;
 35 |   std::array<T, NB_VEC> host_L;
 36 |   std::array<T, NB_VEC * SIZE_OBS_POW2> host_U;
 37 |   std::array<T, NB_OBS * SIZE_OBS_POW2> host_residual;
 38 |   std::array<T, NB_OBS * SIZE_OBS_POW2> host_data_svd;
 39 |   std::array<T, NB_OBS * SIZE_OBS_POW2> host_centered_data;
 40 |   {
 41 |     cl::sycl::queue& q = create_queue();
 42 |     ml::matrix_t<T> sycl_data(host_data.data(),
 43 |                               cl::sycl::range<2>(NB_OBS, SIZE_OBS_POW2));
 44 |     sycl_data.data_range = cl::sycl::range<2>(NB_OBS, ACT_SIZE_OBS);
 45 |     ml::vector_t<T> sycl_data_avg(cl::sycl::range<1>(ACT_SIZE_OBS),
 46 |                                   ml::get_optimal_nd_range(SIZE_OBS_POW2));
 47 | 
 48 |     ml::avg<D>(q, sycl_data, sycl_data_avg);
 49 |     ml::center_data<ml::opp<D>()>(q, sycl_data, sycl_data_avg);
 50 |     ml::matrix_t<T> sycl_centered_data(sycl_data.data_range,
 51 |                                        sycl_data.kernel_range);
 52 |     ml::sycl_copy(q, sycl_data, sycl_centered_data);
 53 | 
 54 |     auto VLU = ml::svd<true, true, true>(q, sycl_data);
 55 |     auto& sycl_U = VLU.U;
 56 |     auto& sycl_V = VLU.V;
 57 |     auto& vec_L = VLU.L;
 58 |     ml::assert_rng_eq({NB_OBS, NB_VEC}, sycl_U.data_range);
 59 |     ml::assert_eq(NB_VEC, vec_L.size());
 60 |     ml::assert_rng_eq({NB_VEC, ACT_SIZE_OBS}, sycl_V.data_range);
 61 | 
 62 |     std::copy(std::begin(vec_L), std::end(vec_L), std::begin(host_L));
 63 |     ml::vector_t<T> sycl_L(host_L.data(), cl::sycl::range<1>(host_L.size()));
 64 |     sycl_L.set_final_data(nullptr);
 65 | 
 66 |     ml::matrix_t<T> sycl_data_svd(sycl_data.data_range, sycl_data.kernel_range);
 67 |     ml::matrix_t<T> sycl_copy_V(sycl_V.data_range, sycl_V.kernel_range);
 68 |     ml::sycl_copy(q, sycl_V, sycl_copy_V);
 69 |     ml::mat_vec_apply_op(q, sycl_copy_V, sycl_L,
 70 |                          std::multiplies<T>());  // diag(L) * V
 71 |     ml::mat_mul(q, sycl_U, sycl_copy_V, sycl_data_svd);
 72 |     ml::mat_inplace_binary_op(q, sycl_data_svd, sycl_data,
 73 |                               std::plus<T>());  // Add residual
 74 | 
 75 |     sycl_data.set_final_data(host_residual.data());
 76 |     sycl_centered_data.set_final_data(host_centered_data.data());
 77 |     sycl_data_svd.set_final_data(host_data_svd.data());
 78 |     sycl_U.set_final_data(host_U.data());
 79 |     sycl_V.set_final_data(host_V.data());
 80 |     clear_eigen_device();
 81 |   }
 82 | 
 83 |   /*
 84 |   std::cout << "host data:\n";
 85 |   ml::print(host_data, NB_OBS, SIZE_OBS_POW2);
 86 |   std::cout << "\nU:\n";
 87 |   ml::print(host_U, NB_VEC, SIZE_OBS_POW2);
 88 |   std::cout << "\nL:\n";
 89 |   ml::print(host_L, 1, NB_VEC);
 90 |   std::cout << "\nV:\n";
 91 |   ml::print(host_V, NB_OBS, NB_VEC);
 92 |   std::cout << "\nR:\n";
 93 |   ml::print(host_residual, NB_OBS, SIZE_OBS_POW2);
 94 |   std::cout << "\ndata svd:\n";
 95 |   ml::print(host_data_svd, NB_OBS, SIZE_OBS_POW2);
 96 |   */
 97 | 
 98 |   assert_vec_almost_eq(host_centered_data, host_data_svd);
 99 |   for (unsigned i = 0; i < NB_OBS * SIZE_OBS_POW2; ++i) {
100 |     assert_almost_eq(host_residual[i], T(0));
101 |   }
102 | }
103 | 
104 | int main(void) {
105 |   try {
106 |     test_svd_general<float, ml::LIN>();
107 | #ifdef SYCLML_TEST_DOUBLE
108 |     test_svd_general<double, ml::LIN>();
109 | #endif
110 |   } catch (cl::sycl::exception e) {
111 |     std::cerr << e.what();
112 |   }
113 | 
114 |   return 0;
115 | }
116 | 


--------------------------------------------------------------------------------
/tests/src/math/test_tr_op.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #include <iostream>
 17 | #include <random>
 18 | 
 19 | #include "ml/math/mat_ops.hpp"
 20 | #include "utils/utils.hpp"
 21 | 
 22 | template <class T>
 23 | void test_lin_tr_inplace_mat_op() {
 24 |   static constexpr size_t M = 10;
 25 |   static constexpr size_t N = 2;
 26 | 
 27 |   std::array<T, M * N> m1;
 28 |   std::array<T, M * N> expected_m1;
 29 |   std::array<T, N * M> m2;
 30 | 
 31 |   srand(time(0));
 32 |   static constexpr T MAX = 1E2;
 33 |   fill_random(m1, -MAX, MAX);
 34 | 
 35 |   // Expect first column unchanged, second multiplied by 2
 36 |   for (size_t i = 0; i < M; ++i) {
 37 |     for (size_t j = 0; j < N; ++j) {
 38 |       m2[j * M + i] = j + 1;
 39 |       expected_m1[i * N + j] = m1[i * N + j] * m2[j * M + i];
 40 |     }
 41 |   }
 42 | 
 43 |   {
 44 |     cl::sycl::queue& q = create_queue();
 45 |     ml::matrix_t<T> m1_buffer(m1.data(), cl::sycl::range<2>(M, N));
 46 |     ml::matrix_t<T> m2_buffer(m2.data(), cl::sycl::range<2>(N, M));  // TR
 47 |     m2_buffer.set_final_data(nullptr);
 48 | 
 49 |     ml::mat_inplace_binary_op<ml::LIN, ml::TR>(q, m1_buffer, m2_buffer,
 50 |                                                std::multiplies<T>());
 51 |     clear_eigen_device();
 52 |   }
 53 | 
 54 |   assert_vec_almost_eq(m1, expected_m1);
 55 | }
 56 | 
 57 | // Applies mat_inplace_binary_op<TR, LIN> with std::multiplies to m1 and m2,
 58 | // then checks m1 against expected_m1, which is computed on the host.
 59 | template <class T>
 60 | void test_tr_lin_inplace_mat_op() {
 61 |   static constexpr size_t M = 2;
 62 |   static constexpr size_t N = 10;
 63 |   static constexpr T MAX = 1E2;
 64 | 
 65 |   std::array<T, M * N> m1;
 66 |   std::array<T, N * M> m2;
 67 |   std::array<T, M * N> expected_m1;
 68 |   srand(time(0));
 69 |   fill_random(m1, -MAX, MAX);
 70 |   // m2[col * M + row] holds row + 1; m1[row * N + col] is scaled by it.
 71 |   for (size_t row = 0; row < M; ++row) {
 72 |     for (size_t col = 0; col < N; ++col) {
 73 |       const T factor = m2[col * M + row] = row + 1;
 74 |       expected_m1[row * N + col] = m1[row * N + col] * factor;
 75 |     }
 76 |   }
 77 | 
 78 |   {
 79 |     cl::sycl::queue& queue = create_queue();
 80 |     ml::matrix_t<T> lhs_buffer(m1.data(), cl::sycl::range<2>(M, N));  // TR
 81 |     ml::matrix_t<T> rhs_buffer(m2.data(), cl::sycl::range<2>(N, M));
 82 |     rhs_buffer.set_final_data(nullptr);
 83 | 
 84 |     ml::mat_inplace_binary_op<ml::TR, ml::LIN>(queue, lhs_buffer, rhs_buffer,
 85 |                                                std::multiplies<T>());
 86 |     clear_eigen_device();
 87 |   }
 88 | 
 89 |   assert_vec_almost_eq(m1, expected_m1);
 90 | }
 91 | 
 92 | template <class T>
 93 | void test_all() {  // runs both in-place matrix op tests for element type T
 94 |   test_lin_tr_inplace_mat_op<T>();
 95 |   test_tr_lin_inplace_mat_op<T>();
 96 | }
 97 | 
 98 | int main() {  // exit status: 0 when the tests ran, 1 on a SYCL exception
 99 |   try {
100 |     test_all<float>();
101 | #ifdef SYCLML_TEST_DOUBLE
102 |     test_all<double>();
103 | #endif
104 |   } catch (const cl::sycl::exception& e) {  // by reference: no copy, no slicing
105 |     std::cerr << e.what() << '\n';
106 |     return 1;  // report the failure instead of exiting with success
107 |   }
108 |   return 0;
109 | }
110 | 


--------------------------------------------------------------------------------
/tests/src/math/test_tri_solve.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) Codeplay Software Limited.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *   http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | #include <iostream>
 17 | #include <random>
 18 | 
 19 | #include "ml/math/tri_solve.hpp"
 20 | #include "utils/utils.hpp"
 21 | 
 22 | // Runs tri_solve<LIN, TR> then tri_solve<LIN, LIN> and checks Y and X.
 23 | template <class T>
 24 | void test_tri_solve() {
 25 |   std::array<T, 9> tri_host{1.0, 2.0, 3.0, 0.0, 4.0, 5.0, 0.0, 0.0, 6.0};
 26 |   std::array<T, 12> rhs_host{9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
 27 |                              3.0, 2.0, 1.0, 0.0, 1.0, 0.0};
 28 |   std::array<T, 12> y_host;
 29 |   std::array<T, 12> x_host;
 30 | 
 31 |   {
 32 |     cl::sycl::queue& queue = create_queue();
 33 |     ml::matrix_t<T> tri_buf(tri_host.data(), cl::sycl::range<2>(3, 3));
 34 |     tri_buf.set_final_data(nullptr);
 35 |     ml::matrix_t<T> rhs_buf(rhs_host.data(), cl::sycl::range<2>(3, 4));
 36 |     rhs_buf.set_final_data(nullptr);
 37 | 
 38 |     ml::matrix_t<T> y_buf{cl::sycl::range<2>(3, 4)};
 39 |     ml::matrix_t<T> x_buf{cl::sycl::range<2>(3, 4)};
 40 | 
 41 |     ml::tri_solve<ml::LIN, ml::TR>(queue, y_buf, tri_buf, rhs_buf);
 42 |     ml::tri_solve<ml::LIN, ml::LIN>(queue, x_buf, tri_buf, y_buf);
 43 | 
 44 |     y_buf.set_final_data(y_host.data());
 45 |     x_buf.set_final_data(x_host.data());
 46 |     clear_eigen_device();
 47 |   }
 48 | 
 49 |   /* Debug output, kept disabled:
 50 |   std::cout << "Y:\n";
 51 |   ml::print(y_host, 3, 4);
 52 |   std::cout << "\nX:\n";
 53 |   ml::print(x_host, 3, 4);
 54 |   */
 55 | 
 56 |   std::array<T, 12> expected_Y{9.0,   8.0,  7.0,    6.0,  -3.25,    -3.0,
 57 |                                -2.75, -2.5, -1.625, -1.5, -1.04167, -0.91667};
 58 |   std::array<T, 12> expected_X{10.76042, 9.62500,  8.46181,  7.32639,
 59 |                                -0.47396, -0.43750, -0.47049, -0.43403,
 60 |                                -0.27083, -0.25000, -0.17361, -0.15278};
 61 | 
 62 |   assert_vec_almost_eq(y_host, expected_Y);
 63 |   assert_vec_almost_eq(x_host, expected_X);
 64 | }
 65 | 
 66 | // Runs tri_solve<TR, TR> then tri_solve<TR, LIN> and checks Y and X.
 67 | template <class T>
 68 | void test_tri_solve_tr() {
 69 |   std::array<T, 9> tri_host{1.0, 2.0, 3.0, 0.0, 4.0, 5.0, 0.0, 0.0, 6.0};
 70 |   std::array<T, 12> rhs_host{9.0, 5.0, 1.0, 8.0, 4.0, 0.0,
 71 |                              7.0, 3.0, 1.0, 6.0, 2.0, 0.0};
 72 |   std::array<T, 12> y_host;
 73 |   std::array<T, 12> x_host;
 74 | 
 75 |   {
 76 |     cl::sycl::queue& queue = create_queue();
 77 |     ml::matrix_t<T> tri_buf(tri_host.data(), cl::sycl::range<2>(3, 3));
 78 |     tri_buf.set_final_data(nullptr);
 79 |     ml::matrix_t<T> rhs_buf(rhs_host.data(), cl::sycl::range<2>(4, 3));
 80 |     rhs_buf.set_final_data(nullptr);
 81 | 
 82 |     ml::matrix_t<T> y_buf{cl::sycl::range<2>(4, 3)};
 83 |     ml::matrix_t<T> x_buf{cl::sycl::range<2>(4, 3)};
 84 | 
 85 |     ml::tri_solve<ml::TR, ml::TR>(queue, y_buf, tri_buf, rhs_buf);
 86 |     ml::tri_solve<ml::TR, ml::LIN>(queue, x_buf, tri_buf, y_buf);
 87 | 
 88 |     y_buf.set_final_data(y_host.data());
 89 |     x_buf.set_final_data(x_host.data());
 90 |     clear_eigen_device();
 91 |   }
 92 | 
 93 |   /* Debug output, kept disabled:
 94 |   std::cout << "Y:\n";
 95 |   ml::print(y_host, 4, 3);
 96 |   std::cout << "\nX:\n";
 97 |   ml::print(x_host, 4, 3);
 98 |   */
 99 | 
100 |   std::array<T, 12> expected_Y{9.0, -3.25, -1.625,   8.0, -3.0, -1.5,
101 |                                7.0, -2.75, -1.04167, 6.0, -2.5, -0.91667};
102 |   std::array<T, 12> expected_X{10.76042, -0.47396, -0.27083, 9.62500,
103 |                                -0.43750, -0.25000, 8.46181,  -0.47049,
104 |                                -0.17361, 7.32639,  -0.43403, -0.15278};
105 | 
106 |   assert_vec_almost_eq(y_host, expected_Y);
107 |   assert_vec_almost_eq(x_host, expected_X);
108 | }
109 | 
110 | template <class T>
111 | void test_all() {  // runs both tri_solve tests for element type T
112 |   test_tri_solve<T>();
113 |   test_tri_solve_tr<T>();
114 | }
115 | 
116 | int main(void) {  // exit status: 0 when the tests ran, 1 on a SYCL exception
117 |   try {
118 |     test_all<float>();
119 | #ifdef SYCLML_TEST_DOUBLE
120 |     test_all<double>();
121 | #endif
122 |   } catch (const cl::sycl::exception& e) {  // by reference: no copy, no slicing
123 |     std::cerr << e.what() << '\n';
124 |     return 1;  // report the failure instead of exiting with success
125 |   }
126 |   return 0;
127 | }
128 | 


--------------------------------------------------------------------------------
/tests/src/svm/test_arg_extremum_cond.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include <iostream>
17 | 
18 | #include "ml/classifiers/svm/svm.hpp"
19 | #include "utils/utils.hpp"
20 | 
21 | // argmin_cond must return the smallest entry whose condition is true:
22 | // TRUE_MIN_IDX holds the overall minimum but its condition is set to false.
23 | template <class T>
24 | void test_argmin_cond() {
25 |   constexpr auto NB_ELT = 128LU;
26 |   constexpr auto EXPECTED_MIN_IDX = 13LU;
27 |   constexpr auto TRUE_MIN_IDX = 15LU;
28 |   std::array<T, NB_ELT> host_data;
29 |   fill_random(host_data, T(0), T(100));  // T bounds: int ones would truncate
30 |   host_data[EXPECTED_MIN_IDX] = -1;
31 |   host_data[TRUE_MIN_IDX] = -2;
32 |   unsigned long min_idx = NB_ELT;  // sentinel: not a valid index
33 |   {
34 |     cl::sycl::queue& q = create_queue();
35 |     ml::vector_t<T> sycl_data(host_data.data(), cl::sycl::range<1>(NB_ELT));
36 |     ml::vector_t<T> sycl_cond((cl::sycl::range<1>(NB_ELT)));
37 | 
38 |     ml::sycl_memset(q, sycl_cond, T(true));
39 |     // Ignore this index so it should not be returned
40 |     sycl_cond.write_from_host(TRUE_MIN_IDX, false);
41 |     {
42 |       ml::vector_t<ml::SYCLIndexT> device_scalar(ml::range<1>(1));
43 |       auto eig_scalar = ml::sycl_to_eigen<1, 0>(device_scalar);
44 |       bool found =
45 |           ml::detail::argmin_cond(q, sycl_cond, sycl_data, eig_scalar, min_idx);
46 |       assert(found);
47 |     }
48 | 
49 |     sycl_data.set_final_data(nullptr);
50 |     clear_eigen_device();
51 |   }
52 | 
53 |   assert_eq(min_idx, EXPECTED_MIN_IDX);
54 | }
55 | 
56 | int main() {  // exit status: 0 when the tests ran, 1 on a SYCL exception
57 |   try {
58 |     test_argmin_cond<float>();
59 | #ifdef SYCLML_TEST_DOUBLE
60 |     test_argmin_cond<double>();
61 | #endif
62 |   } catch (const cl::sycl::exception& e) {  // by reference: no copy, no slicing
63 |     std::cerr << e.what() << '\n';
64 |     return 1;  // report the failure instead of exiting with success
65 |   }
66 |   return 0;
67 | }
68 | 


--------------------------------------------------------------------------------
/tests/src/svm/test_svm_linear.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include <iostream>
17 | 
18 | #include "ml/classifiers/svm/svm.hpp"
19 | #include "utils/utils.hpp"
20 | 
21 | template <class DataT, class LabelT>
22 | void test_svm_linear() {
23 |   /*
24 |    * Trains on the OR truth table, for which a linear kernel is enough.
25 |    *   y  0  1
26 |    * x
27 |    * 0    0  1
28 |    * 1    1  1
29 |    */
30 |   std::array<DataT, 8> samples{0, 0, 0, 1, 1, 0, 1, 1};
31 |   std::vector<LabelT> labels{0, 1, 1, 1};
32 |   std::vector<DataT> alphas;
33 |   DataT rho;
34 | 
35 |   {
36 |     cl::sycl::queue& queue = create_queue();
37 |     ml::matrix_t<DataT> sycl_samples(samples.data(), cl::sycl::range<2>(4, 2));
38 | 
39 |     ml::svm<ml::svm_linear_kernel<DataT>, LabelT> classifier(10);
40 |     classifier.train_binary(queue, sycl_samples, labels);
41 | 
42 |     auto trained = classifier.get_smo_outs().front();
43 |     assert_eq(trained.alphas.data_range[0], 3LU);
44 |     alphas.resize(trained.alphas.get_kernel_size());
45 |     // Copy the alphas to the host and wait for the transfer to finish.
46 |     ml::sycl_copy_device_to_host(queue, trained.alphas, alphas.data())
47 |         .wait_and_throw();
48 |     rho = trained.rho;
49 | 
50 |     sycl_samples.set_final_data(nullptr);
51 |     clear_eigen_device();
52 |   }
53 | 
54 |   /* Debug output, kept disabled:
55 |   std::cout << "alphas:\n";
56 |   ml::print(alphas.data(), 1, 3);
57 |   std::cout << "\nrho: " << rho << std::endl;
58 |   */
59 | 
60 |   std::array<DataT, 3> expected_alphas{-4, 2, 2};
61 |   assert_vec_almost_eq(alphas.data(), expected_alphas.data(),
62 |                        expected_alphas.size());
63 |   assert_almost_eq(rho, DataT(-1));
64 | }
65 | 
66 | int main() {  // exit status: 0 when the tests ran, 1 on a SYCL exception
67 |   try {
68 |     test_svm_linear<float, uint8_t>();
69 | #ifdef SYCLML_TEST_DOUBLE
70 |     test_svm_linear<double, uint8_t>();
71 | #endif
72 |   } catch (const cl::sycl::exception& e) {  // by reference: no copy, no slicing
73 |     std::cerr << e.what() << '\n';
74 |     return 1;  // report the failure instead of exiting with success
75 |   }
76 |   return 0;
77 | }
78 | 


--------------------------------------------------------------------------------
/tests/src/svm/test_svm_poly.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include <iostream>
17 | 
18 | #include "ml/classifiers/svm/svm.hpp"
19 | #include "utils/utils.hpp"
20 | 
21 | template <class DataT, class LabelT>
22 | void test_svm_poly() {
23 |   /*
24 |    * Solves the XOR problem; the kernel must be polynomial or more complex.
25 |    *   y  0  1
26 |    * x
27 |    * 0    0  1
28 |    * 1    1  0
29 |    */
30 |   std::array<DataT, 8> samples{0, 0, 0, 1, 1, 0, 1, 1};
31 |   std::vector<LabelT> labels{0, 1, 1, 0};
32 |   std::vector<DataT> alphas;
33 |   DataT rho;
34 | 
35 |   {
36 |     cl::sycl::queue& queue = create_queue();
37 |     ml::matrix_t<DataT> sycl_samples(samples.data(), cl::sycl::range<2>(4, 2));
38 | 
39 |     using PolyKernel = ml::svm_polynomial_kernel<DataT>;
40 |     ml::svm<PolyKernel, LabelT> classifier(1000, PolyKernel(1, 1, 2), 2, 1E-6);
41 |     classifier.train_binary(queue, sycl_samples, labels);
42 | 
43 |     auto trained = classifier.get_smo_outs().front();
44 |     assert_eq(trained.alphas.data_range[0], 4LU);
45 |     alphas.resize(trained.alphas.get_kernel_size());
46 |     // Copy the alphas to the host and wait for the transfer to finish.
47 |     ml::sycl_copy_device_to_host(queue, trained.alphas, alphas.data())
48 |         .wait_and_throw();
49 |     rho = trained.rho;
50 | 
51 |     sycl_samples.set_final_data(nullptr);
52 |     clear_eigen_device();
53 |   }
54 | 
55 |   /* Debug output, kept disabled:
56 |   std::cout << "alphas:\n";
57 |   ml::print(alphas.data(), 1, 4);
58 |   std::cout << "\nrho: " << rho << std::endl;
59 |   */
60 | 
61 |   std::array<DataT, 4> expected_alphas{-3.332425, 2.665940, 2.665940,
62 |                                        -1.999455};
63 |   assert_vec_almost_eq(alphas.data(), expected_alphas.data(),
64 |                        expected_alphas.size(), DataT(1E-3));
65 |   assert_almost_eq(rho, DataT(-0.999728), DataT(1E-3));
66 | }
67 | 
68 | int main() {  // exit status: 0 when the tests ran, 1 on a SYCL exception
69 |   try {
70 |     test_svm_poly<float, uint8_t>();
71 | #ifdef SYCLML_TEST_DOUBLE
72 |     test_svm_poly<double, uint8_t>();
73 | #endif
74 |   } catch (const cl::sycl::exception& e) {  // by reference: no copy, no slicing
75 |     std::cerr << e.what() << '\n';
76 |     return 1;  // report the failure instead of exiting with success
77 |   }
78 |   return 0;
79 | }
80 | 


--------------------------------------------------------------------------------
/tests/src/utils/assert_utils.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef TEST_SRC_UTILS_ASSERT_UTILS_HPP
17 | #define TEST_SRC_UTILS_ASSERT_UTILS_HPP
18 | 
19 | #include <array>
20 | #include <cmath>
21 | #include <iostream>
22 | 
23 | #undef NDEBUG
24 | #include <cassert>
25 | 
26 | #define EPS 1E-5
27 | 
28 | template <class T>
29 | void assert_eq(T actual, T expected) {  // prints both values, then asserts
30 |   const bool mismatch = actual != expected;
31 |   if (!mismatch) return;
32 |   // Report what was seen before tripping the assertion.
33 |   std::cerr << "Error: got " << actual << " expected " << expected << std::endl;
34 |   assert(false);
35 | }
36 | 
37 | template <class T>
38 | void assert_almost_eq(T actual, T expected, const T eps = EPS) {
39 |   // `close` is false for NaN operands, so NaN results fail instead of passing.
40 |   const bool close = actual == expected || std::fabs(actual - expected) <= eps;
41 |   if (close) return;
42 |   std::cerr << "Error: got " << actual << " expected " << expected << std::endl;
43 |   assert(false);
44 | }
45 | 
46 | template <class T>
47 | void assert_vec_almost_eq(const T* actual, const T* expected, size_t size,
48 |                           const T eps = EPS) {
49 |   for (const T* const last = actual + size; actual != last; ++actual) {
50 |     assert_almost_eq(*actual, *expected++, eps);  // pairwise, same tolerance
51 |   }
52 | }
53 | 
54 | template <class T, size_t DIM>
55 | void assert_vec_almost_eq(const std::array<T, DIM>& actual,
56 |                           const std::array<T, DIM>& expected,
57 |                           const T eps = EPS) {  // forwards to pointer overload
58 |   assert_vec_almost_eq(actual.data(), expected.data(), actual.size(), eps);
59 | }
60 | 
61 | template <class T, int DIM>
62 | void assert_vector_almost_eq_no_direction(const T* actual, const T* expected,
63 |                                           const T eps = EPS) {
64 |   T norm_pos = 0;  // sum of squared (actual - expected)
65 |   T norm_neg = 0;  // sum of squared (actual + expected)
66 |   for (int i = 0; i < DIM; ++i) {  // int index matches DIM's type
67 |     const T diff = actual[i] - expected[i];
68 |     const T sum = actual[i] + expected[i];
69 |     norm_pos += diff * diff;
70 |     norm_neg += sum * sum;
71 |   }
72 |   const T norm = norm_neg < norm_pos ? norm_neg : norm_pos;  // either sign
73 |   assert_almost_eq(norm, T(0), eps);  // T(0): a float literal breaks T=double
74 | }
75 | 
76 | #endif  // TEST_SRC_UTILS_ASSERT_UTILS_HPP
77 | 


--------------------------------------------------------------------------------
/tests/src/utils/sycl_utils.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef TEST_SRC_UTILS_SYCL_UTILS_HPP
17 | #define TEST_SRC_UTILS_SYCL_UTILS_HPP
18 | 
19 | #include "ml/utils/common.hpp"
20 | 
21 | class init_first_kernel;
22 | 
23 | /**
24 |  * @brief Used to avoid measuring OpenCL initialization overhead
25 |  * @param q queue given an empty single_task; inline as defined in a header
26 |  */
27 | inline void launch_first_kernel(cl::sycl::queue& q) {
28 |   q.submit([](cl::sycl::handler& cgh) {
29 |     cgh.single_task<init_first_kernel>([]() {});
30 |   });
31 | }
32 | 
33 | /**
34 |  * @brief Allocate device_constants, launch a first kernel, return the queue.
35 |  * @return the sycl queue of the Eigen device (inline: defined in a header)
36 |  */
37 | inline cl::sycl::queue& create_queue() {
38 |   ml::device_constants<void>::instance = new ml::device_constants<void>();
39 |   auto& q = ml::get_eigen_device().sycl_queue();
40 |   launch_first_kernel(q);
41 |   return q;
42 | }
43 | 
44 | /**
45 |  * @brief Synchronize the device, then free the singleton device_constants.
46 |  */
47 | inline void clear_eigen_device() {  // inline: defined in a header
48 |   ml::get_eigen_device().synchronize();
49 |   delete ml::get_device_constants();
50 | }
51 | 
52 | #endif  // TEST_SRC_UTILS_SYCL_UTILS_HPP
53 | 


--------------------------------------------------------------------------------
/tests/src/utils/test_save_load.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include <iostream>
17 | 
18 | #include "ml/utils/buffer_t.hpp"
19 | #include "utils/utils.hpp"
20 | 
21 | // Writes a host array to "test_buf", reads it back and compares both arrays.
22 | template <class T>
23 | void test_save_load_host() {
24 |   constexpr size_t SIZE = 4;
25 |   std::array<T, SIZE> saved{-1, 0, -1.5, 0.5};
26 |   std::array<T, SIZE> loaded;
27 |   ml::save_array(saved.data(), SIZE, "test_buf");
28 |   ml::load_array(loaded.data(), SIZE, "test_buf");
29 | 
30 |   /* Debug output, kept disabled:
31 |   std::cout << "Saved: ";
32 |   ml::print(saved, 1, SIZE);
33 |   std::cout << "Loaded: ";
34 |   ml::print(loaded, 1, SIZE);
35 |   */
36 | 
37 |   assert_vec_almost_eq(loaded, saved);
38 | }
39 | 
40 | // Saves a 2x3 buffer to "test_buf", loads it into a second one and compares.
41 | template <class T>
42 | void test_save_load_device() {
43 |   constexpr size_t SIZE = 6;
44 |   std::array<T, SIZE> saved{-10, 0, -1.5, 3, 3, 1};
45 |   std::array<T, SIZE> loaded;
46 |   {
47 |     cl::sycl::queue& queue = create_queue();
48 |     {
49 |       ml::matrix_t<T> sycl_saved(const_cast<const T*>(saved.data()),
50 |                                  cl::sycl::range<2>(2, 3));
51 |       ml::save_array(queue, sycl_saved, "test_buf");
52 |     }
53 |     ml::matrix_t<T> sycl_loaded(cl::sycl::range<2>(2, 3));
54 |     ml::load_array(queue, sycl_loaded, "test_buf");
55 | 
56 |     sycl_loaded.set_final_data(loaded.data());
57 |     clear_eigen_device();
58 |   }
59 | 
60 |   /* Debug output, kept disabled:
61 |   std::cout << "Saved: ";
62 |   ml::print(saved, 1, SIZE);
63 |   std::cout << "Loaded: ";
64 |   ml::print(loaded, 1, SIZE);
65 |   */
66 | 
67 |   assert_vec_almost_eq(loaded, saved);
68 | }
69 | 
70 | template <class T>
71 | void test_all() {  // runs the host and device save/load tests for type T
72 |   test_save_load_host<T>();
73 |   test_save_load_device<T>();
74 | }
75 | 
76 | int main() {  // exit status: 0 when the tests ran, 1 on a SYCL exception
77 |   try {
78 |     test_all<float>();
79 | #ifdef SYCLML_TEST_DOUBLE
80 |     test_all<double>();
81 | #endif
82 |   } catch (const cl::sycl::exception& e) {  // by reference: no copy, no slicing
83 |     std::cerr << e.what() << '\n';
84 |     return 1;  // report the failure instead of exiting with success
85 |   }
86 |   return 0;
87 | }
88 | 


--------------------------------------------------------------------------------
/tests/src/utils/utils.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) Codeplay Software Limited.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *   http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef TEST_SRC_UTILS_UTILS_HPP
17 | #define TEST_SRC_UTILS_UTILS_HPP
18 | 
19 | #include <random>
20 | 
21 | #include "assert_utils.hpp"
22 | #include "sycl_utils.hpp"
23 | 
24 | // Fills a with rand()-based values in [min, max]; scaled in double so that
25 | // integral bounds do not truncate rand() / RAND_MAX to 0.
26 | template <class T, class Array>
27 | void fill_random(Array& a, T min, T max) {
28 |   const double scale = (static_cast<double>(max) - min) / RAND_MAX;
29 |   std::generate(begin(a), end(a), [=]() { return T(min + scale * rand()); }); }
30 | 
31 | template <class T>
32 | T compute_det(const std::array<T, 4>& d) {  // d0 * d3 - d2 * d1
33 |   return std::get<0>(d) * std::get<3>(d) - std::get<2>(d) * std::get<1>(d);
34 | }
35 | 
36 | #endif  // TEST_SRC_UTILS_UTILS_HPP
37 | 


--------------------------------------------------------------------------------