├── .clang-format ├── .gitignore ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── Doxyfile ├── LICENSE ├── README.md ├── cmake ├── HandleEigen.cmake └── Modules │ ├── ComputeCppCompilerChecks.cmake │ ├── ComputeCppIRMap.cmake │ ├── FindComputeCpp.cmake │ └── FindEigen.cmake ├── example ├── CMakeLists.txt └── src │ ├── mnist │ ├── read_mnist.hpp │ ├── run_classifier.hpp │ ├── run_gauss_classifier.cpp │ ├── run_gmm.cpp │ ├── run_lin_classifier.cpp │ └── run_svm.cpp │ └── utils │ ├── scoped_timer.hpp │ └── sycl_utils.hpp ├── include └── ml │ ├── classifiers │ ├── bayes │ │ ├── bayes_classifier.hpp │ │ ├── distributions │ │ │ └── log_gaussian_distribution.hpp │ │ └── linear_classifier.hpp │ ├── classifier.hpp │ ├── data_splitter.hpp │ ├── data_splitter_extremum_dist.hpp │ ├── em │ │ ├── em_classifier.hpp │ │ └── log_model_per_label.hpp │ ├── extremum_dist.hpp │ └── svm │ │ ├── kernel_cache.hpp │ │ ├── smo.hpp │ │ ├── svm.hpp │ │ └── svm_kernels.hpp │ ├── eigen │ ├── eigen.hpp │ └── sycl_to_eigen.hpp │ ├── math │ ├── cov.hpp │ ├── functors.hpp │ ├── helper.hpp │ ├── mat_inv.hpp │ ├── mat_mul.hpp │ ├── mat_ops.hpp │ ├── qr.hpp │ ├── svd.hpp │ ├── tri_inv.hpp │ ├── tri_solve.hpp │ └── vec_ops.hpp │ ├── preprocess │ ├── apply_pca.hpp │ └── pca.hpp │ └── utils │ ├── access.hpp │ ├── buffer_acc.hpp │ ├── buffer_t.hpp │ ├── common.hpp │ ├── copy.hpp │ ├── debug │ ├── assert.hpp │ ├── print_utils.hpp │ └── write_bmp.hpp │ ├── device_constants.hpp │ ├── optimal_range.hpp │ ├── save_utils.hpp │ └── sycl_types.hpp └── tests ├── CMakeLists.txt └── src ├── math ├── test_center.cpp ├── test_cov.cpp ├── test_dot_product.cpp ├── test_inv.cpp ├── test_mat_mul.cpp ├── test_qr.cpp ├── test_svd.cpp ├── test_tr_op.cpp └── test_tri_solve.cpp ├── svm ├── test_arg_extremum_cond.cpp ├── test_svm_linear.cpp └── test_svm_poly.cpp └── utils ├── assert_utils.hpp ├── sycl_utils.hpp ├── test_save_load.cpp └── utils.hpp /.clang-format: 
-------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Google 4 | AccessModifierOffset: -1 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveAssignments: false 7 | AlignConsecutiveDeclarations: false 8 | AlignEscapedNewlines: Left 9 | AlignOperands: true 10 | AlignTrailingComments: true 11 | AllowAllParametersOfDeclarationOnNextLine: true 12 | AllowShortBlocksOnASingleLine: false 13 | AllowShortCaseLabelsOnASingleLine: false 14 | AllowShortFunctionsOnASingleLine: Inline 15 | AllowShortIfStatementsOnASingleLine: false 16 | AllowShortLoopsOnASingleLine: false 17 | AlwaysBreakAfterDefinitionReturnType: None 18 | AlwaysBreakAfterReturnType: None 19 | AlwaysBreakBeforeMultilineStrings: true 20 | AlwaysBreakTemplateDeclarations: true 21 | BinPackArguments: true 22 | BinPackParameters: true 23 | BraceWrapping: 24 | AfterClass: false 25 | AfterControlStatement: false 26 | AfterEnum: false 27 | AfterFunction: false 28 | AfterNamespace: false 29 | AfterObjCDeclaration: false 30 | AfterStruct: false 31 | AfterUnion: false 32 | AfterExternBlock: false 33 | BeforeCatch: false 34 | BeforeElse: false 35 | IndentBraces: false 36 | SplitEmptyFunction: true 37 | SplitEmptyRecord: true 38 | SplitEmptyNamespace: true 39 | BreakBeforeBinaryOperators: None 40 | BreakBeforeBraces: Attach 41 | BreakBeforeInheritanceComma: true 42 | BreakBeforeTernaryOperators: true 43 | BreakConstructorInitializersBeforeComma: false 44 | BreakConstructorInitializers: BeforeColon 45 | BreakStringLiterals: true 46 | ColumnLimit: 80 47 | CommentPragmas: '^ IWYU pragma:' 48 | CompactNamespaces: false 49 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 50 | ConstructorInitializerIndentWidth: 4 51 | ContinuationIndentWidth: 4 52 | Cpp11BracedListStyle: true 53 | DerivePointerAlignment: false 54 | DisableFormat: false 55 | FixNamespaceComments: true 56 | ForEachMacros: 57 | - foreach 58 | - Q_FOREACH 59 | - BOOST_FOREACH 60 | 
IncludeBlocks: Preserve 61 | IncludeCategories: 62 | - Regex: '^<ext/.*\.h>' 63 | Priority: 2 64 | - Regex: '^<.*\.h>' 65 | Priority: 1 66 | - Regex: '^<.*' 67 | Priority: 2 68 | - Regex: '.*' 69 | Priority: 3 70 | IncludeIsMainRegex: '([-_](test|unittest))?$' 71 | IndentCaseLabels: true 72 | IndentPPDirectives: None 73 | IndentWidth: 2 74 | IndentWrappedFunctionNames: false 75 | KeepEmptyLinesAtTheStartOfBlocks: false 76 | MaxEmptyLinesToKeep: 1 77 | NamespaceIndentation: None 78 | PenaltyBreakAssignment: 2 79 | PenaltyBreakBeforeFirstCallParameter: 1 80 | PenaltyBreakComment: 300 81 | PenaltyBreakFirstLessLess: 120 82 | PenaltyBreakString: 1000 83 | PenaltyExcessCharacter: 1000000 84 | PenaltyReturnTypeOnItsOwnLine: 200 85 | PointerAlignment: Left 86 | ReflowComments: true 87 | SortIncludes: true 88 | SortUsingDeclarations: true 89 | SpaceAfterCStyleCast: true 90 | SpaceAfterTemplateKeyword: true 91 | SpaceBeforeAssignmentOperators: true 92 | SpaceBeforeParens: ControlStatements 93 | SpaceInEmptyParentheses: false 94 | SpacesBeforeTrailingComments: 2 95 | SpacesInAngles: false 96 | SpacesInContainerLiterals: true 97 | SpacesInCStyleCastParentheses: false 98 | SpacesInParentheses: false 99 | SpacesInSquareBrackets: false 100 | Standard: Cpp11 101 | TabWidth: 8 102 | UseTab: Never 103 | ... 104 | 105 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build*/ 2 | doc/ 3 | 4 | *~ 5 | *.bak 6 | *.swp 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) Codeplay Software Limited. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | cmake_minimum_required(VERSION 3.4.3) 16 | project(SYCL-ML) 17 | 18 | if(MSVC) 19 | message(WARNING "Windows support is only experimental for now") 20 | endif() 21 | 22 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/") 23 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules/") 24 | 25 | # Configuration options controlling automatic downloading of dependencies. 26 | option(SYCLML_DOWNLOAD_EIGEN "Download Eigen headers" ON) 27 | option(SYCLML_DOWNLOAD_MISSING_DEPS 28 | "Download any dependencies which cannot be found" ON) 29 | 30 | # Eigen configuration options. 31 | option(SYCLML_EIGEN_LOCAL_MEM 32 | "Only compile the local memory versions of Eigen kernels" ON) 33 | option(SYCLML_EIGEN_NO_LOCAL_MEM 34 | "Only compile the no local memory versions of Eigen kernels" OFF) 35 | option(SYCLML_EIGEN_COMPRESS_NAMES 36 | "Compress Eigen SYCL kernel names" OFF) 37 | option(SYCLML_EIGEN_NO_BARRIER 38 | "Use Eigen matmul which does not use barriers (implies NO_LOCAL_MEM)" OFF) 39 | 40 | # ComputeCpp configuration options. 41 | include(FindComputeCpp) 42 | # Avoid namespace ambiguities with older compilers 43 | list(APPEND COMPUTECPP_USER_FLAGS -DCOMPUTECPP_DISABLE_SYCL_NAMESPACE_ALIAS) 44 | option(SYCLML_COMPUTECPP_USE_SERIAL_MEMOP 45 | "Replace memory operations (eg memset) in kernels with serial operations." 
OFF) 46 | if(NOT SYCLML_COMPUTECPP_USE_SERIAL_MEMOP) 47 | list(APPEND COMPUTECPP_USER_FLAGS -no-serial-memop) 48 | endif() 49 | set(COMPUTECPP_DRIVER_DEFAULT_VALUE OFF) 50 | if (NOT MSVC) 51 | set(COMPUTECPP_DRIVER_DEFAULT_VALUE ON) 52 | endif() 53 | option(SYCLML_COMPUTECPP_USE_COMPILER_DRIVER 54 | "Use ComputeCpp driver instead of a 2 steps compilation" 55 | ${COMPUTECPP_DRIVER_DEFAULT_VALUE} 56 | ) 57 | if(SYCLML_COMPUTECPP_USE_COMPILER_DRIVER) 58 | set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE}) 59 | endif() 60 | option(SYCLML_COMPUTECPP_SPLIT_MODULES 61 | "Split modules to speed up compilation." ON) 62 | # Only pass the splitting flag when the option is enabled 63 | if(SYCLML_COMPUTECPP_SPLIT_MODULES) 64 | list(APPEND COMPUTECPP_USER_FLAGS -fsycl-split-modules=20) 65 | endif() 66 | 67 | # Tests configuration 68 | option(SYCLML_TEST_DOUBLE "Add tests using double type." OFF) 69 | 70 | # Set compile options for host and device compilers 71 | set(CMAKE_CXX_STANDARD 14) 72 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 73 | set(CMAKE_CXX_EXTENSIONS OFF) 74 | 75 | include(CheckCXXCompilerFlag) 76 | foreach(flag -Wall -Wextra -Wpedantic) 77 | # check_cxx_compiler_flag caches its result: use one variable per flag 78 | string(MAKE_C_IDENTIFIER "is_flag_supported${flag}" is_flag_supported) 79 | check_cxx_compiler_flag(${flag} ${is_flag_supported}) 80 | if(${is_flag_supported}) 81 | add_compile_options(${flag}) 82 | endif() 83 | endforeach() 84 | if(MSVC) 85 | add_compile_options(/bigobj) 86 | endif() 87 | 88 | include(HandleEigen) 89 | 90 | # Include SYCL-ML 91 | include_directories(${CMAKE_SOURCE_DIR}/include) 92 | add_library(sycl_ml INTERFACE) 93 | target_include_directories(sycl_ml INTERFACE 94 | $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/include> 95 | ) 96 | target_link_libraries(sycl_ml INTERFACE 97 | ComputeCpp::ComputeCpp 98 | Eigen 99 | ) 100 | 101 | # Add an executable named after SOURCE (without directory and extension), 102 | # link it with sycl_ml and hand SOURCE to add_sycl_to_target. 103 | # Set TARGET_NAME and OUTPUT_SYCL_DIR in the scope of the caller. 
104 | function(add_sycl_ml_executable SOURCE) 105 | get_filename_component(SOURCE_NAME_WE ${SOURCE} NAME_WE) 106 | get_filename_component(DIR ${SOURCE} DIRECTORY) 107 | string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "${CMAKE_CURRENT_BINARY_DIR}/" OUTPUT_SYCL_DIR ${DIR}) 108 | file(MAKE_DIRECTORY ${OUTPUT_SYCL_DIR}) 109 | set(TARGET_NAME ${SOURCE_NAME_WE}) 110 | set(TARGET_NAME ${TARGET_NAME} PARENT_SCOPE) 111 | set(OUTPUT_SYCL_DIR ${OUTPUT_SYCL_DIR} PARENT_SCOPE) 112 | 113 | add_executable(${TARGET_NAME} ${SOURCE}) 114 | target_link_libraries(${TARGET_NAME} sycl_ml) 115 | set_property(TARGET ${TARGET_NAME} PROPERTY COMPUTECPP_INCLUDE_AFTER 1) 116 | # Workaround INTERFACE_SYSTEM_INCLUDE_DIRECTORIES not being propagated 117 | # Add Eigen as system include 118 | target_include_directories(${TARGET_NAME} SYSTEM PRIVATE $<TARGET_PROPERTY:Eigen,INTERFACE_INCLUDE_DIRECTORIES>) 119 | add_sycl_to_target(TARGET ${TARGET_NAME} SOURCES ${SOURCE}) 120 | endfunction() 121 | 122 | add_subdirectory(example) 123 | 124 | include(CTest) 125 | if(BUILD_TESTING) 126 | enable_testing() 127 | add_subdirectory(tests) 128 | endif() 129 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression 9 | , level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. 
Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at 59 | [sycl@codeplay.com](mailto:sycl@codeplay.com). All complaints will be reviewed 60 | and investigated and will result in a response that is deemed necessary and 61 | appropriate to the circumstances. The project team is obligated to maintain 62 | confidentiality with regard to the reporter of an incident. Further details of 63 | specific enforcement policies may be posted separately. 64 | 65 | Project maintainers who do not follow or enforce the Code of Conduct in good 66 | faith may face temporary or permanent repercussions as determined by other 67 | members of the project's leadership. 68 | 69 | ## Attribution 70 | 71 | This Code of Conduct is adapted from the 72 | [Contributor Covenant](https://www.contributor-covenant.org/), version 1.4, 73 | available at 74 | https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 75 | 76 | --- 77 | 78 | If there are any issues or suggestions relating to the current set of rules, you 79 | can reach us at [sycl@codeplay.com](mailto:sycl@codeplay.com). 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SYCL-ML 2 | 3 | ## What is it? 4 | SYCL-ML is a framework providing simple classical machine learning algorithms using SYCL. 5 | It is meant to be accelerated on any OpenCL device supporting SPIR or SPIR-V. 6 | The following links give more details on what SYCL is: 7 | - https://www.khronos.org/sycl 8 | - https://developer.codeplay.com/computecppce/latest/sycl-guide-introduction 9 | 10 | ## What can it do? 
11 | Some linear algebra operations are implemented, such as: 12 | - **Matrix inversion** 13 | - **SVD decomposition** 14 | - **QR decomposition** 15 | 16 | In terms of machine learning related algorithms it includes: 17 | - **Principal Component Analysis**: used to reduce the dimensionality of a problem. 18 | - **Linear Classifier** (see naive Bayes classifier): classifies assuming all variables are equally important. 19 | - **Gaussian Classifier**: classifies using the Gaussian distribution. 20 | - **Gaussian Mixture Model**: based on the EM algorithm, uses multiple Gaussian distributions for each label. 21 | - **Support Vector Machine**: C-SVM with any kernel function. 22 | 23 | SYCL-ML is a header-only library, which makes it easy to integrate. 24 | 25 | More details on what the project implements and how it works can be found on our [website](https://www.codeplay.com/portal/12-21-17-alternative-machine-learning-algorithms-using-sycl-and-opencl). 26 | 27 | ## TODO list 28 | - Optimize **SVD** decomposition for faster PCA. The algorithm probably needs to be changed to compute eigenpairs differently. 29 | - Optimize **SVM** for GPU. Approaches from more recent papers on SVM for GPU should be experimented with. 30 | - Implement an **LDA** (or other dimensionality reduction algorithms) which would be used as a preprocessing step, similarly to PCA. 31 | - Implement **K-means** (or other clustering algorithms) which could be used to improve the initialization of the EM. 32 | - Add a proper way to select a SYCL device. 33 | 34 | ## Prerequisites 35 | SYCL-ML has been tested with: 36 | - Ubuntu 16.04, amdgpu pro driver 17.40 37 | - CMake 3.0 38 | - g++ 5.4 39 | - ComputeCpp 1.2.0 40 | 41 | ComputeCpp can be downloaded from the [Codeplay](https://www.codeplay.com/products/computesuite/computecpp) website. 42 | Once extracted, the ComputeCpp path should be stored in the `COMPUTECPP_DIR` environment variable (usually `/usr/local/computecpp`). 
43 | Alternatively, it can be given as an argument to cmake with `-DComputeCpp_DIR=path/to/computecpp`. 44 | 45 | ## Building 46 | Build all the targets with: 47 | ```bash 48 | mkdir build 49 | cd build 50 | cmake .. 51 | make 52 | ``` 53 | CMake will take care of downloading the Eigen dependency and MNIST dataset. 54 | On Unix it will automatically extract the MNIST dataset using `gunzip`. 55 | 56 | It is recommended to run the tests before running the examples: 57 | ```bash 58 | cd build/tests 59 | ctest --output-on-failure 60 | ``` 61 | 62 | The documentation can be built with `doxygen`. It requires `dot` from the `graphviz` package. Simply run: 63 | ```bash 64 | doxygen 65 | ``` 66 | 67 | ## Contributing 68 | The project is under the Apache 2.0 license. Any contribution is welcome! Also feel free to raise an issue for any 69 | questions or suggestions. 70 | -------------------------------------------------------------------------------- /cmake/HandleEigen.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (C) Codeplay Software Limited. 
2 | 3 | cmake_minimum_required(VERSION 3.2.2) 4 | 5 | if(NOT SYCLML_DOWNLOAD_EIGEN) 6 | find_package(Eigen) 7 | endif() 8 | 9 | if(NOT Eigen_FOUND AND (SYCLML_DOWNLOAD_EIGEN OR SYCLML_DOWNLOAD_MISSING_DEPS)) 10 | include(ExternalProject) 11 | set(EIGEN_REPO "https://gitlab.com/libeigen/eigen" CACHE STRING 12 | "Eigen repository to use" 13 | ) 14 | set(EIGEN_GIT_TAG "d0ae052" CACHE STRING 15 | "Git tag, branch or commit to use for the Eigen library" 16 | ) 17 | set(EIGEN_SOURCE_DIR ${PROJECT_BINARY_DIR}/Eigen-src) 18 | if(NOT TARGET Eigen_download) 19 | ExternalProject_Add(Eigen_download 20 | GIT_REPOSITORY ${EIGEN_REPO} 21 | GIT_TAG ${EIGEN_GIT_TAG} 22 | SOURCE_DIR ${EIGEN_SOURCE_DIR} 23 | CONFIGURE_COMMAND "" 24 | BUILD_COMMAND "" 25 | INSTALL_COMMAND "" 26 | TEST_COMMAND "" 27 | ) 28 | endif() 29 | set(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}) 30 | file(MAKE_DIRECTORY ${EIGEN_INCLUDE_DIR}) 31 | 32 | find_package(Eigen) 33 | add_dependencies(Eigen Eigen_download) 34 | mark_as_advanced(EIGEN_REPO EIGEN_GIT_TAG) 35 | endif() 36 | 37 | if(NOT Eigen_FOUND) 38 | message(FATAL_ERROR 39 | "Could not find Eigen, consider setting SYCLML_DOWNLOAD_MISSING_DEPS") 40 | endif() 41 | -------------------------------------------------------------------------------- /cmake/Modules/ComputeCppCompilerChecks.cmake: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4.3) 2 | 3 | if(CMAKE_COMPILER_IS_GNUCXX) 4 | if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) 5 | message(FATAL_ERROR "host compiler - gcc version must be >= 4.8") 6 | endif() 7 | elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 8 | if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6) 9 | message(FATAL_ERROR "host compiler - clang version must be >= 3.6") 10 | endif() 11 | endif() 12 | 13 | if(MSVC) 14 | set(ComputeCpp_STL_CHECK_SRC __STL_check) 15 | file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp 16 | "#include <ios>\n" 17 | "int main() { 
return 0; }\n") 18 | execute_process( 19 | COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} 20 | ${COMPUTECPP_DEVICE_COMPILER_FLAGS} 21 | -isystem ${ComputeCpp_INCLUDE_DIRS} 22 | -o ${ComputeCpp_STL_CHECK_SRC}.cpp.sycl 23 | -c ${ComputeCpp_STL_CHECK_SRC}.cpp 24 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 25 | RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT 26 | ERROR_QUIET 27 | OUTPUT_QUIET) 28 | if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) 29 | # Try disabling compiler version checks 30 | execute_process( 31 | COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} 32 | ${COMPUTECPP_DEVICE_COMPILER_FLAGS} 33 | -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH 34 | -isystem ${ComputeCpp_INCLUDE_DIRS} 35 | -o ${ComputeCpp_STL_CHECK_SRC}.cpp.sycl 36 | -c ${ComputeCpp_STL_CHECK_SRC}.cpp 37 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 38 | RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT 39 | ERROR_QUIET 40 | OUTPUT_QUIET) 41 | if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) 42 | message(STATUS "Device compiler cannot consume hosted STL headers. Using any parts of the STL will likely result in device compiler errors.") 43 | else() 44 | message(STATUS "Device compiler does not meet certain STL version requirements. 
Disabling version checks and hoping for the best.") 45 | list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH) 46 | endif() 47 | endif() 48 | file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp 49 | ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp.sycl) 50 | endif(MSVC) 51 | -------------------------------------------------------------------------------- /cmake/Modules/ComputeCppIRMap.cmake: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4.3) 2 | 3 | # These should match the types of IR output by compute++ 4 | set(IR_MAP_spir bc) 5 | set(IR_MAP_spir64 bc) 6 | set(IR_MAP_spir32 bc) 7 | set(IR_MAP_spirv spv) 8 | set(IR_MAP_spirv64 spv) 9 | set(IR_MAP_spirv32 spv) 10 | set(IR_MAP_aorta-x86_64 o) 11 | set(IR_MAP_aorta-aarch64 o) 12 | set(IR_MAP_aorta-rcar-cve o) 13 | set(IR_MAP_custom-spir64 bc) 14 | set(IR_MAP_custom-spir32 bc) 15 | set(IR_MAP_custom-spirv64 spv) 16 | set(IR_MAP_custom-spirv32 spv) 17 | set(IR_MAP_ptx64 s) 18 | set(IR_MAP_amdgcn s) 19 | -------------------------------------------------------------------------------- /cmake/Modules/FindEigen.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (C) Codeplay Software Limited. 2 | 3 | # Try to find the Eigen library and its Tensor module. 4 | # 5 | # If the library is found then the `Eigen` interface target is created with 6 | # the required include directories. 
7 | # 8 | # Sets the following variables: 9 | # Eigen_FOUND - whether the system has Eigen 10 | # EIGEN_INCLUDE_DIRS - the Eigen include directory 11 | 12 | find_path(EIGEN_INCLUDE_DIR 13 | NAMES unsupported/Eigen/CXX11/Tensor 14 | PATH_SUFFIXES eigen3 Eigen3 15 | DOC "The Eigen SYCL Tensor module" 16 | ) 17 | include(FindPackageHandleStandardArgs) 18 | find_package_handle_standard_args(Eigen 19 | FOUND_VAR Eigen_FOUND 20 | REQUIRED_VARS EIGEN_INCLUDE_DIR 21 | ) 22 | mark_as_advanced(Eigen_FOUND EIGEN_INCLUDE_DIRS) 23 | if(Eigen_FOUND) 24 | set(EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR}) 25 | endif() 26 | 27 | if(Eigen_FOUND AND NOT TARGET Eigen) 28 | add_library(Eigen INTERFACE) 29 | set_target_properties(Eigen PROPERTIES 30 | INTERFACE_INCLUDE_DIRECTORIES "${EIGEN_INCLUDE_DIR}" 31 | ) 32 | endif() 33 | if(Eigen_FOUND) 34 | set(eigen_definitions EIGEN_EXCEPTIONS=1 35 | EIGEN_USE_SYCL=1 36 | EIGEN_SYCL_USE_DEFAULT_SELECTOR=1) 37 | find_package(Threads) 38 | if(Threads_FOUND) 39 | list(APPEND eigen_definitions EIGEN_SYCL_ASYNC_EXECUTION=1) 40 | set_property(TARGET Eigen 41 | APPEND PROPERTY INTERFACE_LINK_LIBRARIES Threads::Threads 42 | ) 43 | endif() 44 | if(SYCLML_EIGEN_NO_BARRIER) 45 | list(APPEND eigen_definitions EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION=1 46 | EIGEN_SYCL_NO_LOCAL_MEM=1) 47 | else() 48 | if(SYCLML_EIGEN_LOCAL_MEM) 49 | list(APPEND eigen_definitions EIGEN_SYCL_LOCAL_MEM=1) 50 | endif() 51 | if(SYCLML_EIGEN_NO_LOCAL_MEM) 52 | list(APPEND eigen_definitions EIGEN_SYCL_NO_LOCAL_MEM=1) 53 | endif() 54 | endif() 55 | set_target_properties(Eigen PROPERTIES 56 | INTERFACE_COMPILE_DEFINITIONS "${eigen_definitions}" 57 | ) 58 | if(SYCLML_EIGEN_COMPRESS_NAMES) 59 | set_target_properties(Eigen PROPERTIES 60 | INTERFACE_COMPUTECPP_FLAGS "-sycl-compress-name" 61 | ) 62 | endif() 63 | endif() 64 | -------------------------------------------------------------------------------- /example/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # Copyright (C) Codeplay Software Limited. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | include_directories("src") 16 | 17 | # Download each MNIST archive missing from the build tree; on UNIX it is extracted with gunzip, elsewhere a warning asks for manual extraction 18 | set(MNIST_FILES "train-images-idx3-ubyte" 19 | "train-labels-idx1-ubyte" 20 | "t10k-images-idx3-ubyte" 21 | "t10k-labels-idx1-ubyte") 22 | set(MNIST_RELATIVE_PATH "data/mnist/") 23 | set(MNIST_FULL_PATH ${CMAKE_CURRENT_BINARY_DIR}/${MNIST_RELATIVE_PATH}) 24 | file(MAKE_DIRECTORY ${MNIST_FULL_PATH}) 25 | foreach(FILENAME ${MNIST_FILES}) 26 | if(NOT EXISTS ${MNIST_FULL_PATH}/${FILENAME}) 27 | set(FILE "${FILENAME}.gz") 28 | set(DL_LINK "http://yann.lecun.com/exdb/mnist/${FILE}") 29 | file(DOWNLOAD ${DL_LINK} ${MNIST_FULL_PATH}/${FILE} SHOW_PROGRESS) 30 | if(UNIX) 31 | execute_process(COMMAND gunzip ${FILE} 32 | WORKING_DIRECTORY ${MNIST_FULL_PATH} 33 | RESULT_VARIABLE PROC_RET) 34 | if(${PROC_RET}) 35 | message(FATAL_ERROR "Could not extract ${FILE}") 36 | endif() 37 | else() 38 | message(WARNING "Please, extract the file ${MNIST_FULL_PATH}/${FILE}") 39 | endif() 40 | endif() 41 | endforeach() 42 | 43 | # Build one executable per .cpp file found recursively under src/ 44 | file(GLOB_RECURSE SOURCES "src/*.cpp") 45 | foreach(SOURCE ${SOURCES}) 46 | add_sycl_ml_executable(${SOURCE}) 47 | endforeach() 48 | -------------------------------------------------------------------------------- /example/src/mnist/read_mnist.hpp: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef EXAMPLE_SRC_MNIST_READ_MNIST_HPP 17 | #define EXAMPLE_SRC_MNIST_READ_MNIST_HPP 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | /* 28 | * Load the MNIST data set: http://yann.lecun.com/exdb/mnist/ 29 | * The functions read *ubyte files meaning the .gz files have to be decompressed 30 | * (handled by CMake) 31 | */ 32 | 33 | // Convert from little to big endian. 
34 | static uint32_t reverse_int(uint32_t i) { 35 | // Isolate each of the four bytes, least significant first 36 | const uint32_t b0 = i & 255; 37 | const uint32_t b1 = (i >> 8) & 255; 38 | const uint32_t b2 = (i >> 16) & 255; 39 | const uint32_t b3 = (i >> 24) & 255; 40 | 41 | // Put them back in the opposite order 42 | return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3; 43 | } 44 | 45 | // Read 4 bytes from file into i and reverse their order. 46 | static void read_int(std::ifstream& file, uint32_t& i) { 47 | file.read(reinterpret_cast<char*>(&i), sizeof(i)); 48 | i = reverse_int(i); 49 | } 50 | 51 | // Return the closest power of 2 higher or equal to x 52 | template <class T> 53 | static inline T to_pow2(T x) { 54 | return static_cast<T>(std::pow(2, std::ceil(std::log2(x)))); 55 | } 56 | 57 | // Functor converting its argument to T with a static_cast 58 | template <class T> 59 | struct static_cast_func { 60 | template <class U> 61 | T operator()(const U& x) const { 62 | return static_cast<T>(x); 63 | } 64 | }; 65 | 66 | // Open full_path in binary mode. If it cannot be opened, try again with the 67 | // sixth character from the end replaced by '.'. The returned stream is not 68 | // open if both attempts failed. 69 | static std::ifstream open_mnist_file(const std::string& full_path) { 70 | std::ifstream file(full_path, std::ios::in | std::ios::binary); 71 | if (!file.is_open()) { 72 | // The gz format does not specify the output filename. 73 | // If the file couldn't open with the suffix "-ubyte", try with ".ubyte" 74 | std::string other_full_path = full_path; 75 | // A path shorter than the suffix has no character to replace 76 | if (other_full_path.size() >= 6) { 77 | other_full_path[other_full_path.size() - 6] = '.'; 78 | } 79 | file = std::ifstream(other_full_path, std::ios::in | std::ios::binary); 80 | if (!file.is_open()) { 81 | std::cerr << "Cannot open file `" << full_path << "` nor `" 82 | << other_full_path << "`" << std::endl; 83 | } 84 | } 85 | return file; 86 | } 87 | 
88 | /** 89 | * Read an MNIST image file and convert every pixel to T. 90 | * 91 | * @param full_path path of the decompressed image file 92 | * @param[out] image_size number of pixels of an image in the file 93 | * @param[out] padded_image_size number of elements of an image in the result 94 | * @param[out] nb_images number of images in the file 95 | * @param transpose if true an image is a column of the result, otherwise it 96 | * is a row 97 | * @param round_pow2 if true the padded size is computed from the numbers of 98 | * rows and columns each rounded up to a power of 2, the 99 | * extra elements are set to 0 100 | * @param norm_factor value every pixel is divided by 101 | * @return the converted images, empty if the file could not be read 102 | */ 103 | template <class T> 104 | std::vector<T> read_mnist_images(const std::string& full_path, 105 | unsigned& image_size, 106 | unsigned& padded_image_size, 107 | unsigned& nb_images, bool transpose, 108 | bool round_pow2, T norm_factor = 1) { 109 | std::ifstream file = open_mnist_file(full_path); 110 | if (!file.is_open()) { 111 | std::cerr << "Could not open file: " << full_path << std::endl; 112 | return {}; 113 | } 114 | 115 | uint32_t magic_number = 0; 116 | read_int(file, magic_number); 117 | if (magic_number != 2051) { 118 | std::cerr << "Invalid MNIST file: " << full_path << std::endl; 119 | return {}; 120 | } 121 | 122 | uint32_t read_nb_images = 0, read_nb_rows = 0, read_nb_cols = 0; 123 | read_int(file, read_nb_images); 124 | read_int(file, read_nb_rows); 125 | read_int(file, read_nb_cols); 126 | 127 | uint32_t out_read_nb_rows = read_nb_rows; 128 | uint32_t out_read_nb_cols = read_nb_cols; 129 | if (round_pow2) { 130 | out_read_nb_rows = to_pow2(read_nb_rows); 131 | out_read_nb_cols = to_pow2(read_nb_cols); 132 | } 133 | 134 | nb_images = read_nb_images; 135 | image_size = read_nb_rows * read_nb_cols; 136 | unsigned buffer_total_size = nb_images * image_size; 137 | std::vector<unsigned char> buffer(buffer_total_size); 138 | 139 | padded_image_size = out_read_nb_rows * out_read_nb_cols; 140 | unsigned dataset_total_size = nb_images * padded_image_size; 141 | // The elements are value-initialized so the padding is already set to 0 142 | std::vector<T> dataset(dataset_total_size); 143 | 144 | file.read(reinterpret_cast<char*>(buffer.data()), buffer_total_size); 145 | if (!file) { 146 | std::cerr << "Truncated MNIST file: " << full_path << std::endl; 147 | return {}; 148 | } 149 | 
150 | if (transpose) { 151 | for (unsigned c = 0; c < nb_images; ++c) { 152 | for (unsigned r = 0; r < image_size; ++r) { 153 | // Cast, normalize and transpose 154 | dataset[r * nb_images + c] = 155 | static_cast<T>(buffer[c * image_size + r]) / norm_factor; 156 | } 157 | } 158 | } else { 159 | for (unsigned r = 0; r < nb_images; ++r) { 160 | for (unsigned c = 0; c < image_size; ++c) { 161 | // Cast and normalize 162 | dataset[r * padded_image_size + c] = 163 | static_cast<T>(buffer[r * image_size + c]) / norm_factor; 164 | } 165 | } 166 | } 167 | 168 | return dataset; 169 | } 170 | 171 | /** 172 | * Read an MNIST label file and convert every label to T. 173 | * 174 | * @param full_path path of the decompressed label file 175 | * @param[out] nb_labels number of labels in the file 176 | * @return the converted labels, empty if the file could not be read 177 | */ 178 | template <class T> 179 | std::vector<T> read_mnist_labels(const std::string& full_path, 180 | unsigned& nb_labels) { 181 | std::ifstream file = open_mnist_file(full_path); 182 | if (!file.is_open()) { 183 | std::cerr << "Could not open file: " << full_path << std::endl; 184 | return {}; 185 | } 186 | 187 | uint32_t magic_number = 0; 188 | read_int(file, magic_number); 189 | if (magic_number != 2049) { 190 | std::cerr << "Invalid MNIST file: " << full_path << std::endl; 191 | return {}; 192 | } 193 | 194 | uint32_t read_nb_labels = 0; 195 | read_int(file, read_nb_labels); 196 | nb_labels = read_nb_labels; 197 | 198 | std::vector<unsigned char> buffer(nb_labels); 199 | file.read(reinterpret_cast<char*>(buffer.data()), nb_labels); 200 | if (!file) { 201 | std::cerr << "Truncated MNIST file: " << full_path << std::endl; 202 | return {}; 203 | } 204 | 205 | std::vector<T> labels(nb_labels); 206 | std::transform(buffer.begin(), buffer.end(), labels.begin(), 207 | static_cast_func<T>()); 208 | 209 | return labels; 210 | } 211 | 
212 | // Paths of the four MNIST files inside the directory prefix 213 | inline std::string mnist_get_train_images_path(const std::string& prefix) { 214 | return prefix + "/train-images-idx3-ubyte"; 215 | } 216 | 217 | inline std::string mnist_get_train_labels_path(const std::string& prefix) { 218 | return prefix + "/train-labels-idx1-ubyte"; 219 | } 220 | 221 | inline std::string mnist_get_test_images_path(const std::string& prefix) { 222 | return prefix + "/t10k-images-idx3-ubyte"; 223 | } 224 | 225 | inline std::string mnist_get_test_labels_path(const std::string& prefix) { 226 | return prefix + "/t10k-labels-idx1-ubyte"; 227 | } 228 | 229 | #endif // EXAMPLE_SRC_MNIST_READ_MNIST_HPP 230 | -------------------------------------------------------------------------------- /example/src/mnist/run_classifier.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef EXAMPLE_SRC_MNIST_RUN_CLASSIFIER_HPP 17 | #define EXAMPLE_SRC_MNIST_RUN_CLASSIFIER_HPP 18 | 19 | #include 20 | 21 | #include "ml/preprocess/apply_pca.hpp" 22 | 23 | #include "read_mnist.hpp" 24 | #include "utils/scoped_timer.hpp" 25 | #include "utils/sycl_utils.hpp" 26 | 27 | /** 28 | * @brief Train and test any given classifier. 
29 | * 30 | * @tparam ClassifierT 31 | * @param mnist_path 32 | * @param pca_args arguments given to the PCA 33 | * @param classifier 34 | */ 35 | template 36 | void run_classifier( 37 | const std::string& mnist_path, 38 | const ml::pca_args& pca_args, 39 | ClassifierT classifier = ClassifierT()) { 40 | // The TIME macro creates an object that will print the time elapsed between 41 | // its construction and destruction 42 | TIME(run_classifier); 43 | 44 | using DataType = typename ClassifierT::DataType; 45 | using LabelType = typename ClassifierT::LabelType; 46 | 47 | // MNIST specific 48 | std::vector label_set(10); 49 | // Create the set of labels here instead of computing it during the training 50 | std::iota(label_set.begin(), label_set.end(), 0); 51 | // Data will be shifted in the range [0, 1] 52 | const DataType normalize_factor = 255; 53 | 54 | // Load and save options 55 | const bool load_classifier = false; 56 | const bool save_classifier = false; 57 | 58 | // What the classifier will compute 59 | std::vector host_predicted_test_labels; 60 | 61 | { // Scope with a SYCL queue 62 | cl::sycl::queue& q = create_queue(); 63 | 64 | ml::apply_pca apply_pca; 65 | 66 | // Load the train data, perform PCA and train the classifier 67 | { 68 | unsigned obs_size, padded_obs_size, nb_train_obs; 69 | // Load train data 70 | ml::matrix_t sycl_train_data; 71 | { 72 | auto host_train_data = read_mnist_images( 73 | mnist_get_train_images_path(mnist_path), obs_size, padded_obs_size, 74 | nb_train_obs, false, true, normalize_factor); 75 | if (host_train_data.empty()) { 76 | return; 77 | } 78 | ml::matrix_t sycl_train_data_raw( 79 | host_train_data.data(), 80 | cl::sycl::range<2>(nb_train_obs, padded_obs_size)); 81 | // Specify the real size of an observation 82 | sycl_train_data_raw.data_range[1] = obs_size; 83 | sycl_train_data_raw.set_final_data(nullptr); 84 | 85 | sycl_train_data = 86 | apply_pca.compute_and_apply(q, sycl_train_data_raw, pca_args); 87 | } 88 | 89 | // Load 
labels 90 | auto host_train_labels = read_mnist_labels( 91 | mnist_get_train_labels_path(mnist_path), nb_train_obs); 92 | if (host_train_labels.empty()) { 93 | return; 94 | } 95 | 96 | if (load_classifier) { 97 | classifier.load_from_disk(q); 98 | } else { 99 | { // Create a scope to time only the training 100 | TIME(train_classifier); 101 | // Give the sets of labels to avoid computing it during the training 102 | classifier.set_label_set(label_set); 103 | classifier.train(q, sycl_train_data, host_train_labels); 104 | // Wait to measure the correct training time 105 | q.wait_and_throw(); 106 | } 107 | if (save_classifier) { 108 | classifier.save_to_disk(q); 109 | } 110 | } 111 | } // End of train 112 | 113 | // Load the test data, apply the PCA using the eigenvectors from the 114 | // training and test the classifier 115 | { 116 | unsigned obs_size, padded_obs_size, nb_test_obs; 117 | ml::matrix_t sycl_test_data; 118 | { // Load test data 119 | auto host_test_data = read_mnist_images( 120 | mnist_get_test_images_path(mnist_path), obs_size, padded_obs_size, 121 | nb_test_obs, false, true, normalize_factor); 122 | if (host_test_data.empty()) { 123 | return; 124 | } 125 | ml::matrix_t sycl_test_data_raw( 126 | host_test_data.data(), 127 | cl::sycl::range<2>(nb_test_obs, padded_obs_size)); 128 | // Specify the real size of an observation 129 | sycl_test_data_raw.data_range[1] = obs_size; 130 | sycl_test_data_raw.set_final_data(nullptr); 131 | 132 | sycl_test_data = apply_pca.apply(q, sycl_test_data_raw); 133 | } 134 | 135 | // Inference 136 | TIME(predict_classifier); 137 | auto sycl_predicted_test_labels = classifier.predict(q, sycl_test_data); 138 | // Can be rounded up to a power of 2 139 | auto nb_labels_predicted = sycl_predicted_test_labels.get_kernel_size(); 140 | host_predicted_test_labels.resize(nb_labels_predicted); 141 | sycl_predicted_test_labels.set_final_data( 142 | host_predicted_test_labels.data()); 143 | // Wait to measure the correct prediction time 
144 | q.wait_and_throw(); 145 | } // End of tests 146 | 147 | clear_eigen_device(); 148 | } // SYCL queue is destroyed 149 | 150 | // Compare predicted labels and expected labels 151 | unsigned nb_test_obs; 152 | auto host_expected_test_labels = read_mnist_labels( 153 | mnist_get_test_labels_path(mnist_path), nb_test_obs); 154 | if (host_expected_test_labels.empty()) { 155 | return; 156 | } 157 | classifier.print_score(host_predicted_test_labels.data(), 158 | host_expected_test_labels.data(), nb_test_obs); 159 | } 160 | 161 | #endif // EXAMPLE_SRC_MNIST_RUN_CLASSIFIER_HPP 162 | -------------------------------------------------------------------------------- /example/src/mnist/run_gauss_classifier.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include "ml/classifiers/bayes/bayes_classifier.hpp" 17 | #include "ml/classifiers/bayes/distributions/log_gaussian_distribution.hpp" 18 | #include "run_classifier.hpp" 19 | 20 | int main(int argc, char** argv) { 21 | std::string mnist_path = "data/mnist"; 22 | if (argc >= 2) { 23 | mnist_path = argv[1]; 24 | } 25 | 26 | // Runs the gaussian classifier on MNIST with a PCA 27 | using data_t = float; 28 | using distribution_t = ml::buffered_log_gaussian_distribution; 29 | ml::pca_args pca_args; 30 | pca_args.min_nb_vecs = 64; // Keep at least 64 basis vector 31 | pca_args.keep_percent = 0.8; // Keep at least 80% of information 32 | pca_args.scale_factor = 1E2; // More accurate but slower PCA 33 | try { 34 | run_classifier>(mnist_path, 35 | pca_args); 36 | } catch (cl::sycl::exception e) { 37 | std::cerr << e.what(); 38 | } 39 | 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /example/src/mnist/run_gmm.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include "ml/classifiers/bayes/distributions/log_gaussian_distribution.hpp" 17 | #include "ml/classifiers/em/em_classifier.hpp" 18 | #include "ml/classifiers/em/log_model_per_label.hpp" 19 | #include "run_classifier.hpp" 20 | 21 | int main(int argc, char** argv) { 22 | std::string mnist_path = "data/mnist"; 23 | if (argc >= 2) { 24 | mnist_path = argv[1]; 25 | } 26 | 27 | // Runs the GMM with 8 models per label on MNIST with a PCA 28 | using data_t = float; 29 | using label_t = uint8_t; 30 | using distribution_t = ml::buffered_log_gaussian_distribution; 31 | static constexpr unsigned M = 8; 32 | ml::pca_args pca_args; 33 | pca_args.min_nb_vecs = 64; // Keep at least 64 basis vector 34 | pca_args.keep_percent = 0.8; // Keep at least 80% of information 35 | pca_args.scale_factor = 1E2; // More accurate but slower PCA 36 | try { 37 | run_classifier< 38 | ml::em_classifier>>( 39 | mnist_path, pca_args); 40 | } catch (cl::sycl::exception e) { 41 | std::cerr << e.what(); 42 | } 43 | 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /example/src/mnist/run_lin_classifier.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include "ml/classifiers/bayes/linear_classifier.hpp" 17 | #include "run_classifier.hpp" 18 | 19 | int main(int argc, char** argv) { 20 | std::string mnist_path = "data/mnist"; 21 | if (argc >= 2) { 22 | mnist_path = argv[1]; 23 | } 24 | // Runs the linear classifier on MNIST with a PCA 25 | using data_t = float; 26 | ml::pca_args pca_args; 27 | pca_args.min_nb_vecs = 128; // Keep at least 128 basis vector 28 | pca_args.keep_percent = 0.8; // Keep at least 80% of information 29 | pca_args.scale_factor = 1E2; // More accurate but slower PCA 30 | try { 31 | run_classifier>(mnist_path, 32 | pca_args); 33 | } catch (cl::sycl::exception e) { 34 | std::cerr << e.what(); 35 | } 36 | 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /example/src/mnist/run_svm.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "ml/classifiers/svm/svm.hpp" 17 | #include "run_classifier.hpp" 18 | 19 | int main(int argc, char** argv) { 20 | std::string mnist_path = "data/mnist"; 21 | if (argc >= 2) { 22 | mnist_path = argv[1]; 23 | } 24 | 25 | // Runs the SVM with the RBF kernel on MNIST with a PCA. 
26 | // The SVM will store 2 rows of the kernel matrix and has a tolerance of 0.1 27 | using data_t = float; 28 | using label_t = uint8_t; 29 | using svm_kernel_t = ml::svm_rbf_kernel; 30 | 31 | const data_t C = 5; // Parameter of a C-SVM 32 | const svm_kernel_t ker(0.05); // Parameter of the RBF kernel 33 | 34 | ml::pca_args pca_args; 35 | pca_args.min_nb_vecs = 64; // Keep at least 64 basis vector 36 | pca_args.keep_percent = 0.8; // Keep at least 80% of information 37 | pca_args.scale_factor = 1E2; // More accurate but slower PCA 38 | 39 | try { 40 | run_classifier(mnist_path, pca_args, 41 | ml::svm(C, ker, 2, 0.1, 0.1)); 42 | } catch (cl::sycl::exception e) { 43 | std::cerr << e.what(); 44 | } 45 | 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /example/src/utils/scoped_timer.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef EXAMPLE_SRC_UTILS_SCOPED_TIMER_HPP 17 | #define EXAMPLE_SRC_UTILS_SCOPED_TIMER_HPP 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | /** 24 | * @brief scoped_timer measures and print the time between the creation and 25 | * destruction of the object. Also print with an indentation when several 26 | * scoped_timer are used. 
/**
 * @brief scoped_timer measures and prints the time elapsed between the
 * construction and the destruction of the object.
 *
 * A "Starting <name>" line is printed on construction and "<name>: <t>s" on
 * destruction. Timers that are alive at the same time are printed with an
 * indentation of two spaces per enclosing timer.
 */
class scoped_timer {
  using sc = std::chrono::high_resolution_clock;

 public:
  /**
   * @brief Print the start message and start measuring.
   * @param name label printed in both messages
   */
  explicit scoped_timer(const std::string& name) : _name(name) {
    std::cout << std::string(indent(), ' ') << "Starting " << _name
              << std::endl;
    indent() += 2;
    _t0 = sc::now();
  }

  // A copy would run the destructor twice for a single construction and
  // underflow the unsigned indentation counter.
  scoped_timer(const scoped_timer&) = delete;
  scoped_timer& operator=(const scoped_timer&) = delete;

  /// Stop measuring and print the elapsed time in seconds.
  ~scoped_timer() {
    std::chrono::duration<double> diff = sc::now() - _t0;
    indent() -= 2;
    std::cout << std::string(indent(), ' ') << _name << ": " << diff.count()
              << "s" << std::endl;
  }

 private:
  /**
   * @brief Current indentation shared by all timers.
   *
   * Stored as a function-local static so that the header can be included from
   * several translation units without defining the counter more than once.
   */
  static unsigned& indent() {
    static unsigned level = 0;
    return level;
  }

  std::string _name;
  sc::time_point _t0;
};

/**
 * @brief Create a timer and mark the variable unused.
 */
#define TIME(name)                   \
  scoped_timer _timer_##name(#name); \
  (void) _timer_##name
15 | */ 16 | #ifndef EXAMPLE_SRC_UTILS_SYCL_UTILS_HPP 17 | #define EXAMPLE_SRC_UTILS_SYCL_UTILS_HPP 18 | 19 | #include "ml/utils/common.hpp" 20 | 21 | class init_first_kernel; 22 | 23 | /** 24 | * @brief Used to avoid measuring OpenCL initialization overhead 25 | * @param q 26 | */ 27 | void launch_first_kernel(cl::sycl::queue& q) { 28 | q.submit([](cl::sycl::handler& cgh) { 29 | cgh.single_task([]() {}); 30 | }); 31 | } 32 | 33 | /** 34 | * @brief Initialize device_constants and return the queue. 35 | * @return the sycl queue 36 | */ 37 | cl::sycl::queue& create_queue() { 38 | ml::device_constants<>::instance = new ml::device_constants<>(); 39 | auto& q = ml::get_eigen_device().sycl_queue(); 40 | launch_first_kernel(q); 41 | return q; 42 | } 43 | 44 | /** 45 | * @brief Free the singleton device_constants. 46 | */ 47 | void clear_eigen_device() { 48 | delete ml::get_device_constants(); 49 | } 50 | 51 | #endif // EXAMPLE_SRC_UTILS_SYCL_UTILS_HPP 52 | -------------------------------------------------------------------------------- /include/ml/classifiers/bayes/bayes_classifier.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #ifndef INCLUDE_ML_CLASSIFIERS_BAYES_BAYES_CLASSIFIER_HPP 17 | #define INCLUDE_ML_CLASSIFIERS_BAYES_BAYES_CLASSIFIER_HPP 18 | 19 | #include "ml/classifiers/data_splitter_extremum_dist.hpp" 20 | 21 | namespace ml { 22 | 23 | /** 24 | * @brief Naive Bayes Classifier 25 | * 26 | * Compute the parameters of a distribution during the training. 27 | * Use the parameters during the inference. 28 | * 29 | * @tparam DistributionT 30 | * @tparam LabelT 31 | */ 32 | template 33 | class bayes_classifier 34 | : public data_splitter_extremum_dist { 36 | public: 37 | using DataType = typename DistributionT::DataType; 38 | 39 | protected: 40 | std::vector _distributions; 41 | 42 | virtual void train_setup_for_each_label(queue& q) override { 43 | data_splitter_extremum_dist::train_setup_for_each_label(q); 45 | 46 | range<1> data_dim_rng(this->_data_dim); 47 | auto data_dim_pow2_rng = get_optimal_nd_range(this->_data_dim_pow2); 48 | range<2> data_dim_rng_d2(this->_data_dim, this->_data_dim); 49 | auto data_dim_pow2_rng_d2 = 50 | get_optimal_nd_range(this->_data_dim_pow2, this->_data_dim_pow2); 51 | 52 | auto nb_labels = this->get_nb_labels(); 53 | for (unsigned l = 0; l < nb_labels; ++l) { 54 | _distributions.emplace_back(); 55 | _distributions.back().init(data_dim_rng, data_dim_pow2_rng, 56 | data_dim_rng_d2, data_dim_pow2_rng_d2); 57 | } 58 | } 59 | 60 | virtual void train_for_each_label(queue& q, unsigned label_idx, 61 | matrix_t& act_data) override { 62 | _distributions[label_idx].compute(q, act_data); 63 | } 64 | 65 | virtual void compute_dist(queue& q, matrix_t& dataset, 66 | matrix_t& dist) override { 67 | auto nb_labels = this->get_nb_labels(); 68 | for (SYCLIndexT l = 0; l < nb_labels; ++l) { 69 | auto dist_row = dist.get_row(l); 70 | _distributions[l].compute_dist(q, dataset, dist_row); 71 | } 72 | } 73 | }; 74 | 75 | } // namespace ml 76 | 77 | #endif // INCLUDE_ML_CLASSIFIERS_BAYES_BAYES_CLASSIFIER_HPP 78 | 
-------------------------------------------------------------------------------- /include/ml/classifiers/bayes/linear_classifier.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef INCLUDE_ML_CLASSIFIERS_BAYES_LINEAR_CLASSIFIER_HPP 17 | #define INCLUDE_ML_CLASSIFIERS_BAYES_LINEAR_CLASSIFIER_HPP 18 | 19 | #include "ml/classifiers/data_splitter_extremum_dist.hpp" 20 | 21 | namespace ml { 22 | 23 | /** 24 | * @brief Naive Bayes Classifier with a linear function. 25 | * 26 | * During the training, compute the average for each label. 27 | * The distance is the Euclidean distance between the learned average and the 28 | * given sample. The index of the smallest distance then gives the predicted 29 | * label. 30 | * 31 | * The linear_classifier could be written using the bayes_classifier but is 32 | * simpler and faster this way. 
33 | * 34 | * @tparam DataT 35 | * @tparam LabelT 36 | */ 37 | template 38 | class linear_classifier : public data_splitter_extremum_dist { 39 | protected: 40 | vector_t _act_data_avg; 41 | matrix_t _data_avg_per_label; 42 | 43 | virtual void train_setup_for_each_label(queue& q) override { 44 | data_splitter_extremum_dist::train_setup_for_each_label(q); 45 | 46 | auto nb_labels = this->get_nb_labels(); 47 | _act_data_avg = vector_t(range<1>(this->_data_dim), 48 | get_optimal_nd_range(this->_data_dim_pow2)); 49 | _data_avg_per_label = 50 | matrix_t(range<2>(nb_labels, this->_data_dim), 51 | get_optimal_nd_range(nb_labels, this->_data_dim_pow2)); 52 | } 53 | 54 | virtual void train_for_each_label(queue& q, unsigned label_idx, 55 | matrix_t& act_data) override { 56 | avg(q, act_data, _act_data_avg); 57 | copy_vec_to_mat( 58 | q, _data_avg_per_label, _act_data_avg, _act_data_avg.kernel_range, 59 | static_cast(label_idx)); 60 | } 61 | 62 | virtual void compute_dist(queue&, matrix_t& dataset, 63 | matrix_t& dist) override { 64 | // Sum squared each pixel 65 | eig_index_t nb_labels = static_cast(access_data_dim(dist, 0)); 66 | eig_index_t nb_obs = static_cast(access_data_dim(dataset, 0)); 67 | eig_index_t data_dim_pow2 = static_cast(this->_data_dim_pow2); 68 | 69 | auto eig_dataset = sycl_to_eigen(dataset); 70 | auto eig_data_avg_per_label = sycl_to_eigen(_data_avg_per_label); 71 | 72 | auto eig_dist = sycl_to_eigen(dist); 73 | auto dataset_3d = eig_dataset.tensor() 74 | .reshape(eig_dims_t<3>{nb_obs, 1, data_dim_pow2}) 75 | .broadcast(eig_dims_t<3>{1, nb_labels, 1}); 76 | auto data_avg_per_label_3d = 77 | eig_data_avg_per_label.tensor() 78 | .reshape(eig_dims_t<3>{1, nb_labels, data_dim_pow2}) 79 | .broadcast(eig_dims_t<3>{nb_obs, 1, 1}); 80 | auto sliced_dist = eig_dist.tensor().slice( 81 | eig_dsize_t<2>{0, 0}, eig_dsize_t<2>(nb_labels, nb_obs)); 82 | sliced_dist.device(get_eigen_device()) = 83 | (dataset_3d - data_avg_per_label_3d) 84 | .square() 85 | 
.sum(eig_dims_t<1>{2}) 86 | .shuffle(eig_dims_t<2>{1, 0}); 87 | } 88 | }; 89 | 90 | } // namespace ml 91 | 92 | #endif // INCLUDE_ML_CLASSIFIERS_BAYES_LINEAR_CLASSIFIER_HPP 93 | -------------------------------------------------------------------------------- /include/ml/classifiers/classifier.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef INCLUDE_ML_CLASSIFIERS_CLASSIFIER_HPP 17 | #define INCLUDE_ML_CLASSIFIERS_CLASSIFIER_HPP 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "ml/utils/common.hpp" 25 | 26 | namespace ml { 27 | 28 | /** 29 | * @brief Abstract class for all classifiers. 30 | * 31 | * @tparam DataT type of the dataset 32 | * @tparam LabelT type of the labels 33 | */ 34 | template 35 | class classifier { 36 | public: 37 | using DataType = DataT; 38 | using LabelType = LabelT; 39 | 40 | /** 41 | * @brief Train the classifier. 42 | * 43 | * @param q 44 | * @param dataset 45 | * @param labels 46 | * @param nb_labels number of different labels, must be set if set_label_set 47 | * has not been called 48 | */ 49 | virtual void train(queue& q, matrix_t& dataset, 50 | std::vector& labels, unsigned nb_labels = 0) = 0; 51 | 52 | /** 53 | * @brief Predict labels with the given observations. 
54 | * 55 | * @param q 56 | * @param dataset 57 | * @return the predicted labels 58 | */ 59 | virtual vector_t predict(queue& q, matrix_t& dataset) = 0; 60 | 61 | /** 62 | * @brief Print statistics about the predicted labels. 63 | * 64 | * Compute and print the confusion matrix as well as the success rate, 65 | * precision, recall and F1-score. 66 | * 67 | * @param[in] predicted 68 | * @param[in] expected 69 | * @param nb_obs 70 | */ 71 | static void print_score( 72 | const LabelT* predicted, const LabelT* expected, unsigned nb_obs, 73 | unsigned nb_labels, 74 | const std::unordered_map& label_user_to_label_idx) { 75 | std::vector cm(nb_labels * nb_labels, 0); 76 | for (unsigned i = 0; i < nb_obs; ++i) { 77 | cm[label_user_to_label_idx.at(expected[i]) * nb_labels + 78 | label_user_to_label_idx.at(predicted[i])] += 1; 79 | } 80 | 81 | double success_rate = 0; 82 | double precision = 0; 83 | double recall = 0; 84 | double sum_row; 85 | double sum_col; 86 | double diag_val; 87 | for (unsigned i = 0; i < nb_labels; ++i) { 88 | sum_row = 0; 89 | sum_col = 0; 90 | for (unsigned j = 0; j < nb_labels; ++j) { 91 | sum_row += cm[i * nb_labels + j]; 92 | sum_col += cm[j * nb_labels + i]; 93 | } 94 | 95 | diag_val = cm[i * nb_labels + i]; 96 | success_rate += diag_val; 97 | precision += diag_val / sum_row; 98 | recall += diag_val / sum_col; 99 | } 100 | 101 | success_rate /= nb_obs; 102 | precision /= nb_labels; 103 | recall /= nb_labels; 104 | 105 | double f1_score = 2 * (precision * recall) / (precision + recall); 106 | 107 | std::cout << "\nSuccess rate: " << success_rate * 100 << "%\n"; 108 | std::cout << "Precision: " << precision * 100 << "%\n"; 109 | std::cout << "Recall: " << recall * 100 << "%\n"; 110 | std::cout << "F1-score: " << f1_score << "\n\n"; 111 | 112 | std::cout << "Confusion matrix:\n"; 113 | char prev_fill = std::cout.fill(' '); 114 | for (unsigned i = 0; i < nb_labels; ++i) { 115 | for (unsigned j = 0; j < nb_labels; ++j) { 116 | if (j < nb_labels - 1) { 
117 | std::cout << std::left << std::setw(5) << cm[i * nb_labels + j] 118 | << ' '; 119 | } else { 120 | std::cout << std::left << cm[i * nb_labels + j] << '\n'; 121 | } 122 | } 123 | } 124 | std::cout.fill(prev_fill); 125 | } 126 | 127 | inline void print_score(const LabelT* predicted, const LabelT* expected, 128 | unsigned nb_obs) { 129 | classifier::print_score( 130 | predicted, expected, nb_obs, get_nb_labels(), _label_user_to_label_idx); 131 | } 132 | 133 | virtual void load_from_disk(queue&) { assert(false); } 134 | virtual void save_to_disk(queue&) { assert(false); } 135 | 136 | inline unsigned get_nb_labels() const { 137 | return _host_label_idx_to_label_user.size(); 138 | } 139 | 140 | /** 141 | * @brief Give the set of labels instead of computing it during the training. 142 | * 143 | * Optional function called before the training. 144 | * 145 | * @tparam LabelSet any type with a begin and end method used for copy 146 | * @param[in] label_set 147 | */ 148 | template 149 | void set_label_set(const LabelSet& label_set) { 150 | std::copy(label_set.begin(), label_set.end(), 151 | std::back_inserter(_host_label_idx_to_label_user)); 152 | assert(_host_label_idx_to_label_user.size() > 0); 153 | setup_host_label_idx_to_label_user(); 154 | } 155 | 156 | /** 157 | * @brief Compute the list of indexes of each labels. 
158 | * 159 | * @tparam HostLabelsT any type with a squared bracket accessor 160 | * @param host_labels 161 | * @param nb_labels number of different labels 162 | * @param nb_obs number of element in host_labels 163 | * @return labels_indices 164 | */ 165 | template 166 | std::vector> get_labels_indices( 167 | const HostLabelsT& host_labels, unsigned nb_labels, unsigned nb_obs) { 168 | std::vector> labels_indices(nb_labels); 169 | for (unsigned i = 0; i < nb_obs; ++i) { 170 | labels_indices[this->_label_user_to_label_idx[host_labels[i]]].push_back( 171 | i); 172 | } 173 | return labels_indices; 174 | } 175 | 176 | protected: 177 | std::vector _host_label_idx_to_label_user; 178 | vector_t _label_idx_to_label_user; 179 | std::unordered_map _label_user_to_label_idx; 180 | 181 | /** 182 | * @brief Fill _label_idx_to_label_user and _label_user_to_label_idx 183 | * 184 | * @tparam HostLabelT any type accessible with square brackets 185 | * @param[in] labels 186 | * @param nb_labels 187 | */ 188 | template 189 | void process_labels(const HostLabelT& host_labels, unsigned nb_labels) { 190 | // Labels have been set by the user beforehand 191 | if (!_label_user_to_label_idx.empty()) { 192 | return; 193 | } 194 | 195 | // Find all different labels 196 | _host_label_idx_to_label_user.reserve(nb_labels); 197 | for (unsigned i = 0; _host_label_idx_to_label_user.size() < nb_labels; 198 | ++i) { 199 | auto user_label = host_labels[i]; 200 | auto it = std::find(_host_label_idx_to_label_user.begin(), 201 | _host_label_idx_to_label_user.end(), user_label); 202 | if (it == _host_label_idx_to_label_user.end()) { 203 | _host_label_idx_to_label_user.push_back(user_label); 204 | } 205 | } 206 | std::sort(_host_label_idx_to_label_user.begin(), 207 | _host_label_idx_to_label_user.end()); 208 | setup_host_label_idx_to_label_user(); 209 | } 210 | 211 | /** 212 | * @brief Copy _host_label_idx_to_label_user to the device and to 213 | * _label_user_to_label_idx. 
214 | */ 215 | void setup_host_label_idx_to_label_user() { 216 | auto nb_labels = _host_label_idx_to_label_user.size(); 217 | _label_idx_to_label_user = vector_t( 218 | const_cast(_host_label_idx_to_label_user.data()), 219 | range<1>(nb_labels)); 220 | 221 | // Map label user back to label idx 222 | for (unsigned i = 0; i < nb_labels; ++i) { 223 | _label_user_to_label_idx[_host_label_idx_to_label_user[i]] = i; 224 | } 225 | } 226 | 227 | /** 228 | * @brief Check that nb_labels was given or set_label_set has been called. 229 | * 230 | * @param[in, out] nb_labels 231 | * @return true if nb_labels was given or set_label_set has been called 232 | */ 233 | bool check_nb_labels(unsigned& nb_labels) { 234 | if (nb_labels == 0) { 235 | nb_labels = get_nb_labels(); 236 | if (nb_labels == 0) { 237 | std::cerr << "Error: set_label_set must be called before training if " 238 | "nb_labels is 0." 239 | << std::endl; 240 | return false; 241 | } 242 | } 243 | if (nb_labels == 1) { 244 | std::cerr << "Error: must have more than one label." << std::endl; 245 | return false; 246 | } 247 | return true; 248 | } 249 | }; 250 | 251 | } // namespace ml 252 | 253 | #endif // INCLUDE_ML_CLASSIFIERS_CLASSIFIER_HPP 254 | -------------------------------------------------------------------------------- /include/ml/classifiers/data_splitter.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_HPP
#define INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_HPP

#include
#include

#include "ml/classifiers/classifier.hpp"
#include "ml/math/mat_ops.hpp"

namespace ml {

/**
 * @brief Abstract class of all classifiers needing to split the data for each
 * label.
 *
 * train() groups the observations by label and forwards each group to
 * train_for_each_label(), which derived classes must implement.
 *
 * @tparam DataT
 * @tparam LabelT
 */
template
class data_splitter : public virtual classifier {
  template
  using NameGenDS = NameGen;

 public:
  /**
   * @brief Call train_for_each_label with a sub-dataset.
   *
   * Assumes labels are integers in [min(labels), max(labels)]
   *
   * Returns without training when check_nb_labels() fails.
   *
   * @param q
   * @param dataset
   * @param host_labels label of each observation of dataset
   * @param nb_labels number of different labels
   */
  virtual void train(queue& q, matrix_t& dataset,
                     std::vector& host_labels,
                     unsigned nb_labels = 0) override {
    if (!this->check_nb_labels(nb_labels)) {
      return;
    }

    auto nb_obs = access_data_dim(dataset, 0);
    assert_eq(nb_obs, host_labels.size());

    // Stored for the derived classes.
    _data_dim = access_data_dim(dataset, 1);
    _data_dim_pow2 = access_ker_dim(dataset, 1);

    this->process_labels(host_labels, nb_labels);

    // Compute indices for each labels
    auto labels_indices =
        this->get_labels_indices(host_labels, nb_labels, nb_obs);

    // Train for each label
    train_setup_for_each_label(q);
    for (unsigned i = 0; i < nb_labels; ++i) {
      const auto& act_labels_indices = labels_indices[i];
      auto act_data = split_by_index(q, dataset, act_labels_indices);
      train_for_each_label(q, i, act_data);
    }
  }

 protected:
  SYCLIndexT _data_dim;       // set by train() from access_data_dim(dataset, 1)
  SYCLIndexT _data_dim_pow2;  // set by train() from access_ker_dim(dataset, 1)

  /// Hook called once by train() before the per-label loop; no-op by default.
  virtual void train_setup_for_each_label(queue&) {}
  /// Train on act_data, the sub-dataset built for the label of index label_idx.
  virtual void train_for_each_label(queue& q, unsigned label_idx,
                                    matrix_t& act_data) = 0;
};

}  // namespace ml

#endif  // INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_HPP

--------------------------------------------------------------------------------
/include/ml/classifiers/data_splitter_extremum_dist.hpp:
--------------------------------------------------------------------------------
/**
 * Copyright (C) Codeplay Software Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_MIN_DIST_HPP
#define INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_MIN_DIST_HPP

#include "ml/classifiers/data_splitter.hpp"
#include "ml/classifiers/extremum_dist.hpp"

namespace ml {

/**
 * @brief Abstract class regrouping the data_splitter and extremum_dist classes.
*
 * @tparam DataT
 * @tparam LabelT
 * @tparam Compare minimize or maximize the computed distance
 */
template
class data_splitter_extremum_dist
    : public data_splitter
    , public extremum_dist {
 protected:
  /// Record the dimension seen during training so that predict() can assert
  /// that the data it receives has the same dimension.
  inline virtual void train_setup_for_each_label(queue&) override {
    this->_predict_data_dim_assert = this->_data_dim;
  }
};

}  // namespace ml

#endif  // INCLUDE_ML_CLASSIFIERS_DATA_SPLITTER_MIN_DIST_HPP

--------------------------------------------------------------------------------
/include/ml/classifiers/em/em_classifier.hpp:
--------------------------------------------------------------------------------
/**
 * Copyright (C) Codeplay Software Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INCLUDE_ML_CLASSIFIERS_EM_EM_CLASSIFIER_HPP
#define INCLUDE_ML_CLASSIFIERS_EM_EM_CLASSIFIER_HPP

#include

#include "ml/classifiers/data_splitter_extremum_dist.hpp"

namespace ml {

/**
 * @brief Classifier using the EM algorithm.
 *
 * If used with log_model_per_label and log_gaussian_distribution this
 * implements a GMM. The GMM learn M models per label (instead of M=1 with a
 * GaussClassifier).
31 | * 32 | * @see log_model_per_label 33 | * @tparam LabelT 34 | * @tparam ModelPerLabelT type of the model to use 35 | */ 36 | template 37 | class em_classifier 38 | : public data_splitter_extremum_dist { 40 | public: 41 | using DataType = typename ModelPerLabelT::DataType; 42 | 43 | em_classifier(ModelPerLabelT model_impl = ModelPerLabelT()) 44 | : data_splitter_extremum_dist(), 46 | _model_impl(model_impl) {} 47 | 48 | virtual void load_from_disk(queue& q) override { 49 | for (unsigned i = 0; i < this->get_nb_labels(); ++i) { 50 | _ems[i].load_from_disk(q); 51 | } 52 | } 53 | 54 | virtual void save_to_disk(queue& q) override { 55 | for (unsigned i = 0; i < this->get_nb_labels(); ++i) { 56 | _ems[i].save_to_disk(q); 57 | } 58 | } 59 | 60 | protected: 61 | std::vector _ems; 62 | 63 | virtual void train_setup_for_each_label(queue& q) override { 64 | data_splitter_extremum_dist::train_setup_for_each_label(q); 66 | 67 | for (unsigned i = 0; i < this->get_nb_labels(); ++i) { 68 | _ems.push_back(_model_impl); // Copy model parameters 69 | _ems.back().set_idx(i); 70 | } 71 | } 72 | 73 | virtual inline void train_for_each_label( 74 | queue& q, unsigned label_idx, matrix_t& act_data) override { 75 | _ems[label_idx].train(q, act_data); 76 | } 77 | 78 | virtual void compute_dist(queue& q, matrix_t& dataset, 79 | matrix_t& dist) override { 80 | for (unsigned label_idx = 0; label_idx < this->get_nb_labels(); 81 | ++label_idx) { 82 | auto dist_row = dist.get_row(label_idx); 83 | _ems[label_idx].compute_llk(q, dataset, dist_row); 84 | } 85 | } 86 | 87 | private: 88 | ModelPerLabelT _model_impl; 89 | }; 90 | 91 | } // namespace ml 92 | 93 | #endif // INCLUDE_ML_CLASSIFIERS_EM_EM_CLASSIFIER_HPP 94 | -------------------------------------------------------------------------------- /include/ml/classifiers/extremum_dist.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 
*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INCLUDE_ML_CLASSIFIERS_MIN_DIST_HPP
#define INCLUDE_ML_CLASSIFIERS_MIN_DIST_HPP

#include "ml/classifiers/classifier.hpp"
#include "ml/math/mat_ops.hpp"

namespace ml {

/**
 * @brief Determine whether the classifier should minimize or maximize the
 * computed distance.
 */
enum extremum_dist_compare { LESS, GREATER };

namespace detail {

/**
 * @brief Traits attached to an extremum_dist_compare value.
 *
 * Each specialization provides the comparison functor Op and a SIGN constant.
 */
template
struct compare_detail;

/// Specialization using std::less, with SIGN = -1.
template <>
struct compare_detail {
  template
  using Op = std::less;
  static constexpr int SIGN = -1;
};

/// Specialization using std::greater, with SIGN = 1.
template <>
struct compare_detail {
  template
  using Op = std::greater;
  static constexpr int SIGN = 1;
};

}  // namespace detail

/**
 * @brief Abstract class of all classifiers minimizing or maximizing a distance.
*
 * Derived classes implement compute_dist(); predict() then selects, for each
 * observation, the label whose distance is the extremum according to Compare.
 *
 * @tparam DataT
 * @tparam LabelT
 * @tparam Compare minimize or maximize the computed distance
 */
template
class extremum_dist : public virtual classifier {
 protected:
  using Op = typename detail::compare_detail::template Op;

  static constexpr DataT SIGN =
      static_cast(detail::compare_detail::SIGN);
  // Data dimension that predict() asserts on; must be set by derived classes.
  SYCLIndexT _predict_data_dim_assert;

  /**
   * @brief Fill dist, which predict() creates with one row per label and one
   * column per observation of dataset.
   */
  virtual void compute_dist(queue& q, matrix_t& dataset,
                            matrix_t& dist) = 0;

  template
  using NameGenED = NameGen;

 public:
  /**
   * @brief Predict the user label of each observation of dataset.
   *
   * @param q
   * @param dataset its second dimension must equal _predict_data_dim_assert
   * @return one user label per observation
   */
  virtual vector_t predict(queue& q,
                           matrix_t& dataset) override {
    assert_eq(access_data_dim(dataset, 1), this->_predict_data_dim_assert);

    auto nb_labels = this->get_nb_labels();
    auto nb_obs = access_data_dim(dataset, 0);
    auto padded_nb_obs =
        get_device_constants()->pad_sub_buffer_size(nb_obs);
    // The pad between nb_obs and padded_nb_obs can be left uninitialized.
    // It will produce random values in predicted_labels which shouldn't be
    // read.
    matrix_t dist(range<2>(nb_labels, nb_obs),
                  get_optimal_nd_range(nb_labels, padded_nb_obs));
    compute_dist(q, dataset, dist);

    // Find extremum dist for each column
    vector_t predicted_labels(range<1>(nb_obs),
                              get_optimal_nd_range(padded_nb_obs));
    q.submit([this, &dist, &predicted_labels, nb_labels](handler& cgh) {
      auto dist_acc = dist.template get_access_2d(cgh);
      auto label_idx_to_user_acc =
          this->_label_idx_to_label_user
              .template get_access_1d(cgh);
      auto predicted_labels_acc =
          predicted_labels.template get_access_1d(
              cgh);
      cgh.parallel_for>(
          predicted_labels.get_nd_range(), [=](nd_item<1> item) {
            // One work-item per column (observation) of dist.
            auto col = item.get_global_id(0);
            auto extremum_index = 0;
            auto extremum_dist = dist_acc(extremum_index, col);
            for (unsigned i = 1; i < nb_labels; ++i) {  // Loop is small enough
              if (Op()(dist_acc(i, col), extremum_dist)) {
                extremum_dist = dist_acc(i, col);
                extremum_index = i;
              }
            }
            // Translate the label index back to the label given by the user.
            predicted_labels_acc(col) = label_idx_to_user_acc(extremum_index);
          });
    });

    return predicted_labels;
  }
};

}  // namespace ml

#endif  // INCLUDE_ML_CLASSIFIERS_MIN_DIST_HPP

--------------------------------------------------------------------------------
/include/ml/classifiers/svm/kernel_cache.hpp:
--------------------------------------------------------------------------------
/**
 * Copyright (C) Codeplay Software Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef INCLUDE_ML_CLASSIFIERS_SVM_KERNEL_CACHE_HPP 17 | #define INCLUDE_ML_CLASSIFIERS_SVM_KERNEL_CACHE_HPP 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "ml/classifiers/svm/svm_kernels.hpp" 25 | 26 | namespace ml { 27 | 28 | namespace detail { 29 | 30 | /** 31 | * @brief Cache either the whole kernel matrix or only the last row used. 32 | * 33 | * @tparam KerFun 34 | * @tparam T 35 | */ 36 | template 37 | class kernel_cache { 38 | public: 39 | kernel_cache(queue& q, const KerFun& ker, matrix_t& x, 40 | const range<1>& data_rng, const nd_range<1>& ker_rng) 41 | : _q(q), _ker(ker), _x(x), _ker_diag_buf(data_rng, ker_rng) { 42 | // Compute the diagonal values of ker only once 43 | ker(q, x, _ker_diag_buf); 44 | auto m = access_ker_dim(x, 0); 45 | auto padded_m = to_pow2(m); 46 | auto pad_size_rng = get_optimal_nd_range(range<1>(padded_m - m), id<1>(m)); 47 | if (pad_size_rng.get_global_linear_range() > 0) { 48 | sycl_memset(q, _ker_diag_buf, pad_size_rng); 49 | } 50 | } 51 | 52 | virtual vector_t get_ker_row(SYCLIndexT row) = 0; 53 | 54 | inline vector_t& get_ker_diag() { return _ker_diag_buf; } 55 | inline T get_ker_diag(SYCLIndexT row) { 56 | return _ker_diag_buf.read_to_host(row); 57 | } 58 | 59 | protected: 60 | queue& _q; 61 | const KerFun& _ker; 62 | 63 | matrix_t& _x; 64 | vector_t _ker_diag_buf; // diagonal of kernel matrix 65 | }; 66 | 67 | /** 68 | * @brief Compute the whole kernel matrix once 69 | * 70 | * If resulting matrix is too big, use kernel_cache_row instead. 
*
 * @tparam KerFun
 * @tparam T
 */
template
class kernel_cache_matrix : public kernel_cache {
 public:
  /**
   * @brief Compute the diagonal (base class) and the full kernel matrix of
   * \p x with itself.
   */
  kernel_cache_matrix(queue& q, const KerFun& ker, matrix_t& x,
                      const range<1>& data_rng, const nd_range<1>& ker_rng)
      : kernel_cache(q, ker, x, data_rng, ker_rng), _ker_mat() {
    auto nb_obs = access_ker_dim(x, 0);
    auto padded_nb_obs = get_device_constants()->pad_sub_buffer_size(nb_obs);
    _ker_mat = matrix_t(range<2>(nb_obs, nb_obs),
                        get_optimal_nd_range(nb_obs, padded_nb_obs));
    ker(q, x, x, _ker_mat);
  }

  /// @return the row of index \p row of the precomputed kernel matrix
  inline virtual vector_t get_ker_row(SYCLIndexT row) override {
    return _ker_mat.get_row(row);
  }

 private:
  matrix_t _ker_mat;  // whole kernel matrix, computed in the constructor
};

/**
 * @brief Map a row index with its corresponding row in the kernel matrix.
 *
 * Should be used if the kernel matrix is too large.
 *
 * nb_cache_line is the maximum number of kernel line to cache.
 * It should be 2 for simple kernel (linear or polynomial) and grow bigger for
 * more complex kernels. The maximum size of the cache in byte is sizeof(T) * n
 * * nb_cache_line.
*
 * Rows are evicted in least-recently-used order once nb_cache_line rows are
 * cached.
 *
 * @tparam KerFun
 * @tparam T
 */
template
class kernel_cache_row : public kernel_cache {
 public:
  kernel_cache_row(queue& q, const KerFun& ker, matrix_t& x,
                   const range<1>& data_rng, const nd_range<1>& ker_rng,
                   SYCLIndexT nb_cache_line)
      : kernel_cache(q, ker, x, data_rng, ker_rng),
        _nb_cache_line(nb_cache_line),
        _ker_cache(),
        _cache_last_access() {}

  /**
   * @brief Return the kernel row of index \p row, computing it on a miss.
   *
   * NOTE(review): on eviction the storage of the evicted row is reused for the
   * new row; if vector_t copies share their storage, a row returned earlier
   * may be overwritten once it is evicted - confirm against vector_t.
   */
  virtual vector_t get_ker_row(SYCLIndexT row) override {
    auto it = _ker_cache.find(row);
    if (it != _ker_cache.end()) {
      // Move element row to the end
      auto row_it =
          std::find(_cache_last_access.begin(), _cache_last_access.end(), row);
      _cache_last_access.splice(_cache_last_access.end(), _cache_last_access,
                                row_it);
      return it->second;
    }

    // Miss: row becomes the most recently used entry.
    _cache_last_access.push_back(row);
    if (_ker_cache.size() >= _nb_cache_line) {
      // Cache full: evict the least recently used row and recycle its storage
      // for the requested row.
      auto replace_row = _cache_last_access.front();
      _cache_last_access.pop_front();
      auto ker_row = std::move(_ker_cache[replace_row]);
      _ker_cache.erase(replace_row);
      this->_ker(this->_q, this->_x, row, ker_row);
      auto inserted_it = _ker_cache.insert(std::make_pair(row, ker_row));
      return inserted_it.first->second;
    }

    // Cache not full yet: create a new row with the same ranges as the
    // diagonal buffer and compute it.
    auto inserted_it = _ker_cache.emplace(
        std::piecewise_construct, std::forward_as_tuple(row),
        std::forward_as_tuple(this->_ker_diag_buf.data_range,
                              this->_ker_diag_buf.kernel_range));
    auto& ker_row = inserted_it.first->second;
    this->_ker(this->_q, this->_x, row, ker_row);
    return ker_row;
  }

 private:
  SYCLIndexT _nb_cache_line;  // maximum number of cached rows
  std::unordered_map>
      _ker_cache;  // Cached rows of kernel matrix
  std::list _cache_last_access;  // Indices of last used rows
};

}  // namespace detail

}  // namespace ml

#endif  // INCLUDE_ML_CLASSIFIERS_SVM_KERNEL_CACHE_HPP

-------------------------------------------------------------------------------- /include/ml/eigen/eigen.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | /** 17 | * @file 18 | * @brief Include the Tensor module of Eigen using SYCL and define useful 19 | * aliases. 20 | */ 21 | 22 | #ifndef INCLUDE_ML_EIGEN_MY_EIGEN_HPP 23 | #define INCLUDE_ML_EIGEN_MY_EIGEN_HPP 24 | 25 | #include 26 | 27 | namespace ml { 28 | 29 | using Eigen::Dynamic; 30 | 31 | template 32 | using tensor_map_t = Eigen::TensorMap>; 33 | 34 | #define DEFINE_EIGEN_ALIAS(NAME, DIM) \ 35 | template \ 36 | using eig_##NAME##_t = Eigen::Tensor; \ 37 | template \ 38 | using eig_##NAME##_map_t = Eigen::TensorMap> 39 | 40 | /// @brief Generate \p eig_scalar_t and \p eig_scalar_map_t 41 | DEFINE_EIGEN_ALIAS(scalar, 0); 42 | /// @brief Generate \p eig_vec_t and \p eig_vec_map_t 43 | DEFINE_EIGEN_ALIAS(vec, 1); 44 | /// @brief Generate \p eig_mat_t and \p eig_mat_map_t 45 | DEFINE_EIGEN_ALIAS(mat, 2); 46 | /// @brief Generate \p eig_mats_t and \p eig_mats_map_t 47 | DEFINE_EIGEN_ALIAS(mats, 3); 48 | 49 | using eig_index_t = typename eig_mat_t::Index; 50 | using eig_dim_pair_t = typename eig_mat_t::DimensionPair; 51 | template 52 | using eig_dsize_t = Eigen::DSizes; 53 | template 54 | using eig_dims_t = Eigen::array; 55 | 56 
| } // namespace ml 57 | 58 | #endif // INCLUDE_ML_EIGEN_MY_EIGEN_HPP 59 | -------------------------------------------------------------------------------- /include/ml/eigen/sycl_to_eigen.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef INCLUDE_ML_EIGEN_SYCL_TO_EIGEN_HPP 17 | #define INCLUDE_ML_EIGEN_SYCL_TO_EIGEN_HPP 18 | 19 | #include 20 | 21 | #include "ml/utils/access.hpp" 22 | #include "ml/utils/buffer_t.hpp" 23 | 24 | namespace ml { 25 | 26 | namespace detail { 27 | 28 | template 29 | eig_dsize_t range_to_dsize(const range& r) { 30 | static_assert(IN_DIM <= OUT_DIM, ""); 31 | 32 | eig_dsize_t dim; 33 | int i = 0; 34 | for (; i < IN_DIM; ++i) { 35 | dim[i] = static_cast(r[i]); 36 | } 37 | for (; i < OUT_DIM; ++i) { 38 | dim[i] = 1; 39 | } 40 | return dim; 41 | } 42 | 43 | template <> 44 | eig_dsize_t<0> range_to_dsize<1, 0>(const range<1>&) { 45 | return {}; 46 | } 47 | 48 | } // namespace detail 49 | 50 | /** 51 | * @brief Convert a SYCL buffer to an Eigen Tensor. 52 | * 53 | * The class holds the host pointer and makes sure that the Tensor is destroyed 54 | * at the end.\n Thus this object must stay alive as long as the Tensor is used. 
55 | * 56 | * @todo Because of the way Eigen works if 2 \p sycl_to_eigen_t objects are 57 | * created with the same buffer and one is destroyed, the 2 Tensors become 58 | * invalid. The fix would require to either count the number of references for 59 | * each buffer or to create a different pointer if one already exist. 60 | * 61 | * @tparam T 62 | * @tparam IN_DIM dimension of the SYCL buffer 63 | * @tparam OUT_DIM dimension of the Eigen Tensor 64 | * @tparam DataLayout Eigen::RowMajor or Eigen::ColMajor 65 | */ 66 | template 68 | class sycl_to_eigen_t { 69 | private: 70 | using Self = sycl_to_eigen_t; 71 | 72 | public: 73 | sycl_to_eigen_t() = default; 74 | 75 | sycl_to_eigen_t(buffer_t& b, const eig_dsize_t& sizes) { 76 | auto reinterpret_buffer = 77 | b.template reinterpret( 78 | cl::sycl::range<1>(b.get_count() * sizeof(T))); 79 | _host_ptr = 80 | static_cast(get_eigen_device().attach_buffer(reinterpret_buffer)) + 81 | b.sub_buffer_offset.get(0); 82 | _tensor = std::make_unique>(_host_ptr, 83 | sizes); 84 | } 85 | 86 | ~sycl_to_eigen_t() { 87 | if (_host_ptr) { 88 | get_eigen_device().detach_buffer(_host_ptr); 89 | } 90 | } 91 | 92 | /** 93 | * @return the Eigen Tensor 94 | */ 95 | inline auto& tensor() { return *_tensor; } 96 | 97 | /** 98 | * @return the Eigen TensorDevice (for assignment) 99 | */ 100 | inline auto device() { return tensor().device(get_eigen_device()); } 101 | 102 | inline const T* ptr() const { return _host_ptr; } 103 | 104 | // No copy, only move 105 | sycl_to_eigen_t(const Self&) = delete; 106 | sycl_to_eigen_t(Self&&) = default; 107 | Self& operator=(const Self&) = delete; 108 | Self& operator=(Self&&) = default; 109 | 110 | private: 111 | T* _host_ptr; 112 | std::unique_ptr> _tensor; 113 | }; 114 | 115 | /** 116 | * @brief Create a Tensor of dimension 0 from a SYCL buffer. 117 | * 118 | * Only the first value of the buffer is used. 
*
 * @tparam IN_DIM
 * @tparam DataLayout
 * @tparam T
 * @param b buffer whose kernel size must be at least 1
 * @return the \p sycl_to_eigen_t associated to \p b
 */
template
inline auto sycl_to_scalar_eigen(buffer_t& b) {
  assert_less_or_eq(1LU, b.get_kernel_size());
  return sycl_to_eigen_t(b, eig_dsize_t<0>());
}

/**
 * @brief Create a Tensor of any dimensions from a SYCL buffer.
 *
 * @tparam IN_DIM dimension of the input buffer
 * @tparam OUT_DIM dimension of the output Tensor
 * @tparam R_DIM dimension of the range
 * @tparam DataLayout
 * @tparam T
 * @param b
 * @param r range defining the size of the tensor; its size must not exceed
 *        the kernel size of \p b
 * @return the \p sycl_to_eigen_t associated to \p b
 */
template
inline auto sycl_to_eigen(buffer_t& b, const range& r) {
  // R_DIM must be between IN_DIM and OUT_DIM; the only other accepted case is
  // a range of dimension 1 converted to a Tensor of dimension 0.
  static_assert(
      R_DIM >= IN_DIM && (R_DIM <= OUT_DIM || (R_DIM == 1 && OUT_DIM == 0)),
      "");
  assert_less_or_eq(r.size(), b.get_kernel_size());
  return sycl_to_eigen_t(
      b, detail::range_to_dsize(r));
}

/// Same as above, using the kernel range of \p b as the size of the Tensor.
/// @see sycl_to_eigen(buffer_t&, const range&)
template
inline auto sycl_to_eigen(buffer_t& b) {
  return sycl_to_eigen(b, b.get_kernel_range());
}

/**
 * @brief Force a buffer of dimension 1 to be converted to a Tensor of
 * dimension 2.
 *
 * @tparam D whether to build the Tensor as a column (by default) or a row.
* @tparam DataLayout
 * @tparam T
 * @param v
 * @return the \p sycl_to_eigen_t associated to \p v
 */
template
inline auto sycl_to_eigen_2d(vector_t& v) {
  // The 2d range is built from the kernel range of v and a dimension of 1.
  return sycl_to_eigen<1, 2, DataLayout>(
      v, build_lin_or_tr(), range<2>>(v.get_kernel_range()[0], 1));
}

}  // namespace ml

#endif  // INCLUDE_ML_EIGEN_SYCL_TO_EIGEN_HPP

--------------------------------------------------------------------------------
/include/ml/math/cov.hpp:
--------------------------------------------------------------------------------
/**
 * Copyright (C) Codeplay Software Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INCLUDE_ML_MATH_COV_HPP
#define INCLUDE_ML_MATH_COV_HPP

#include "ml/math/mat_mul.hpp"

namespace ml {

/**
 * @brief Compute the covariance matrix of \p dataset
 *
 * Assumes the data has been centered already.
 * It is normalized by the number of observation N (instead of the usual N-1).
* Formula for D=ROW is \f$ (dataset' * dataset) / N \f$
 *
 * @tparam D specifies which dimension represents the number of observations
 * @tparam T
 * @param q
 * @param[in] dataset
 * @param[out] cov_mat its data range must be data_dim x data_dim
 */
template
void cov(queue& q, matrix_t& dataset, matrix_t& cov_mat) {
  auto nb_obs = access_data_dim(dataset, 0);
  auto data_dim = access_data_dim(dataset, 1);
  assert_rng_eq(cov_mat.data_range, range<2>(data_dim, data_dim));

  // Product of the dataset with itself, then division by the number of
  // observations.
  mat_mul(), D>(q, dataset, dataset, cov_mat);
  sycl_normalize(q, cov_mat, static_cast(nb_obs));
}

}  // namespace ml

#endif  // INCLUDE_ML_MATH_COV_HPP

--------------------------------------------------------------------------------
/include/ml/math/functors.hpp:
--------------------------------------------------------------------------------
/**
 * Copyright (C) Codeplay Software Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/
#ifndef INCLUDE_ML_MATH_FUNCTORS_HPP
#define INCLUDE_ML_MATH_FUNCTORS_HPP

#include "ml/utils/common.hpp"

namespace ml {

namespace functors {

/// Return the result of `x > 0` converted to T.
template
struct positive {
  constexpr T operator()(T x) const { return x > 0; }
};

/// Return the result of `x < 0` converted to T.
template
struct negative {
  constexpr T operator()(T x) const { return x < 0; }
};

/// Return x unchanged.
template
struct identity {
  constexpr T operator()(T x) const { return x; }
};

/// Return the square root of x.
template
struct sqrt {
  constexpr T operator()(T x) const { return cl::sycl::sqrt(x); }
};

/**
 * @brief Unary functor built from a binary functor by fixing its first
 * argument to the constant given at construction.
 */
template
class partial_binary_op {
 public:
  partial_binary_op(T c, BinaryOp binary_op = BinaryOp())
      : _c(c), _binary_op(binary_op) {}

  inline constexpr T operator()(T x) const { return _binary_op(_c, x); }

 private:
  T _c;                 // fixed first argument
  BinaryOp _binary_op;  // wrapped binary functor
};

/// Return x1 + log(|x2|).
template
struct sum_log_abs {
  inline constexpr T operator()(T x1, T x2) const {
    return x1 + cl::sycl::log(cl::sycl::fabs(x2));
  }
};

/// Return exp(x1 - x2).
template
struct exp_diff {
  template
  constexpr T operator()(T1 x1, T2 x2) const {
    return cl::sycl::exp(x1 - x2);
  }
};

/// Return act - prev * factor, with the factor given at construction.
template
struct amortize {
  amortize(T factor) : _factor(factor) {}
  constexpr T operator()(T act, T prev) const { return act - prev * _factor; }

 private:
  T _factor;
};

}  // namespace functors

}  // namespace ml

#endif  // INCLUDE_ML_MATH_FUNCTORS_HPP

--------------------------------------------------------------------------------
/include/ml/math/helper.hpp:
--------------------------------------------------------------------------------
/**
 * Copyright (C) Codeplay Software Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INCLUDE_ML_MATH_HELPER_HPP
#define INCLUDE_ML_MATH_HELPER_HPP

#include

namespace ml {

/// Pi, computed as 4 * atan(1). std::atan is called with an int argument, so
/// the value is computed in double precision before being converted to T.
template
static const T PI = std::atan(1) * 4;

}

#endif  // INCLUDE_ML_MATH_HELPER_HPP

--------------------------------------------------------------------------------
/include/ml/math/mat_inv.hpp:
--------------------------------------------------------------------------------
/**
 * Copyright (C) Codeplay Software Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INCLUDE_ML_MATH_MAT_INV_HPP
#define INCLUDE_ML_MATH_MAT_INV_HPP

#include "ml/math/mat_mul.hpp"
#include "ml/math/mat_ops.hpp"

namespace ml {

class ml_mat_inv;

/**
 * @brief Invert the given matrix of size nxn.
 *
 * Uses the Gauss-Jordan method.
*
 * No pivoting is performed: every row is divided by the diagonal element
 * C(r,r) of the current step.
 *
 * @see tri_solve(queue&, matrix_t&, matrix_t&) for a more numerically
 * stable solution
 * @tparam T
 * @param q
 * @param[in] mat
 * @param[out] inv
 * @param c_buffer temporary buffer must be at least of size nx(2*n)
 * @param block_buffer temporary buffer must be at least of size nx(n+1)
 * @return A SYCL event corresponding to the last submitted operation
 */
template
event mat_inv(queue& q, matrix_t& mat, matrix_t& inv,
              matrix_t& c_buffer, matrix_t& block_buffer) {
  auto data_dim = mat.data_range[1];
  mat.assert_square();
  assert_rng_less_or_eq(mat.get_kernel_range(), inv.data_range);
  assert_rng_less_or_eq(c_buffer.data_range, data_dim, 2 * data_dim);
  assert_rng_less_or_eq(block_buffer.data_range, data_dim, data_dim + 1);

  // C = [A|I]
  q.submit([&mat, &c_buffer](handler& cgh) {
    auto mat_acc = mat.template get_access_2d(cgh);
    auto c_acc =
        c_buffer.template get_access_2d(cgh);
    cgh.parallel_for>(
        c_buffer.get_nd_range(), [=](nd_item<2> item) {
          auto global_nb_rows = item.get_global_range()[0];
          auto row = item.get_global_id(0);
          auto col = item.get_global_id(1);
          // Copy A if left part, set identity otherwise
          c_acc(row, col) = (col < global_nb_rows)
                                ? mat_acc(row, col)
                                : ((row + global_nb_rows) == col);
        });
  });

  // Compute C so that C = [I|A^-1]
  for (decltype(data_dim) r = 0; r < data_dim; ++r) {
    // Write update in block_buffer
    // The update reads row r of C while computing every row, hence the
    // intermediate buffer: block_buffer(row, col) holds the new value of
    // C(row, col + r).
    q.submit([&c_buffer, &block_buffer, r](handler& cgh) {
      auto c_acc = c_buffer.template get_access_2d(cgh);
      auto block_acc =
          block_buffer.template get_access_2d(cgh);
      cgh.parallel_for>(
          block_buffer.get_nd_range(), [=](nd_item<2> item) {
            auto row = item.get_global_id(0);
            auto col = item.get_global_id(1);
            // 0/1 selector used to pick one of the two formulas below
            // without branching.
            int is_row_eq_r = row == r;
            // if row == r: C(i,j) = C(i,j) / C(r,r)
            // else:        C(i,j) = C(i,j) - (C(i,r) / C(r,r)) * C(r, j)
            block_acc(row, col) =
                is_row_eq_r * (c_acc(row, col + r) / c_acc(r, r)) +
                !is_row_eq_r *
                    (c_acc(row, col + r) -
                     (c_acc(row, r) / c_acc(r, r)) * c_acc(r, col + r));
          });
    });

    // Copy block_buffer in c_buffer
    q.submit([&c_buffer, &block_buffer, r](handler& cgh) {
      auto c_acc = c_buffer.template get_access_2d(cgh);
      auto block_acc =
          block_buffer.template get_access_2d(cgh);
      cgh.parallel_for>(
          block_buffer.get_nd_range(), [=](nd_item<2> item) {
            auto row = item.get_global_id(0);
            auto col = item.get_global_id(1);
            c_acc(row, col + r) = block_acc(row, col);
          });
    });
  }

  // Copy the right part of C to inv
  return q.submit([&c_buffer, &inv](handler& cgh) {
    auto c_acc = c_buffer.template get_access_2d(cgh);
    auto inv_acc = inv.template get_access_2d(cgh);
    cgh.parallel_for>(
        inv.get_nd_range(), [=](nd_item<2> item) {
          auto global_nb_rows = item.get_global_range()[0];
          auto row = item.get_global_id(0);
          auto col = item.get_global_id(1);
          inv_acc(row, col) = c_acc(row, global_nb_rows + col);
        });
  });
}

/**
 * @brief Invert the given matrix and create any necessary temporary buffers.
119 | * Allocates c_buffer with range (n, 2n) and block_buffer with range (n, n+1), where n = mat.data_range[1], and forwards to the overload taking them. 120 | * @see mat_inv(queue&, matrix_t&, matrix_t&, matrix_t&, matrix_t&) 121 | * @tparam T 122 | * @param q queue on which the kernels are submitted 123 | * @param[in] mat matrix to invert 124 | * @param[out] inv receives the inverse 125 | * @return A SYCL event corresponding to the last submitted operation 126 | */ 127 | template 128 | event mat_inv(queue& q, matrix_t& mat, matrix_t& inv) { 129 | auto data_dim = mat.data_range[1]; 130 | matrix_t c_buffer(range<2>(data_dim, 2 * data_dim)); 131 | matrix_t block_buffer(range<2>(data_dim, data_dim + 1)); 132 | return mat_inv(q, mat, inv, c_buffer, block_buffer); 133 | } 134 | 135 | } // namespace ml 136 | 137 | #endif // INCLUDE_ML_MATH_MAT_INV_HPP 138 | -------------------------------------------------------------------------------- /include/ml/math/mat_mul.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef INCLUDE_ML_MATH_MAT_MUL_HPP 17 | #define INCLUDE_ML_MATH_MAT_MUL_HPP 18 | 19 | #include "ml/math/vec_ops.hpp" 20 | 21 | namespace ml { 22 | 23 | /** 24 | * @brief Matrix multiplication using Eigen. 25 | * 26 | * The tensors are sliced to their data_range first in case their kernel_range 27 | * is bigger.
28 | * \p b1 and \p b2 must each be 1D or 2D and \p b3 must have the smaller of their two tensor dimensions; this is checked at compile time. 29 | * @tparam D1 whether to transpose \p b1 30 | * @tparam D2 whether to transpose \p b2 31 | * @tparam T 32 | * @tparam DIM1 Tensor dimension of \p b1 33 | * @tparam DIM2 Tensor dimension of \p b2 34 | * @tparam DIM3 Tensor dimension of \p b3 35 | * @param[in] b1 mxk 36 | * @param[in] b2 kxn 37 | * @param[out] b3 mxn 38 | */ 39 | template 41 | void mat_mul(queue&, buffer_t& b1, buffer_t& b2, 42 | buffer_t& b3) { 43 | STATIC_ASSERT_DATA_DIM_FOR_DIM_2(DIM1, D1); 44 | STATIC_ASSERT_DATA_DIM_FOR_DIM_2(DIM2, D2); 45 | static_assert(1 <= DIM1 && DIM1 <= 2, "b1 must be a 1D or 2D tensor"); 46 | static_assert(1 <= DIM2 && DIM2 <= 2, "b2 must be a 1D or 2D tensor"); 47 | static_assert(DIM3 == std::min(DIM1, DIM2), "b3 must have the smaller tensor dimension of b1 and b2"); 48 | 49 | // Act as if data_dim were LIN because the transpose is handled by dims 50 | // Reshape inputs and outputs to be 2D 51 | auto eig_t1 = sycl_to_eigen(b1); 52 | auto eig_t2 = sycl_to_eigen(b2); 53 | auto eig_t3 = sycl_to_eigen(b3); 54 | // Restrict each tensor to its data_range so that kernel-range padding does not take part in the product 55 | auto sliced_t1 = eig_t1.tensor().slice( 56 | eig_dsize_t<2>{0, 0}, detail::range_to_dsize(b1.data_range)); 57 | auto sliced_t2 = eig_t2.tensor().slice( 58 | eig_dsize_t<2>{0, 0}, detail::range_to_dsize(b2.data_range)); 59 | auto sliced_t3 = eig_t3.tensor().slice( 60 | eig_dsize_t<2>{0, 0}, detail::range_to_dsize(b3.data_range)); 61 | 62 | sliced_t3.device(get_eigen_device()) = 63 | sliced_t1.contract(sliced_t2, get_contract_dim(), D2>()); 64 | } 65 | 66 | } // namespace ml 67 | 68 | #endif // INCLUDE_ML_MATH_MAT_MUL_HPP 69 | -------------------------------------------------------------------------------- /include/ml/math/qr.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef INCLUDE_ML_MATH_QR_HPP 17 | #define INCLUDE_ML_MATH_QR_HPP 18 | 19 | #include "ml/math/vec_ops.hpp" 20 | 21 | namespace ml { 22 | 23 | class ml_qr; 24 | 25 | /** 26 | * @brief QR decomposition of the given matrix of size mxn. 27 | * 28 | * Uses the Householder transformations algorithm. 29 | * Note: A blocked Householder would be more performant. 30 | * 31 | * qr(A) computes Q and R such that A = Q * R where Q is an orthogonal matrix 32 | * and R an upper triangular matrix. This implementation assumes that m is 33 | * greater than n and only writes R in the upper triangular part of A. The lower 34 | * triangular part of R should be set to 0 if needed. Note that for each row of 35 | * R a sign can be chosen, this implementation always chooses 1. 36 | * 37 | * @tparam T 38 | * @param q 39 | * @param[in, out] mat 40 | * @param w temporary buffer must be of size m at least. 41 | * @param vec_buf temporary buffer must be of size n at least. 42 | * @param eps threshold below which the division by u1 is avoided. 
43 | */ 44 | template 45 | void qr(queue& q, matrix_t& mat, vector_t& w, vector_t& vec_buf, 46 | T eps = 1E-5) { 47 | auto m = access_ker_dim(mat, 0); 48 | auto n = access_ker_dim(mat, 1); 49 | using IndexT = decltype(n); 50 | // Requires n <= m, and scratch vectors holding at least m (w) and n (vec_buf) entries 51 | assert_less_or_eq(n, m); 52 | assert_less_or_eq(m, w.data_range[0]); 53 | assert_less_or_eq(n, vec_buf.data_range[0]); 54 | 55 | static constexpr T ACT_SIGN = 1; 56 | SYCLIndexT jj_offset; 57 | T host_mat_jj; 58 | T act_norm; 59 | T act_u1; 60 | T act_tau; 61 | 62 | auto eig_mat = sycl_to_eigen(mat); 63 | // Force tensor dim 2 for matrix multiplication 64 | auto eig_w = sycl_to_eigen_2d(w); 65 | auto eig_vec_buf = sycl_to_eigen_2d(vec_buf); 66 | vector_t norm_buf((range<1>(1))); 67 | auto eig_norm = sycl_to_scalar_eigen(norm_buf); 68 | eig_dsize_t<1> slice_offsets_d1; 69 | eig_dsize_t<1> slice_extents_d1; 70 | eig_dsize_t<2> slice_offsets_mat; 71 | eig_dsize_t<2> slice_extents_mat; 72 | eig_dsize_t<2> slice_offsets_w{0, 0}; 73 | eig_dsize_t<2> slice_extents_w{1, 1}; 74 | eig_dsize_t<2> slice_offsets_vec_buf{0, 0}; 75 | eig_dsize_t<2> slice_extents_vec_buf{1, 1}; 76 | // For column j: compute the norm of its entries from row j down, derive act_norm, act_u1 and act_tau on the host and store act_norm at linear offset j * (n + 1) of mat 77 | auto compute_acts = [&](IndexT j) { 78 | jj_offset = j * (n + 1); 79 | // Get elements with indices [j, m) of the jth column and take the norm 80 | slice_offsets_d1[0] = j; 81 | slice_extents_d1[0] = m - j; 82 | eig_norm.device() = eig_mat.tensor() 83 | .chip(j, 1) 84 | .slice(slice_offsets_d1, slice_extents_d1) 85 | .square() 86 | .sum() 87 | .sqrt(); 88 | host_mat_jj = mat.read_to_host(jj_offset); 89 | // At each iteration the sign can be chosen to be different.
90 | // Choosing it to be -sign(mat(j,j)) maximizes the value of u1 but is more 91 | // likely to cause division by zero 92 | // act_sign = -cl::sycl::sign(host_mat_jj); 93 | act_norm = ACT_SIGN * norm_buf.read_to_host(0); 94 | act_u1 = host_mat_jj - act_norm; 95 | act_tau = -act_u1 / act_norm; 96 | mat.write_from_host(jj_offset, act_norm); 97 | }; 98 | 99 | auto w_rng = w.kernel_range; 100 | auto mat_rng = mat.kernel_range; 101 | SYCLIndexT nb_rows_ker; 102 | IndexT j = 0; 103 | for (; j < n - 1; ++j) { 104 | compute_acts(j); 105 | 106 | if (std::abs(act_u1) < eps) { 107 | // Note: matrix Q would be inaccurate if this is reached 108 | continue; 109 | } 110 | // Shrink the kernel ranges as the active sub-matrix gets smaller (only re-evaluated when the remaining row count is even) 111 | nb_rows_ker = m - j; 112 | if (nb_rows_ker % 2 == 0) { 113 | bool nb_rows_ker_is_pow2 = is_pow2(nb_rows_ker); 114 | if (nb_rows_ker_is_pow2 || !is_pow2(w_rng.get_global_range()[0])) { 115 | w_rng = get_optimal_nd_range(nb_rows_ker); 116 | } 117 | if (nb_rows_ker_is_pow2 || !is_pow2(mat_rng.get_global_range()[0])) { 118 | mat_rng = get_optimal_nd_range(nb_rows_ker, access_ker_dim(mat, 1)); 119 | } 120 | } 121 | 122 | // Compute w and update R 123 | q.submit([&mat, &w, w_rng, nb_rows_ker, act_u1, j](handler& cgh) { 124 | auto mat_acc = mat.template get_access_2d(cgh); 125 | auto w_acc = w.template get_access_1d(cgh); 126 | cgh.parallel_for>(w_rng, [=](nd_item<1> item) { 127 | auto row = item.get_global_id(0) + 1; // skip entry 0: w(0) is set to 1 from the host right after this kernel 128 | if (row < nb_rows_ker) { 129 | auto val = mat_acc(row + j, j) / act_u1; 130 | mat_acc(row + j, j) = val; 131 | w_acc(row) = val; 132 | } 133 | }); 134 | }); 135 | w.write_from_host(0, T(1)); 136 | 137 | // Compute vec_buf: contraction of the block of mat starting at (j, j + 1) with the first nb_rows_ker entries of w 138 | slice_extents_w[0] = nb_rows_ker; 139 | slice_extents_vec_buf[0] = n - j - 1; 140 | slice_offsets_mat[0] = j; 141 | slice_offsets_mat[1] = j + 1; 142 | slice_extents_mat[0] = nb_rows_ker; 143 | slice_extents_mat[1] = n - j - 1; 144 | auto sliced_w = eig_w.tensor().slice(slice_offsets_w, slice_extents_w); 145 | auto sliced_vec_buf =
eig_vec_buf.tensor().slice(slice_offsets_vec_buf, 146 | slice_extents_vec_buf); 147 | auto sliced_mat = 148 | eig_mat.tensor().slice(slice_offsets_mat, slice_extents_mat); 149 | sliced_vec_buf.device(get_eigen_device()) = 150 | sliced_mat.contract(sliced_w, get_contract_dim()); 151 | 152 | // Update R: mat(j + row, j + 1 + col) -= act_tau * w(row) * vec_buf(col) on the trailing block 153 | q.submit([&vec_buf, &w, &mat, mat_rng, act_tau, j, m, n](handler& cgh) { 154 | auto vec_acc = vec_buf.template get_access_1d(cgh); 155 | auto w_acc = w.template get_access_1d(cgh); 156 | auto mat_acc = mat.template get_access_2d(cgh); 157 | cgh.parallel_for>(mat_rng, [=](nd_item<2> item) { 158 | auto row = item.get_global_id(0); 159 | auto col = item.get_global_id(1); 160 | if (row < m - j && col < n - j - 1) { 161 | mat_acc(j + row, j + 1 + col) -= 162 | (act_tau * w_acc(row)) * vec_acc(col); 163 | } 164 | }); 165 | }); 166 | } 167 | // The loop stops at j == n - 1: the last column only needs its diagonal entry computed 168 | compute_acts(j); 169 | } 170 | 171 | /** 172 | * @brief QR decomposition of the given matrix. 173 | * Allocates the temporary vectors (w with one entry per kernel row of \p mat, vec_buf with the given ranges) and calls the overload taking them. 174 | * @tparam T 175 | * @param q 176 | * @param[in, out] mat 177 | * @param data_dim_rng 1d range of the size of an observation 178 | * @param data_dim_pow2_rng 1d kernel range of the size of an observation (can 179 | * be padded to a bigger power of 2) 180 | */ 181 | template 182 | void qr(queue& q, matrix_t& mat, const range<1>& data_dim_rng, 183 | const nd_range<1>& data_dim_pow2_rng) { 184 | range<1> nb_obs_rng(access_ker_dim(mat, 0)); 185 | auto nb_obs_pow2_rng = get_optimal_nd_range(nb_obs_rng); 186 | vector_t w_buf(nb_obs_rng, nb_obs_pow2_rng); 187 | vector_t vec_buf(data_dim_rng, data_dim_pow2_rng); 188 | 189 | qr(q, mat, w_buf, vec_buf); 190 | } 191 | 192 | /** 193 | * @brief QR decomposition of the given matrix.
194 | * 195 | * @see qr(queue&, matrix_t&, const range<1>&, const nd_range<1>&): both 196 | * ranges are derived here from the second kernel dimension of \p mat 197 | * @tparam T 198 | * @param q 199 | * @param[in, out] mat 200 | */ 201 | template 202 | void qr(queue& q, matrix_t& mat) { 203 | range<1> data_dim_rng(access_ker_dim(mat, 1)); 204 | auto data_dim_pow2_rng = get_optimal_nd_range(data_dim_rng); 205 | qr(q, mat, data_dim_rng, data_dim_pow2_rng); 206 | } 207 | 208 | } // namespace ml 209 | 210 | #endif // INCLUDE_ML_MATH_QR_HPP 211 | -------------------------------------------------------------------------------- /include/ml/math/tri_inv.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef INCLUDE_ML_MATH_TRI_INV_HPP 17 | #define INCLUDE_ML_MATH_TRI_INV_HPP 18 | 19 | #include "ml/utils/common.hpp" 20 | 21 | namespace ml { 22 | 23 | class ml_try_inv; 24 | 25 | /** 26 | * @brief Invert the given upper triangular matrix of size nxn. 27 | * 28 | * Uses the Gauss-Jordan method. 29 | * NOTE(review): the kernels accumulate I + T + T^2 + ... with T(i,j) = -tri(i,j) / tri(i,i) for j > i, then divide column j by tri(j,j); this reads like a finite power series rather than Gauss-Jordan elimination - confirm the method name. 30 | * @see tri_solve(queue&, matrix_t&, matrix_t&) for a more numerically 31 | * stable solution 32 | * @tparam T 33 | * @param q 34 | * @param[in] tri 35 | * @param[out] inv 36 | * @param t_buffer temporary buffer must be of size nxn at least. 37 | * @param t_pow_buffer temporary buffer must be of size nxn at least.
38 | * @param data_dim_1_nd_rng 1d kernel range of size n. Only its global size is checked against the kernel range of \p tri; no kernel is launched with it here. 39 | * @return A SYCL event corresponding to the last submitted operation 40 | */ 41 | template 42 | event tri_inv(queue& q, matrix_t& tri, matrix_t& inv, 43 | matrix_t& t_buffer, matrix_t& t_pow_buffer, 44 | const nd_range<1>& data_dim_1_nd_rng) { 45 | assert(&tri != &inv); 46 | assert(&tri != &t_buffer); 47 | assert(&tri != &t_pow_buffer); 48 | assert(&inv != &t_buffer); 49 | assert(&inv != &t_pow_buffer); 50 | assert(&t_buffer != &t_pow_buffer); 51 | // The four matrices must be distinct objects; the sizes below are all checked against the kernel range of tri 52 | auto data_dim_2_rng = tri.kernel_range.get_global_range(); 53 | assert_eq(data_dim_1_nd_rng.get_global_range()[0], data_dim_2_rng[0]); 54 | assert_rng_square(data_dim_2_rng); 55 | auto data_dim = data_dim_2_rng[0]; 56 | using IndexT = decltype(data_dim); 57 | assert_rng_less_or_eq(data_dim_2_rng, tri.data_range); 58 | assert_rng_less_or_eq(data_dim_2_rng, inv.data_range); 59 | assert_rng_less_or_eq(data_dim_2_rng, t_buffer.data_range); 60 | assert_rng_less_or_eq(data_dim_2_rng, t_pow_buffer.data_range); 61 | // Initialization: t = t_pow = T (entries above the diagonal divided by the diagonal entry of their row and negated, 0 elsewhere) and inv = I + T 62 | q.submit([&tri, &t_buffer, &t_pow_buffer, &inv](handler& cgh) { 63 | auto tri_acc = tri.template get_access_2d(cgh); 64 | auto t_acc = 65 | t_buffer.template get_access_2d(cgh); 66 | auto t_pow_acc = 67 | t_pow_buffer.template get_access_2d(cgh); 68 | auto inv_acc = inv.template get_access_2d(cgh); 69 | cgh.parallel_for>( 70 | tri.get_nd_range(), [=](nd_item<2> item) { 71 | auto row = item.get_global_id(0); 72 | auto col = item.get_global_id(1); 73 | T val = (col > row) ? (-tri_acc(row, col) / tri_acc(row, row)) : 0; 74 | t_acc(row, col) = val; 75 | t_pow_acc(row, col) = val; 76 | inv_acc(row, col) = (row == col) ?
1 : val; 77 | }); 78 | }); 79 | 80 | auto tri_nd_range = tri.get_nd_range(); 81 | for (IndexT i = 2; i < data_dim; ++i) { // i = 0 -> id; i = 1 -> t_acc 82 | // t_pow = t_pow * t, a mat_mul where we know some zeros; each product is also added to inv 83 | q.submit([&t_pow_buffer, &t_buffer, &inv, tri_nd_range, data_dim, 84 | i](handler& cgh) { 85 | auto t_pow_acc = 86 | t_pow_buffer.template get_access_2d(cgh); 87 | auto t_acc = t_buffer.template get_access_2d(cgh); 88 | auto inv_acc = inv.template get_access_2d(cgh); 89 | cgh.parallel_for>( 90 | tri_nd_range, [=](nd_item<2> item) { 91 | auto row = item.get_global_id(0); 92 | auto col = item.get_global_id(1); 93 | if (row < data_dim - i && col < data_dim - i && col >= row) { 94 | auto diag_idx = col - row; 95 | col += i; // shift to the i-th diagonal above the main one: only entries from there on are computed 96 | T sum = 0; 97 | // don't use the full line or column because of zeros 98 | for (size_t j = 0; j <= diag_idx; ++j) { 99 | sum += t_pow_acc(row, row + i + j - 1) * 100 | t_acc(row + i + j - 1, col); 101 | } 102 | // Store the result in the lower triangle part and transpose it 103 | // later 104 | t_pow_acc(col, row) = sum; 105 | inv_acc(row, col) += sum; 106 | } 107 | }); 108 | }); 109 | 110 | // Transpose lower part of t_pow_acc to upper part 111 | q.submit([&t_pow_buffer, tri_nd_range, data_dim, i](handler& cgh) { 112 | auto t_pow_acc = 113 | t_pow_buffer.template get_access_2d(cgh); 114 | cgh.parallel_for>( 115 | tri_nd_range, [=](nd_item<2> item) { 116 | auto row = item.get_global_id(0); 117 | auto col = item.get_global_id(1); 118 | if (row < data_dim - i && col < data_dim - i && col >= row) { 119 | col += i; 120 | t_pow_acc(row, col) = t_pow_acc(col, row); 121 | } 122 | }); 123 | }); 124 | } 125 | // Final scaling: every entry of inv is divided by the diagonal entry of tri of its column 126 | return q.submit([&tri, &inv](handler& cgh) { 127 | auto tri_acc = tri.template get_access_2d(cgh); 128 | auto inv_acc = inv.template get_access_2d(cgh); 129 | cgh.parallel_for>( 130 | tri.get_nd_range(), [=](nd_item<2> item) { 131 | auto row = item.get_global_id(0); 132 | auto col = item.get_global_id(1); 133 | inv_acc(row, col) /= tri_acc(col,
col); 134 | }); 135 | }); 136 | } 137 | 138 | /** 139 | * @brief Invert the given upper triangular matrix and create any necessary 140 | * temporary buffers. 141 | * \p tri must be square and \p inv must have the same kernel range; both temporaries are created with the data and kernel ranges of \p tri. 142 | * @see tri_inv(queue&, matrix_t&, matrix_t&, matrix_t&, matrix_t&, 143 | * const nd_range<1>&) 144 | * @tparam T 145 | * @param q 146 | * @param[in] tri 147 | * @param[out] inv 148 | * @return A SYCL event corresponding to the last submitted operation 149 | */ 150 | template 151 | event tri_inv(queue& q, matrix_t& tri, matrix_t& inv) { 152 | tri.assert_square(); 153 | assert_rng_eq(tri.get_kernel_range(), inv.get_kernel_range()); 154 | 155 | matrix_t t_buffer{tri.data_range, tri.kernel_range}; 156 | matrix_t t_pow_buffer{tri.data_range, tri.kernel_range}; 157 | 158 | return tri_inv(q, tri, inv, t_buffer, t_pow_buffer, 159 | get_optimal_nd_range(tri.data_range[0])); 160 | } 161 | 162 | } // namespace ml 163 | 164 | #endif // INCLUDE_ML_MATH_TRI_INV_HPP 165 | -------------------------------------------------------------------------------- /include/ml/math/tri_solve.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | #ifndef INCLUDE_ML_MATH_TRI_SOLVE_HPP 17 | #define INCLUDE_ML_MATH_TRI_SOLVE_HPP 18 | 19 | #include "ml/math/mat_ops.hpp" 20 | 21 | namespace ml { 22 | 23 | class ml_mat_tri_solve; 24 | class ml_mat_tri_solve_div_row; 25 | 26 | namespace detail { 27 | // Traits selecting the substitution order: the first row to solve, how to step to the next row, and which rows still need the solved row subtracted 28 | template 29 | struct tri_solve_data_dim; 30 | 31 | // Upper specific case: start from the last row (n - 1) and move up 32 | template <> 33 | struct tri_solve_data_dim { 34 | static inline SYCLIndexT get_row_idx(SYCLIndexT n, SYCLIndexT i) { 35 | return n - i - 1; 36 | } 37 | using get_next_row_idx_op = std::minus; 38 | using apply_subtract_condition_op = std::less; 39 | }; 40 | 41 | // Lower specific case: start from row 0 and move down 42 | template <> 43 | struct tri_solve_data_dim { 44 | static inline SYCLIndexT get_row_idx(SYCLIndexT, SYCLIndexT i) { return i; } 45 | using get_next_row_idx_op = std::plus; 46 | using apply_subtract_condition_op = std::greater; 47 | }; 48 | // Divide every column of row row_idx of X by the diagonal entry A(row_idx, row_idx) 49 | template 50 | event div_row(queue& q, matrix_t& A, matrix_t& X, SYCLIndexT row_idx, 51 | const nd_range<1>& col_ker_rng) { 52 | return q.submit([&A, &X, row_idx, col_ker_rng](handler& cgh) { 53 | // Don't need DA because we only access the diagonal 54 | auto a_acc = A.template get_access_2d(cgh); 55 | auto x_acc = X.template get_access_2d(cgh); 56 | cgh.parallel_for>( 57 | col_ker_rng, [=](nd_item<1> item) { 58 | auto col = item.get_global_id(0); 59 | x_acc(row_idx, col) /= a_acc(row_idx, row_idx); 60 | }); 61 | }); 62 | } 63 | // Subtract X(row_idx, col) * A(row, row_idx) from each row of X selected by apply_subtract_condition(row, row_idx) 64 | template 65 | event compute_x(queue& q, matrix_t& A, matrix_t& X, SYCLIndexT row_idx) { 66 | return q.submit([&A, &X, row_idx](handler& cgh) { 67 | auto a_acc = A.template get_access_2d(cgh); 68 | auto x_acc = X.template get_access_2d(cgh); 69 | const auto apply_subtract_condition = 70 | typename detail::tri_solve_data_dim::apply_subtract_condition_op(); 71 | cgh.parallel_for>( 72 | X.get_nd_range(), [=](nd_item<2> item) { 73 | auto row = item.get_global_id(DX); 74 | auto col = item.get_global_id(opp()); 75 | if (apply_subtract_condition(row, row_idx)) { 76 | x_acc(row,
col) -= x_acc(row_idx, col) * a_acc(row, row_idx); 77 | } 78 | }); 79 | }); 80 | } 81 | 82 | } // namespace detail 83 | 84 | /** 85 | * @brief Compute X = A \ X = inv(A) * X without explicitly inverting A. 86 | * 87 | * Assumes that A is upper triangular. 88 | * X (resp. X') must have the same number of rows as A if DX=LIN (resp. 89 | * DX=COL) 90 | * 91 | * @tparam DX whether to transpose \p X 92 | * @tparam DA whether to transpose \p A 93 | * @tparam T 94 | * @param q 95 | * @param[in, out] X 96 | * @param[in] A 97 | * @return A SYCL event corresponding to the last submitted operation 98 | */ 99 | template 100 | event tri_solve(queue& q, matrix_t& X, matrix_t& A) { 101 | const auto n = access_ker_dim(A, 0); 102 | A.assert_square(); 103 | assert_eq(access_ker_dim(X, 0), n); 104 | 105 | const auto nb_cols = access_ker_dim(X, 1); 106 | const auto col_ker_rng = get_optimal_nd_range(nb_cols); 107 | const auto get_next_row_idx = 108 | typename detail::tri_solve_data_dim::get_next_row_idx_op(); 109 | 110 | // First iteration can be computed directly 111 | event event; 112 | SYCLIndexT row_idx = detail::tri_solve_data_dim::get_row_idx(n, 0); 113 | SYCLIndexT next_row_idx = get_next_row_idx(row_idx, 1); 114 | event = detail::div_row(q, A, X, row_idx, col_ker_rng); 115 | 116 | // Each result found must be subtracted for the next iterations 117 | for (SYCLIndexT i = 1; i < n; ++i) { 118 | detail::compute_x(q, A, X, row_idx); 119 | row_idx = next_row_idx; 120 | next_row_idx = get_next_row_idx(row_idx, 1); // the value computed during the last iteration is never used 121 | event = detail::div_row(q, A, X, row_idx, col_ker_rng); 122 | } 123 | return event; 124 | } 125 | 126 | /** 127 | * @brief Compute X = A \ B = inv(A) * B without explicitly inverting A.
128 | * \p B is first copied into \p X, which is then solved in place. 129 | * @see tri_solve(queue&, matrix_t&, matrix_t&) 130 | * @tparam DX whether to transpose \p X 131 | * @tparam DA whether to transpose \p A 132 | * @tparam T 133 | * @param q 134 | * @param[out] X 135 | * @param[in] A 136 | * @param[in] B 137 | * @return A SYCL event corresponding to the last submitted operation 138 | */ 139 | template 140 | inline event tri_solve(queue& q, matrix_t& X, matrix_t& A, 141 | matrix_t& B) { 142 | sycl_copy(q, B, X); 143 | return tri_solve(q, X, A); 144 | } 145 | 146 | /** 147 | * @brief Compute X = C \ B = inv(C) * B with C = A'*A. 148 | * Implemented as two successive triangular solves with \p A: the first one reads \p B into \p X, the second one works on \p X in place. 149 | * @tparam DX whether to transpose \p X 150 | * @tparam T 151 | * @param q 152 | * @param[out] X 153 | * @param[in] A 154 | * @param[in] B 155 | * @return A SYCL event corresponding to the last submitted operation 156 | */ 157 | template 158 | event chol_solve(queue& q, matrix_t& X, matrix_t& A, matrix_t& B) { 159 | tri_solve(q, X, A, B); 160 | return tri_solve(q, X, A); 161 | } 162 | 163 | } // namespace ml 164 | 165 | #endif // INCLUDE_ML_MATH_TRI_SOLVE_HPP 166 | -------------------------------------------------------------------------------- /include/ml/preprocess/apply_pca.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | #ifndef INCLUDE_ML_PREPROCESS_APPLY_PCA_HPP 17 | #define INCLUDE_ML_PREPROCESS_APPLY_PCA_HPP 18 | 19 | #include 20 | #include 21 | 22 | #include "pca.hpp" 23 | 24 | namespace ml { 25 | 26 | /** 27 | * @brief Helper to compute the PCA from a training set and to apply 28 | * it to a test set. 29 | * 30 | * @see pca_svd 31 | * @tparam T 32 | */ 33 | template 34 | class apply_pca { 35 | public: 36 | apply_pca() 37 | : _enable_pca(true), 38 | _nb_vec_computed(0), 39 | _data_avg(range<1>()), 40 | _eigenvectors(range<2>()) {} 41 | 42 | /** 43 | * @brief Either load the eigenvectors or compute them and apply the PCA to 44 | * the given data. 45 | * If pca_args.keep_percent is not positive the PCA is disabled and \p data is returned as is. 46 | * @param q 47 | * @param[in, out] data this matrix has been centered after this call (unless the PCA is disabled) 48 | * @param pca_args @see struct pca_args 49 | * @return the new data 50 | */ 51 | matrix_t compute_and_apply(queue& q, matrix_t& data, 52 | const pca_args& pca_args) { 53 | _enable_pca = pca_args.keep_percent > 0; 54 | if (!_enable_pca) { 55 | return data; 56 | } 57 | 58 | auto data_dim = access_data_dim(data, 1); 59 | auto data_dim_pow2 = access_ker_dim(data, 1); 60 | 61 | _data_avg = 62 | vector_t(range<1>(data_dim), get_optimal_nd_range(data_dim_pow2)); 63 | // The name looked up here is built from min_nb_vecs whereas the name used when saving is built from the number of vectors actually computed, so a saved file is only found when both match 64 | std::string load_filename = get_filename( 65 | data_dim_pow2, pca_args.min_nb_vecs, pca_args.scale_factor); 66 | if (pca_args.auto_load && file_exists(load_filename)) { 67 | // avg and center_data would have been called by pca_svd otherwise 68 | avg(q, data, _data_avg); 69 | center_data(q, data, _data_avg); 70 | 71 | _nb_vec_computed = pca_args.min_nb_vecs; 72 | _eigenvectors = matrix_t( 73 | range<2>(pca_args.min_nb_vecs, data_dim), 74 | get_optimal_nd_range(pca_args.min_nb_vecs, data_dim_pow2)); 75 | load_array(q, _eigenvectors, load_filename); 76 | } else { 77 | std::cout << "Computing PCA..."
<< std::endl; 78 | _eigenvectors = pca_svd(q, data, _data_avg, pca_args); 79 | _nb_vec_computed = access_data_dim(_eigenvectors, 0); 80 | if (pca_args.save) { 81 | save_array(q, _eigenvectors, 82 | get_filename(data_dim_pow2, _nb_vec_computed, 83 | pca_args.scale_factor)); 84 | } 85 | } 86 | // Multiply the centered data by the eigenvectors; the result has one row per row of data and _nb_vec_computed columns 87 | matrix_t new_data = 88 | matrix_t(range<2>(access_data_dim(data, 0), _nb_vec_computed)); 89 | mat_mul(q, data, _eigenvectors, new_data); 90 | return new_data; 91 | } 92 | 93 | /** 94 | * @brief Apply the PCA to a dataset from previously computed eigenvectors and 95 | * data_avg. 96 | * Unless the PCA is disabled, eigenvectors must already have been computed or loaded (checked with an assert). 97 | * @param q 98 | * @param[in, out] data this matrix has been centered after this call (unless the PCA is disabled) 99 | * @return the new data 100 | */ 101 | matrix_t apply(queue& q, matrix_t& data) { 102 | if (!_enable_pca) { 103 | return data; 104 | } 105 | 106 | assert(_nb_vec_computed != 0); 107 | matrix_t new_data(range<2>(access_data_dim(data, 0), _nb_vec_computed)); 108 | center_data(q, data, _data_avg); 109 | mat_mul(q, data, _eigenvectors, new_data); 110 | return new_data; 111 | } 112 | 113 | private: 114 | bool _enable_pca; 115 | SYCLIndexT _nb_vec_computed; 116 | vector_t _data_avg; 117 | matrix_t _eigenvectors; 118 | 119 | /** 120 | * @brief Get the filename used for saving and loading eigenvectors.
121 | * The name is made of "pca_", nb_vec, data_dim_pow2, svd_factor and typeid(T).name(), separated by underscores. 122 | * @param data_dim_pow2 123 | * @param nb_vec 124 | * @return the filename 125 | */ 126 | inline std::string get_filename(SYCLIndexT data_dim_pow2, SYCLIndexT nb_vec, 127 | T svd_factor) { 128 | std::stringstream ss; 129 | ss << "pca_" << nb_vec << "_" << data_dim_pow2 << "_" << svd_factor << "_" 130 | << typeid(T).name(); 131 | return ss.str(); 132 | } 133 | 134 | /** 135 | * @param filename 136 | * @return true if filename exists (and is not locked), i.e. if it could be opened for reading 137 | */ 138 | inline bool file_exists(const std::string& filename) { 139 | std::ifstream ifs(filename); 140 | return ifs.good(); 141 | } 142 | }; 143 | 144 | } // namespace ml 145 | 146 | #endif // INCLUDE_ML_PREPROCESS_APPLY_PCA_HPP 147 | -------------------------------------------------------------------------------- /include/ml/preprocess/pca.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | #ifndef INCLUDE_ML_PREPROCESS_PCA_HPP 17 | #define INCLUDE_ML_PREPROCESS_PCA_HPP 18 | 19 | #include "ml/math/cov.hpp" 20 | #include "ml/math/mat_ops.hpp" 21 | #include "ml/math/svd.hpp" 22 | 23 | namespace ml { 24 | 25 | namespace detail { 26 | 27 | class ml_pca_svd_copy_v; 28 | // Gather rows: row r of out_v is set to row indices(r) of in_v 29 | template 30 | event copy_eigenvectors(queue& q, vector_t& indices, 31 | matrix_t& in_v, matrix_t& out_v) { 32 | return q.submit([&indices, &in_v, &out_v](handler& cgh) { 33 | auto in_acc = in_v.template get_access_2d(cgh); 34 | auto indices_acc = indices.template get_access_1d(cgh); 35 | auto out_acc = 36 | out_v.template get_access_2d(cgh); 37 | cgh.parallel_for>( 38 | out_v.get_nd_range(), [=](nd_item<2> item) { 39 | auto row = item.get_global_id(0); 40 | auto col = item.get_global_id(1); 41 | out_acc(row, col) = in_acc(indices_acc(row), col); 42 | }); 43 | }); 44 | } 45 | 46 | } // namespace detail 47 | 48 | /** 49 | * @brief Arguments given to PCA 50 | * 51 | * auto_load: whether to load the basis vectors from the disk if the expected 52 | * file is present, defaults to true. save: whether to save the basis vectors to 53 | * the disk if the PCA was not loaded, defaults to true. min_nb_vecs: minimum 54 | * number of vectors to use, defaults to 0 which disables this constraint. 55 | * keep_percent: minimum "amount of information" to keep in range [0; 1]. 0 56 | * disables the PCA and 1 keeps as many vectors as possible. Defaults to 1. 57 | * scale_factor: factor applied when computing the PCA, a higher value yields 58 | * more precision but is slower. Defaults to 1. 59 | * 60 | */ 61 | template 62 | struct pca_args { 63 | pca_args() 64 | : auto_load(true), 65 | save(true), 66 | min_nb_vecs(0), 67 | keep_percent(1.f), 68 | scale_factor(T(1)) {} 69 | 70 | bool auto_load; 71 | bool save; 72 | SYCLIndexT min_nb_vecs; 73 | float keep_percent; 74 | T scale_factor; 75 | }; 76 | 77 | /** 78 | * @brief Center the data and compute the principal components.
79 | * 80 | * Assumes the number of rows is the number of observations and the size of an 81 | * observation is a power of 2. Uses the svd to compute the eigenpairs. V = 82 | * pca(X) gives the eigenvectors so that Y = cX * V' where cX is the data 83 | * centered and Y is the new data with a smaller size of observation. 84 | * 85 | * @see apply_pca_svd 86 | * @tparam T 87 | * @param q 88 | * @param[in] data 89 | * @param[out] data_avg 90 | * @param pca_args @see struct pca_args 91 | * @return the eigenvectors V 92 | */ 93 | template 94 | matrix_t pca_svd(queue& q, matrix_t& data, vector_t& data_avg, 95 | const pca_args& pca_args) { 96 | avg(q, data, data_avg); 97 | center_data(q, data, data_avg); 98 | auto data_dim = access_data_dim(data, 1); 99 | auto data_dim_pow2 = access_ker_dim(data, 1); 100 | 101 | // For precision, scale data to change the eigenvalues but not the 102 | // eigenvectors 103 | auto scaled_data = matrix_t(data.data_range, data.kernel_range); 104 | if (pca_args.scale_factor != T(1)) { 105 | vec_unary_op(q, data, scaled_data, 106 | functors::partial_binary_op>( 107 | pca_args.scale_factor)); 108 | } 109 | 110 | matrix_t cov_matrix(range<2>(data_dim, data_dim), 111 | get_optimal_nd_range(data_dim_pow2, data_dim_pow2)); 112 | cov(q, scaled_data, cov_matrix); 113 | SYCLIndexT estimated_nb_vecs = data_dim; 114 | auto svd_out = svd(q, cov_matrix, estimated_nb_vecs); 115 | 116 | if (pca_args.keep_percent >= 1) { 117 | return svd_out.V; 118 | } 119 | 120 | // Sort indices of l in descending order 121 | std::vector host_indices(estimated_nb_vecs); 122 | std::iota(begin(host_indices), end(host_indices), 0); 123 | auto& host_l = svd_out.L; 124 | std::sort( 125 | begin(host_indices), end(host_indices), 126 | [&](SYCLIndexT i1, SYCLIndexT i2) { return host_l[i1] > host_l[i2]; }); 127 | 128 | // Compute nb_vecs needed to reach keep_percent 129 | SYCLIndexT nb_vecs = 0; 130 | float act_percent = 0; 131 | for (; nb_vecs < estimated_nb_vecs && act_percent < 
pca_args.keep_percent; 132 | ++nb_vecs) { 133 | act_percent += host_l[host_indices[nb_vecs]] / svd_out.eig_vals_sum; 134 | } 135 | nb_vecs = std::max(nb_vecs, pca_args.min_nb_vecs); 136 | std::cout << "Keeping " << nb_vecs << " vectors" << std::endl; 137 | assert(nb_vecs > 0); 138 | 139 | // Copy the eigenvectors with the highest eigenvalue 140 | vector_t sycl_indices(host_indices.data(), range<1>(nb_vecs)); 141 | matrix_t V(range<2>(nb_vecs, data_dim), 142 | get_optimal_nd_range(nb_vecs, data_dim_pow2)); 143 | detail::copy_eigenvectors(q, sycl_indices, svd_out.V, V); 144 | 145 | return V; 146 | } 147 | 148 | } // namespace ml 149 | 150 | #endif // INCLUDE_ML_PREPROCESS_PCA_HPP 151 | -------------------------------------------------------------------------------- /include/ml/utils/access.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | /** 17 | * @file 18 | * @brief Define data_dim and some common functions related to it. 19 | */ 20 | 21 | #ifndef INCLUDE_ML_UTILS_ACCESS_HPP 22 | #define INCLUDE_ML_UTILS_ACCESS_HPP 23 | 24 | #include "ml/utils/sycl_types.hpp" 25 | 26 | namespace ml { 27 | 28 | /** 29 | * @brief Represent either a choice of dimension or of transposing. 
30 | * 31 | * A choice of dimension means whether to use a row or a column.\n 32 | * A choice of transposing means whether to access the matrix as if it were 33 | * transposed or not.\n 34 | * 35 | */ 36 | enum data_dim { 37 | /// 0 38 | ROW = 0, 39 | /// Alias for ROW 40 | LIN = ROW, 41 | /// 1 42 | COL = 1, 43 | /// Alias for COL 44 | TR = COL 45 | }; 46 | 47 | namespace detail { 48 | 49 | template 50 | struct lin_or_tr { 51 | static inline T apply(T lin, T) { return lin; } 52 | }; 53 | 54 | template 55 | struct lin_or_tr { 56 | static inline T apply(T, T tr) { return tr; } 57 | }; 58 | 59 | } // namespace detail 60 | 61 | /** 62 | * @brief Return the first value if LIN, the second otherwise. 63 | * 64 | * @tparam D 65 | * @tparam T 66 | * @param lin 67 | * @param tr 68 | * @return \p lin if D=LIN, \p tr otherwise 69 | */ 70 | template 71 | inline constexpr T lin_or_tr(T lin, T tr) { 72 | return detail::lin_or_tr::apply(lin, tr); 73 | } 74 | 75 | /** 76 | * @brief Return the opposite value of D. 77 | * 78 | * @tparam D 79 | * @return TR if D=LIN, LIN otherwise 80 | */ 81 | template 82 | inline constexpr data_dim opp() { 83 | return static_cast((D + 1) % 2); 84 | } 85 | 86 | /** 87 | * @brief Access an index of a \p range<2> that may be swapped according to \p 88 | * D. 89 | * 90 | * @tparam D 91 | * @param r 92 | * @param i 93 | * @return the ith element if D=LIN, the other element otherwise 94 | */ 95 | template 96 | inline SYCLIndexT access_rng(const range<2>& r, SYCLIndexT i) { 97 | assert(i == 0 || i == 1); 98 | return r[lin_or_tr(i, (i + 1) % 2)]; 99 | } 100 | 101 | /** 102 | * @brief Construct an object \p B with the 2 given parameters that may be 103 | * swapped according to \p D. 
104 | * 105 | * @tparam D 106 | * @tparam B class to build, must have a constructor with 2 @ref SYCLIndexT 107 | * @param x1 108 | * @param x2 109 | * @return the built object 110 | */ 111 | template 112 | inline constexpr B build_lin_or_tr(SYCLIndexT x1, SYCLIndexT x2) { 113 | return B(lin_or_tr(x1, x2), lin_or_tr(x2, x1)); 114 | } 115 | 116 | /** 117 | * @brief Construct another object \p B with the 2 parameters extracted from \p 118 | * b that may be swapped according to \p D. 119 | * 120 | * @see build_lin_or_tr(SYCLIndexT, SYCLIndexT) 121 | * @tparam D 122 | * @tparam B class to build, must have a constructor with 2 arguments and a 123 | * squared bracket accessor 124 | * @param b 125 | * @return the built object 126 | */ 127 | template 128 | inline constexpr B build_lin_or_tr(const B& b) { 129 | return build_lin_or_tr(b[0], b[1]); 130 | } 131 | 132 | template 133 | inline constexpr std::array get_contract_dim() { 134 | return {eig_dim_pair_t(D1, D2)}; 135 | } 136 | 137 | } // namespace ml 138 | 139 | #endif // INCLUDE_ML_UTILS_ACCESS_HPP 140 | -------------------------------------------------------------------------------- /include/ml/utils/buffer_acc.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #ifndef INCLUDE_ML_UTILS_BUFFER_ACC_HPP 17 | #define INCLUDE_ML_UTILS_BUFFER_ACC_HPP 18 | 19 | #include "ml/utils/access.hpp" 20 | 21 | #ifndef ML_DEBUG_BOUND_CHECK 22 | /** 23 | * @brief Set to 1 for buffer initialization with nan and boundaries access 24 | * check. 25 | * 26 | * For debug only. 27 | * @warning Very slow. 28 | */ 29 | #define ML_DEBUG_BOUND_CHECK 0 30 | #endif // ML_DEBUG_BOUND_CHECK 31 | 32 | namespace ml { 33 | 34 | template 35 | class buffer_t; 36 | 37 | namespace detail { 38 | 39 | template 40 | struct get_index_2d; 41 | 42 | template <> 43 | struct get_index_2d { 44 | static inline SYCLIndexT compute(SYCLIndexT r, SYCLIndexT c, 45 | SYCLIndexT nb_cols) { 46 | return r * nb_cols + c; 47 | } 48 | }; 49 | 50 | template <> 51 | struct get_index_2d { 52 | static inline SYCLIndexT compute(SYCLIndexT r, SYCLIndexT c, 53 | SYCLIndexT nb_cols) { 54 | return c * nb_cols + r; 55 | } 56 | }; 57 | 58 | template 59 | struct is_reference_access { 60 | using value = T&; 61 | }; 62 | 63 | template 64 | struct is_reference_access { 65 | using value = T; 66 | }; 67 | 68 | template 69 | class buffer_1d_acc_t { 70 | public: 71 | buffer_1d_acc_t(handler& cgh, buffer_t* b) 72 | : 73 | #if ML_DEBUG_BOUND_CHECK 74 | _range(b->get_kernel_size()), 75 | #endif 76 | _offset(b->sub_buffer_offset), 77 | _acc(b->template get_access(cgh, b->sub_buffer_range, 78 | b->sub_buffer_offset)) { 79 | } 80 | 81 | inline typename is_reference_access::value operator()( 82 | SYCLIndexT x) const { 83 | x += _offset.get(0); 84 | #if ML_DEBUG_BOUND_CHECK 85 | if (x >= _range[0]) { 86 | printf("Warning accessing at (%lu) from buffer of size (%lu)\n", x, 87 | _range[0]); 88 | } 89 | #endif 90 | return _acc[x]; 91 | } 92 | 93 | inline accessor get() { return _acc; } 94 | 95 | private: 96 | #if ML_DEBUG_BOUND_CHECK 97 | range<1> _range; 98 | #endif 99 | id<1> _offset; 100 | accessor _acc; 101 | }; 102 | 103 | template 104 | class buffer_2d_acc_t { 105 | public: 106 | 
buffer_2d_acc_t(handler& cgh, buffer_t* b) 107 | : _range(b->get_kernel_range()), 108 | _offset(b->sub_buffer_offset), 109 | _acc(b->template get_access(cgh, b->sub_buffer_range, 110 | b->sub_buffer_offset)) {} 111 | 112 | inline typename is_reference_access::value operator()( 113 | SYCLIndexT r, SYCLIndexT c) const { 114 | auto idx = 115 | _offset.get(0) + detail::get_index_2d::compute(r, c, _range[1]); 116 | #if ML_DEBUG_BOUND_CHECK 117 | if (idx >= _range.size()) { 118 | printf( 119 | "Warning accessing at (%lu, %lu)+%lu from buffer of size (%lu, " 120 | "%lu)\n", 121 | r, c, _offset.get(0), access_rng(_range, 0), 122 | access_rng(_range, 1)); 123 | } 124 | #endif 125 | return _acc[idx]; 126 | } 127 | 128 | inline accessor get() { return _acc; } 129 | 130 | private: 131 | range<2> _range; 132 | id<1> _offset; 133 | accessor _acc; 134 | }; 135 | 136 | template 137 | class buffer_3d_acc_t { 138 | public: 139 | buffer_3d_acc_t(handler& cgh, buffer_t* b) 140 | : _range(b->get_kernel_range()), 141 | _acc(b->template get_access(cgh, b->sub_buffer_range, 142 | b->sub_buffer_offset)) {} 143 | 144 | inline typename is_reference_access::value operator()( 145 | SYCLIndexT x, SYCLIndexT y, SYCLIndexT z) const { 146 | auto idx = _offset.get(0) + x + _range[1] * (y + _range[2] * z); 147 | #if ML_DEBUG_BOUND_CHECK 148 | if (idx >= _range.size()) { 149 | printf( 150 | "Warning accessing at (%lu, %lu, %lu)+%lu from buffer of size (%lu, " 151 | "%lu, " 152 | "%lu)\n", 153 | x, y, z, _offset.get(0), _range[0], _range[1], _range[2]); 154 | } 155 | #endif 156 | return _acc[idx]; 157 | } 158 | 159 | inline accessor get() { return _acc; } 160 | 161 | private: 162 | range<3> _range; 163 | id<1> _offset; 164 | accessor _acc; 165 | }; 166 | 167 | } // namespace detail 168 | 169 | } // namespace ml 170 | 171 | #endif // INCLUDE_ML_UTILS_BUFFER_ACC_HPP 172 | -------------------------------------------------------------------------------- /include/ml/utils/common.hpp: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | /** 17 | * @file 18 | * @brief Regroup common headers to all files that submit SYCL kernels. 19 | */ 20 | 21 | #ifndef INCLUDE_ML_UTILS_COMMON_HPP 22 | #define INCLUDE_ML_UTILS_COMMON_HPP 23 | 24 | #include 25 | 26 | #include "ml/eigen/sycl_to_eigen.hpp" 27 | #include "ml/utils/save_utils.hpp" 28 | 29 | // Debug 30 | #include "ml/utils/debug/assert.hpp" 31 | #include "ml/utils/debug/print_utils.hpp" 32 | #include "ml/utils/debug/write_bmp.hpp" 33 | 34 | #endif // INCLUDE_ML_UTILS_COMMON_HPP 35 | -------------------------------------------------------------------------------- /include/ml/utils/debug/assert.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | /** 17 | * @file 18 | * @brief Common assert functions, only active in debug mode. 19 | */ 20 | 21 | #ifndef INCLUDE_ML_UTILS_DEBUG_ASSERT_HPP 22 | #define INCLUDE_ML_UTILS_DEBUG_ASSERT_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include "ml/utils/access.hpp" 29 | 30 | namespace ml { 31 | 32 | #define STATIC_ASSERT_A_IMPLIES_B(a, b) static_assert(((a) && (b)) || !(a), "") 33 | #define STATIC_ASSERT_DATA_DIM_FOR_DIM_2(dim, d) \ 34 | STATIC_ASSERT_A_IMPLIES_B(dim != 2, d == LIN) 35 | 36 | #ifndef NDEBUG 37 | template 38 | void assert_eq(T actual, T expected) { 39 | if (actual != expected) { 40 | std::cerr << "Error: got " << actual << " expected " << expected 41 | << std::endl; 42 | assert(false); 43 | } 44 | } 45 | 46 | template 47 | void assert_vec_eq(const T& actual, const T& expected, size_t size) { 48 | for (size_t i = 0; i < size; ++i) { 49 | assert_eq(actual[i], expected[i]); 50 | } 51 | } 52 | 53 | template 54 | void assert_rng_eq(const range& actual, const range& expected) { 55 | assert_vec_eq(actual, expected, DIM); 56 | } 57 | 58 | template 59 | void assert_less_or_eq(T x, T high) { 60 | if (x > high) { 61 | std::stringstream ss; 62 | ss << "Error: " << x << " larger than " << high; 63 | std::cerr << ss.str() << std::endl; 64 | assert(false); 65 | } 66 | } 67 | 68 | template 69 | inline void assert_rng_size_less_or_eq(const range& r, 70 | SYCLIndexT high_size) { 71 | assert_less_or_eq(r.size(), high_size); 72 | } 73 | 74 | template 75 | void assert_rng_less_or_eq(const range& r, const range& high_r) { 76 | for (int i = 0; i < DIM; ++i) { 77 | assert_less_or_eq(r[i], high_r[i]); 78 | } 79 | } 80 | 81 | template <> 82 | inline void assert_rng_less_or_eq(const range<2>& r, 83 | const range<2>& high_r) { 84 | assert_less_or_eq(r[1], high_r[0]); 85 | assert_less_or_eq(r[0], high_r[1]); 86 | } 87 | 88 | template 89 | 
inline void assert_rng_less_or_eq(const range<1>& r, SYCLIndexT high0) { 90 | assert_rng_less_or_eq(r, range<1>(high0)); 91 | } 92 | 93 | template 94 | inline void assert_rng_less_or_eq(const range<2>& r, SYCLIndexT high0, 95 | SYCLIndexT high1) { 96 | assert_rng_less_or_eq(range<2>(access_rng(r, 0), access_rng(r, 1)), 97 | range<2>(high0, high1)); 98 | } 99 | 100 | template 101 | inline void assert_rng_less_or_eq(const range<3>& r, SYCLIndexT high0, 102 | SYCLIndexT high1, SYCLIndexT high2) { 103 | assert_rng_less_or_eq(r, range<3>(high0, high1, high2)); 104 | } 105 | 106 | template 107 | void assert_real(T x) { 108 | if (!std::isfinite(x)) { 109 | std::stringstream ss; 110 | ss << "Error: value is "; 111 | if (std::isnan(x)) { 112 | ss << "nan"; 113 | } else if (std::isinf(x)) { 114 | ss << "inf"; 115 | } else { 116 | ss << x; 117 | } 118 | std::cerr << ss.str() << std::endl; 119 | assert(false); 120 | } 121 | } 122 | 123 | inline void assert_rng_square(const range<2>& r) { 124 | assert_eq(r[0], r[1]); 125 | } 126 | 127 | #else // NDEBUG 128 | template 129 | inline void assert_eq(T, T) {} 130 | template 131 | inline void assert_vec_eq(const T&, const T&, size_t) {} 132 | template 133 | inline void assert_rng_eq(const range&, const range&) {} 134 | template 135 | inline void assert_less_or_eq(T, T) {} 136 | template 137 | inline void assert_rng_size_less_or_eq(range, SYCLIndexT) {} 138 | template 139 | inline void assert_rng_less_or_eq(const range&, const range&) {} 140 | template 141 | inline void assert_rng_less_or_eq(const range<1>&, SYCLIndexT) {} 142 | template 143 | inline void assert_rng_less_or_eq(const range<2>&, SYCLIndexT, SYCLIndexT) {} 144 | template 145 | inline void assert_rng_less_or_eq(const range<3>&, SYCLIndexT, SYCLIndexT, 146 | SYCLIndexT) {} 147 | template 148 | inline void assert_real(T) {} 149 | inline void assert_rng_square(const range<2>&) {} 150 | #endif // end NDEBUG 151 | 152 | } // namespace ml 153 | 154 | #endif // 
INCLUDE_ML_UTILS_DEBUG_ASSERT_HPP 155 | -------------------------------------------------------------------------------- /include/ml/utils/debug/print_utils.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | /** 17 | * @file 18 | * @brief Allow to print generic array, std pair as well as sycl id, range and 19 | * nd_range 20 | */ 21 | 22 | #ifndef INCLUDE_ML_UTILS_DEBUG_PRINT_UTILS_HPP 23 | #define INCLUDE_ML_UTILS_DEBUG_PRINT_UTILS_HPP 24 | 25 | #include 26 | #include 27 | 28 | #include "ml/utils/sycl_types.hpp" 29 | 30 | namespace ml { 31 | 32 | /** 33 | * @brief Print std::pair 34 | * 35 | * @tparam T1 36 | * @tparam T2 37 | * @param os 38 | * @param p 39 | * @return os 40 | */ 41 | template 42 | std::ostream& operator<<(std::ostream& os, const std::pair& p) { 43 | os << "(" << p.first << "," << p.second << ")"; 44 | return os; 45 | } 46 | 47 | /** 48 | * @brief Print cl::sycl::id 49 | * 50 | * @tparam DIM 51 | * @param os 52 | * @param id_ 53 | * @return os 54 | */ 55 | template 56 | std::ostream& operator<<(std::ostream& os, const cl::sycl::id& id_) { 57 | os << "[" << id_[0]; 58 | for (int i = 1; i < DIM; ++i) { 59 | os << ", " << id_[i]; 60 | } 61 | os << "]"; 62 | return os; 63 | } 64 | 65 | /** 66 | * @brief Print cl::sycl::range 67 | * 68 | * @tparam DIM 69 | * @param os 
70 | * @param r 71 | * @return os 72 | */ 73 | template 74 | std::ostream& operator<<(std::ostream& os, const cl::sycl::range& r) { 75 | os << "[" << r[0]; 76 | for (int i = 1; i < DIM; ++i) { 77 | os << ", " << r[i]; 78 | } 79 | os << "]"; 80 | return os; 81 | } 82 | 83 | /** 84 | * @brief Print cl::sycl::nd_range 85 | * 86 | * @tparam DIM 87 | * @param os 88 | * @param r 89 | * @return os 90 | */ 91 | template 92 | std::ostream& operator<<(std::ostream& os, const cl::sycl::nd_range& r) { 93 | return os << r.get_global_range() << "@" << r.get_local_range() << "@" 94 | << r.get_offset(); 95 | } 96 | 97 | /** 98 | * @brief Print any data array as a matrix 99 | * 100 | * @tparam T data type with a [] accessor 101 | * @param os 102 | * @param data 103 | * @param nrows 104 | * @param ncols 105 | * @param off 106 | * @return os 107 | */ 108 | template 109 | std::ostream& print(std::ostream& os, const T& data, size_t nrows, size_t ncols, 110 | size_t off = 0) { 111 | for (size_t r = 0; r < nrows; ++r) { 112 | for (size_t c = 0; c < ncols; ++c) { 113 | os << data[r * ncols + c + off] << ' '; 114 | } 115 | os << std::endl; 116 | } 117 | return os; 118 | } 119 | 120 | /** 121 | * @brief Print any data array as a matrix 122 | * 123 | * @tparam T data type with a [] accessor 124 | * @param os 125 | * @param data 126 | * @param nrows 127 | * @param ncols 128 | * @param off 129 | * @return os 130 | */ 131 | template 132 | std::ostream& print(const T& data, size_t nrows, size_t ncols, size_t off = 0) { 133 | return print(std::cout, data, nrows, ncols, off); 134 | } 135 | 136 | } // namespace ml 137 | 138 | #endif // INCLUDE_ML_UTILS_DEBUG_PRINT_UTILS_HPP 139 | -------------------------------------------------------------------------------- /include/ml/utils/device_constants.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef INCLUDE_ML_UTILS_DEVICE_CONSTANTS_HPP 17 | #define INCLUDE_ML_UTILS_DEVICE_CONSTANTS_HPP 18 | 19 | #include 20 | #include 21 | 22 | #include "ml/utils/sycl_types.hpp" 23 | 24 | namespace ml { 25 | 26 | /** 27 | * @brief Singleton that holds device specific constant. 28 | * 29 | * The user must initialize the instance before using it.\n 30 | * This will create a \p sycl::queue which can be retrieved with the \p 31 | * Eigen::SyclDevice. Note that the library assumes that only one device is used 32 | * for now. 
33 | * @tparam Void Do not use, only here to avoid the use of a source file 34 | */ 35 | template 36 | class device_constants { 37 | public: 38 | static device_constants* instance; 39 | 40 | device_constants() 41 | : _eigen_queue(default_selector()), _eigen_device(&_eigen_queue) { 42 | const cl::sycl::device& sycl_device = 43 | _eigen_queue.sycl_queue().get_device(); 44 | const cl::sycl::platform& platform = sycl_device.get_platform(); 45 | using namespace cl::sycl::info; 46 | std::cout << "Selected device: " 47 | << sycl_device.get_info() << ", "; 48 | std::cout << "type: " 49 | << device_type_to_str( 50 | sycl_device.get_info()) 51 | << ", "; 52 | std::cout << "platform: " << platform.get_info(); 53 | std::cout << " [" << platform.get_info() << "]\n"; 54 | std::cout << std::endl; 55 | 56 | MAX_WORK_GROUP_SIZE = 57 | sycl_device.get_info(); 58 | MEM_BASE_ADDR_ALIGN = 59 | sycl_device.get_info(); 60 | MAX_WORK_ITEM_SIZES = 61 | sycl_device.get_info(); 62 | } 63 | 64 | inline size_t get_max_work_group_size() { return MAX_WORK_GROUP_SIZE; } 65 | inline size_t get_mem_base_addr_align() { return MEM_BASE_ADDR_ALIGN; } 66 | inline id<3> get_max_work_item_sizes() { return MAX_WORK_ITEM_SIZES; } 67 | 68 | /** 69 | * @tparam T 70 | * @return Return the value by which the size of a sub-buffer of type T must 71 | * be divisible. 72 | */ 73 | template 74 | inline size_t get_sub_buffer_range_divisor() { 75 | return get_mem_base_addr_align() / (sizeof(T) * CHAR_BIT); 76 | } 77 | 78 | /** 79 | * @brief Round size up to be used by a sub-buffer. 
80 | * 81 | * @see get_sub_buffer_range_divisor 82 | * @tparam T 83 | * @param size 84 | * @return a size usable by a sub-buffer 85 | */ 86 | template 87 | inline size_t pad_sub_buffer_size(size_t size) { 88 | auto divisor = get_sub_buffer_range_divisor(); 89 | return static_cast((size / divisor + (size % divisor > 0)) * 90 | divisor); 91 | } 92 | 93 | inline Eigen::SyclDevice& get_eigen_device() { return _eigen_device; } 94 | 95 | private: 96 | size_t MAX_WORK_GROUP_SIZE; 97 | size_t MEM_BASE_ADDR_ALIGN; 98 | id<3> MAX_WORK_ITEM_SIZES; 99 | 100 | Eigen::QueueInterface _eigen_queue; 101 | Eigen::SyclDevice _eigen_device; 102 | 103 | inline std::string device_type_to_str(cl::sycl::info::device_type type) { 104 | using namespace cl::sycl::info; 105 | switch (type) { 106 | case info::device_type::cpu: 107 | return "CPU"; 108 | case info::device_type::gpu: 109 | return "GPU"; 110 | case info::device_type::accelerator: 111 | return "accelerator"; 112 | case info::device_type::custom: 113 | return "custom"; 114 | case info::device_type::automatic: 115 | return "automatic"; 116 | case info::device_type::host: 117 | return "host"; 118 | default: 119 | return "NONE"; 120 | } 121 | } 122 | }; 123 | 124 | template <> 125 | device_constants<>* device_constants<>::instance = nullptr; 126 | 127 | /// @brief Return the device_constants instance. 128 | inline device_constants<>* get_device_constants() { 129 | return device_constants<>::instance; 130 | } 131 | 132 | /// @brief Return the \p Eigen::SyclDevice. 133 | inline Eigen::SyclDevice& get_eigen_device() { 134 | return get_device_constants()->get_eigen_device(); 135 | } 136 | 137 | } // namespace ml 138 | 139 | #endif // INCLUDE_ML_UTILS_DEVICE_CONSTANTS_HPP 140 | -------------------------------------------------------------------------------- /include/ml/utils/optimal_range.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef INCLUDE_ML_UTILS_OPTIMAL_RANGE_HPP 17 | #define INCLUDE_ML_UTILS_OPTIMAL_RANGE_HPP 18 | 19 | #include 20 | 21 | #include "ml/utils/access.hpp" 22 | #include "ml/utils/device_constants.hpp" 23 | 24 | namespace ml { 25 | 26 | /** 27 | * @tparam T 28 | * @param x 29 | * @return true if x is a power of 2 30 | */ 31 | template 32 | inline bool is_pow2(T x) { 33 | return (x & (x - 1)) == 0; 34 | } 35 | 36 | /** 37 | * @tparam T 38 | * @param x 39 | * @return the closest power of 2 higher or equal to x 40 | */ 41 | template 42 | inline T to_pow2(T x) { 43 | return std::pow(2, std::ceil(std::log2(x))); 44 | } 45 | 46 | /** 47 | * @brief Compute the best suitable local_range associated to global_range. 48 | * 49 | * The function is trivial if the global range is smaller or equal to the max 50 | * work group size.\n If not the function only tries to find divisor that are 51 | * power of 2. Finding all possible divisors would be too costly otherwise. 
52 | * 53 | * @tparam DIM 54 | * @param global_range 55 | * @return local_range 56 | */ 57 | template 58 | range get_optimal_local_range(const range& global_range) { 59 | auto max_work_group_size = get_device_constants()->get_max_work_group_size(); 60 | range local_range; 61 | if (global_range.size() <= max_work_group_size) { 62 | local_range = global_range; 63 | } else { 64 | auto max_work_group_item_sizes = 65 | get_device_constants()->get_max_work_item_sizes(); 66 | for (int i = 0; i < DIM; ++i) { 67 | local_range[i] = max_work_group_item_sizes[i]; 68 | while (global_range[i] % local_range[i]) { 69 | local_range[i] >>= 1; 70 | } 71 | } 72 | 73 | // Make sure the local size does not exceed the maximum 74 | for (int i = 0; i < DIM && local_range.size() > max_work_group_size; ++i) { 75 | // Try to divide the ith local size to reach a size of max_work_group_size 76 | auto divide_by = local_range.size() / max_work_group_size; 77 | local_range[i] /= std::min(local_range[i], divide_by); 78 | } 79 | } 80 | 81 | return local_range; 82 | } 83 | 84 | /** 85 | * @see get_optimal_local_range 86 | * @tparam DIM 87 | * @param global_range 88 | * @param offset 89 | * @return the nd_range built from \p global_range with a local range as big as 90 | * possible 91 | */ 92 | template 93 | inline nd_range get_optimal_nd_range(const range& global_range, 94 | const id& offset = id()) { 95 | return nd_range(global_range, get_optimal_local_range(global_range), 96 | offset); 97 | } 98 | 99 | /** 100 | * @see get_optimal_nd_range(const range&, const id&) 101 | * @tparam Args 102 | * @param args 103 | * @return the nd_range built from \p args with a local range as big as possible 104 | */ 105 | template 106 | inline nd_range get_optimal_nd_range(Args... 
args) { 107 | return get_optimal_nd_range(range(args...)); 108 | } 109 | 110 | } // namespace ml 111 | 112 | #endif // INCLUDE_ML_UTILS_OPTIMAL_RANGE_HPP 113 | -------------------------------------------------------------------------------- /include/ml/utils/save_utils.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | /** 17 | * @file 18 | * @brief Allow the loading and saving of generic arrays and SYCL buffers to and 19 | * from disk 20 | */ 21 | 22 | #ifndef INCLUDE_ML_UTILS_SAVE_UTILS_HPP 23 | #define INCLUDE_ML_UTILS_SAVE_UTILS_HPP 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "ml/utils/copy.hpp" 30 | 31 | namespace ml { 32 | 33 | template 34 | void save_array(const T* data, size_t length, const std::string& file_path) { 35 | std::cout << "Saving to " << file_path << "..." << std::endl; 36 | std::ofstream os(file_path.c_str(), std::ios::binary | std::ios::out); 37 | if (!os.is_open()) { 38 | std::cerr << "Could not open " << file_path << std::endl; 39 | return; 40 | } 41 | os.write(reinterpret_cast(data), length * sizeof(T)); 42 | os.close(); 43 | } 44 | 45 | template 46 | void load_array(T* data, size_t length, const std::string& file_path) { 47 | std::cout << "Loading from " << file_path << "..." 
<< std::endl; 48 | std::ifstream is(file_path.c_str(), std::ios::binary | std::ios::in); 49 | if (!is.is_open()) { 50 | std::cerr << "Could not open " << file_path << std::endl; 51 | return; 52 | } 53 | is.read(reinterpret_cast(data), length * sizeof(T)); 54 | is.close(); 55 | } 56 | 57 | template 58 | void save_array(queue& q, buffer_t& buf, const std::string& file_path) { 59 | std::vector host_buf(buf.get_kernel_size()); 60 | auto event = sycl_copy_device_to_host(q, buf, host_buf.data()); 61 | event.wait_and_throw(); 62 | save_array(host_buf.data(), host_buf.size(), file_path); 63 | } 64 | 65 | template 66 | void load_array(queue& q, buffer_t& buf, const std::string& file_path) { 67 | std::vector host_buf(buf.get_kernel_size()); 68 | load_array(host_buf.data(), host_buf.size(), file_path); 69 | auto event = sycl_copy_host_to_device(q, host_buf.data(), buf); 70 | event.wait_and_throw(); 71 | } 72 | 73 | } // namespace ml 74 | 75 | #endif // INCLUDE_ML_UTILS_SAVE_UTILS_HPP 76 | -------------------------------------------------------------------------------- /include/ml/utils/sycl_types.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | /** 17 | * @file 18 | * @brief Common SYCL aliases 19 | */ 20 | 21 | #ifndef INCLUDE_ML_UTILS_SYCL_TYPES_HPP 22 | #define INCLUDE_ML_UTILS_SYCL_TYPES_HPP 23 | 24 | #include 25 | 26 | #include "ml/eigen/eigen.hpp" 27 | 28 | namespace ml { 29 | 30 | using namespace cl::sycl; 31 | 32 | using SYCLIndexT = size_t; 33 | 34 | template 35 | using sycl_vec_t = buffer; 36 | 37 | template 38 | class NameGen {}; 39 | 40 | } // namespace ml 41 | 42 | #endif // INCLUDE_ML_UTILS_SYCL_TYPES_HPP 43 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) Codeplay Software Limited. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | include_directories("src") 16 | 17 | # Build each test 18 | file(GLOB_RECURSE SOURCES "src/*.cpp") 19 | foreach(SOURCE ${SOURCES}) 20 | add_sycl_ml_executable(${SOURCE}) 21 | if(SYCLML_TEST_DOUBLE) 22 | target_compile_definitions(${TARGET_NAME} PUBLIC SYCLML_TEST_DOUBLE) 23 | endif() 24 | add_test(NAME ${TARGET_NAME} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} ${MNIST_RELATIVE_PATH}) 25 | set_tests_properties(${TARGET_NAME} PROPERTIES LABELS "${OUTPUT_SYCL_DIR}") 26 | endforeach() 27 | -------------------------------------------------------------------------------- /tests/src/math/test_center.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include 17 | 18 | #include "ml/math/mat_ops.hpp" 19 | #include "utils/utils.hpp" 20 | 21 | template 22 | void test_center() { 23 | constexpr auto NB_OBS = 5LU; 24 | constexpr auto ACT_SIZE_OBS = 3LU; 25 | std::array host_data{1.0, 4.0, 7.0, 2.0, 0.0, 26 | -8.0, 1.0, 2.0, 1.0, 0.0, 27 | 0.0, 1.0, -5.0, -4.0, -3.0}; 28 | 29 | std::array host_avg_data; 30 | std::array host_center_data; 31 | { 32 | cl::sycl::queue& q = create_queue(); 33 | ml::matrix_t sycl_data(host_data.data(), 34 | cl::sycl::range<2>(NB_OBS, ACT_SIZE_OBS)); 35 | ml::vector_t sycl_data_avg{cl::sycl::range<1>(ACT_SIZE_OBS)}; 36 | 37 | ml::avg(q, sycl_data, sycl_data_avg); 38 | ml::center_data()>(q, sycl_data, sycl_data_avg); 39 | 40 | sycl_data.set_final_data(host_center_data.data()); 41 | sycl_data_avg.set_final_data(host_avg_data.data()); 42 | clear_eigen_device(); 43 | } 44 | 45 | /* 46 | std::cout << "host data:\n"; 47 | ml::print(host_data, NB_OBS, ACT_SIZE_OBS); 48 | std::cout << "\navg data:\n"; 49 | ml::print(host_avg_data, 1, ACT_SIZE_OBS); 50 | std::cout << "\ncenter data:\n"; 51 | ml::print(host_center_data, NB_OBS, ACT_SIZE_OBS); 52 | */ 53 | 54 | // avg data 55 | assert_vec_almost_eq(host_avg_data, {-0.2, 0.4, -0.4}); 56 | 57 | // center data 58 | assert_vec_almost_eq(host_center_data, 59 | {1.2, 3.6, 7.4, 2.2, -0.4, -7.6, 1.2, 1.6, 1.4, 0.2, 60 | -0.4, 1.4, -4.8, -4.4, -2.6}); 61 | } 62 | 63 | int main() { 64 | try { 65 | test_center(); 66 | #ifdef SYCLML_TEST_DOUBLE 67 | test_center(); 68 | #endif 69 | } catch (cl::sycl::exception e) { 70 | std::cerr << e.what(); 71 | } 72 | 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /tests/src/math/test_cov.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | 18 | #include "ml/math/cov.hpp" 19 | #include "ml/math/mat_ops.hpp" 20 | #include "utils/utils.hpp" 21 | 22 | template 23 | void test_cov_square() { 24 | static constexpr ml::data_dim D = ml::LIN; 25 | std::array host_data{1.0, 4.0, 7.0, 2.0, 0.0, -8.0, 1.0, 2.0, 1.0}; 26 | 27 | std::array host_cov; 28 | { 29 | cl::sycl::queue& q = create_queue(); 30 | ml::matrix_t sycl_data(host_data.data(), cl::sycl::range<2>(3, 3)); 31 | sycl_data.set_final_data(nullptr); 32 | ml::vector_t sycl_data_avg{cl::sycl::range<1>(3)}; 33 | 34 | ml::avg(q, sycl_data, sycl_data_avg); 35 | ml::center_data()>(q, sycl_data, sycl_data_avg); 36 | 37 | ml::matrix_t sycl_cov(cl::sycl::range<2>(3, 3)); 38 | ml::cov(q, sycl_data, sycl_cov); 39 | sycl_cov.set_final_data(host_cov.data()); 40 | clear_eigen_device(); 41 | } 42 | 43 | /* 44 | std::cout << "host data:\n"; 45 | ml::print(host_data, 3, 3); 46 | std::cout << "\ncov:\n"; 47 | ml::print(host_cov, 3, 3); 48 | */ 49 | 50 | std::array expected{2.0 / 9.0, -2.0 / 3.0, -8.0 / 3.0, 51 | host_cov[1], 8.0 / 3.0, 10.0, 52 | host_cov[2], host_cov[5], 38.0}; 53 | assert_vec_almost_eq(host_cov, expected); 54 | } 55 | 56 | template 57 | void test_cov_general() { 58 | static constexpr ml::data_dim D = ml::TR; 59 | // 3 observations that have 2 variables each (transposed) 60 | std::array host_data{1.0, 2.0, 3.0, 2.0, 2.0, 11.0}; 61 | 62 | 
std::array host_cov; 63 | { 64 | cl::sycl::queue& q = create_queue(); 65 | ml::matrix_t sycl_data(host_data.data(), cl::sycl::range<2>(2, 3)); 66 | ml::vector_t sycl_data_avg(cl::sycl::range<1>(2)); 67 | 68 | ml::avg(q, sycl_data, sycl_data_avg); 69 | ml::center_data()>(q, sycl_data, sycl_data_avg); 70 | 71 | ml::matrix_t sycl_cov(cl::sycl::range<2>(2, 2)); 72 | ml::cov(q, sycl_data, sycl_cov); 73 | sycl_cov.set_final_data(host_cov.data()); 74 | clear_eigen_device(); 75 | } 76 | 77 | /* 78 | std::cout << "data:\n"; 79 | ml::print(host_data, 3, 2); 80 | std::cout << "\ncov:\n"; 81 | ml::print(host_cov, 2, 2); 82 | */ 83 | 84 | std::array expected{2.0 / 3.0, 3.0, host_cov[1], 18.0}; 85 | assert_vec_almost_eq(host_cov, expected); 86 | } 87 | 88 | template 89 | void test_all() { 90 | test_cov_square(); 91 | test_cov_general(); 92 | } 93 | 94 | int main(void) { 95 | try { 96 | test_all(); 97 | #ifdef SYCLML_TEST_DOUBLE 98 | test_all(); 99 | #endif 100 | } catch (cl::sycl::exception e) { 101 | std::cerr << e.what(); 102 | } 103 | 104 | return 0; 105 | } 106 | -------------------------------------------------------------------------------- /tests/src/math/test_dot_product.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include 17 | 18 | #include "ml/math/vec_ops.hpp" 19 | #include "utils/utils.hpp" 20 | 21 | template 22 | void test_dot_product_self() { 23 | constexpr size_t SIZE = 4; 24 | std::array in{1, 0.5, -1, 0}; 25 | T res; 26 | 27 | { 28 | cl::sycl::queue& q = create_queue(); 29 | ml::vector_t sycl_vec(in.data(), cl::sycl::range<1>(in.size())); 30 | res = ml::sycl_dot_product(q, sycl_vec); 31 | clear_eigen_device(); 32 | } 33 | 34 | /* 35 | for (unsigned i = 0; i < SIZE; ++i) { 36 | std::cout << in[i] << " "; 37 | } 38 | std::cout << "\nres=" << res << std::endl; 39 | */ 40 | 41 | assert_almost_eq(res, T(2.25)); 42 | } 43 | 44 | template 45 | void test_dot_product_other() { 46 | constexpr size_t SIZE = 4; 47 | std::array in1{1, 2, 3, 4}; 48 | std::array in2{2, 2, 1, 0.5}; 49 | T res; 50 | 51 | { 52 | cl::sycl::queue& q = create_queue(); 53 | ml::vector_t sycl_vec1(in1.data(), cl::sycl::range<1>(in1.size())); 54 | sycl_vec1.set_final_data(nullptr); 55 | ml::vector_t sycl_vec2(in2.data(), cl::sycl::range<1>(in2.size())); 56 | sycl_vec2.set_final_data(nullptr); 57 | res = ml::sycl_dot_product(q, sycl_vec1, sycl_vec2); 58 | clear_eigen_device(); 59 | } 60 | 61 | /* 62 | for (unsigned i = 0; i < SIZE; ++i) { 63 | std::cout << in1[i] << " "; 64 | } 65 | std::cout << std::endl; 66 | for (unsigned i = 0; i < SIZE; ++i) { 67 | std::cout << in2[i] << " "; 68 | } 69 | std::cout << "\nres=" << res << std::endl; 70 | */ 71 | 72 | assert_almost_eq(res, T(11)); 73 | } 74 | 75 | template 76 | void test_all() { 77 | test_dot_product_self(); 78 | test_dot_product_other(); 79 | } 80 | 81 | int main() { 82 | try { 83 | test_all(); 84 | #ifdef SYCLML_TEST_DOUBLE 85 | test_all(); 86 | #endif 87 | } catch (cl::sycl::exception e) { 88 | std::cerr << e.what(); 89 | } 90 | 91 | return 0; 92 | } 93 | -------------------------------------------------------------------------------- /tests/src/math/test_inv.cpp: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | #include 18 | 19 | #include "ml/math/mat_inv.hpp" 20 | #include "ml/math/tri_inv.hpp" 21 | #include "utils/utils.hpp" 22 | 23 | template 24 | void test_inv() { 25 | std::array host_data{1.0, 4.0, 6.0, 0.0, -1.0, 2.0, 5.0, 3.0, 4.0}; 26 | 27 | std::array host_inv; 28 | { 29 | cl::sycl::queue& q = create_queue(); 30 | ml::matrix_t sycl_data(host_data.data(), cl::sycl::range<2>(3, 3)); 31 | sycl_data.set_final_data(nullptr); 32 | 33 | ml::matrix_t sycl_inv{cl::sycl::range<2>(3, 3)}; 34 | ml::mat_inv(q, sycl_data, sycl_inv); 35 | 36 | sycl_inv.set_final_data(host_inv.data()); 37 | clear_eigen_device(); 38 | } 39 | 40 | /* 41 | std::cout << "data:\n"; 42 | ml::print(host_data, 3, 3); 43 | std::cout << "\ninv:\n"; 44 | ml::print(host_inv, 3, 3); 45 | */ 46 | 47 | std::array expected{-0.166667, 0.0333335, 0.233333, 48 | 0.166667, -0.433333, -0.0333333, 49 | 0.0833333, 0.283333, -0.0166667}; 50 | assert_vec_almost_eq(host_inv, expected); 51 | } 52 | 53 | template 54 | void test_inv_big() { 55 | static constexpr unsigned SIDE = 100; 56 | static constexpr unsigned SIZE = SIDE * SIDE; 57 | static constexpr T MAX = 1E2; 58 | std::array host_data; 59 | srand(time(0)); 60 | std::generate(std::begin(host_data), std::end(host_data), 
[=]() { 61 | return MAX * ((2 * (static_cast(rand()) / RAND_MAX)) - 1); 62 | }); 63 | 64 | // Make the input matrix diagonally dominant to ensure that it is invertible 65 | for (unsigned r = 0; r < SIDE; ++r) { 66 | T abs_max = host_data[r]; 67 | for (unsigned c = 0; c < SIDE; ++c) { 68 | T abs_rc = std::abs(host_data[r * SIDE + c]); 69 | if (abs_rc > abs_max) { 70 | abs_max = abs_rc; 71 | } 72 | } 73 | auto& x = host_data[r * SIDE + r]; 74 | x = cl::sycl::sign(x) * (std::abs(x) + abs_max); 75 | } 76 | 77 | std::array host_diff; 78 | { 79 | cl::sycl::queue& q = create_queue(); 80 | cl::sycl::range<2> rng(SIDE, SIDE); 81 | ml::matrix_t sycl_data(host_data.data(), rng); 82 | sycl_data.set_final_data(nullptr); 83 | 84 | ml::matrix_t sycl_inv{rng}; 85 | ml::mat_inv(q, sycl_data, sycl_inv); 86 | // ml::write_bmp_grayscale("inv_" + std::to_string(SIDE), sycl_inv, true, 87 | // true); 88 | 89 | ml::matrix_t multiplication{rng}; 90 | ml::mat_mul(q, sycl_data, sycl_inv, multiplication); 91 | // ml::write_bmp_grayscale("inv_multiplication_" + std::to_string(SIDE), 92 | // multiplication, true, true); 93 | 94 | ml::matrix_t identity{rng}; 95 | ml::eye(q, identity); 96 | ml::matrix_t diff{rng}; 97 | ml::sycl_copy(q, identity, diff); 98 | ml::mat_inplace_binary_op(q, diff, multiplication, std::minus()); 99 | // ml::write_bmp_grayscale("inv_diff_" + std::to_string(SIDE), diff, true, 100 | // true); 101 | diff.set_final_data(host_diff.data()); 102 | clear_eigen_device(); 103 | } 104 | 105 | for (unsigned i = 0; i < SIZE; ++i) { 106 | assert_almost_eq(host_diff[i], T(0), T(1E-3)); 107 | } 108 | } 109 | 110 | template 111 | void test_tri_inv() { 112 | std::array host_data{1.0, 2.0, 3.0, 4.0, 0.0, 5.0, 6.0, 7.0, 113 | 0.0, 0.0, 8.0, 9.0, 0.0, 0.0, 0.0, 10.0}; 114 | 115 | std::array host_inv; 116 | { 117 | cl::sycl::queue& q = create_queue(); 118 | ml::matrix_t sycl_data(host_data.data(), cl::sycl::range<2>(4, 4)); 119 | sycl_data.set_final_data(nullptr); 120 | ml::matrix_t 
sycl_inv{cl::sycl::range<2>(4, 4)}; 121 | ml::tri_inv(q, sycl_data, sycl_inv); 122 | sycl_inv.set_final_data(host_inv.data()); 123 | clear_eigen_device(); 124 | } 125 | 126 | /* 127 | std::cout << "data:\n"; 128 | ml::print(host_data, 4, 4); 129 | std::cout << "\ninv:\n"; 130 | ml::print(host_inv, 4, 4); 131 | */ 132 | 133 | std::array expected{1.0, -0.4, -0.075, -0.0525, 0.0, 0.2, 134 | -0.15, -0.005, 0.0, 0.0, 0.125, -0.1125, 135 | 0.0, 0.0, 0.0, 0.1}; 136 | assert_vec_almost_eq(host_inv, expected); 137 | } 138 | 139 | template 140 | void test_tri_inv_big() { 141 | static constexpr unsigned SIDE = 64; 142 | static constexpr unsigned SIZE = SIDE * SIDE; 143 | std::array host_data; 144 | for (unsigned r = 0; r < SIDE; ++r) { 145 | for (unsigned c = 0; c < SIDE; ++c) { 146 | host_data[r * SIDE + c] = r > c ? 0 : r * SIDE + c + 1; 147 | } 148 | } 149 | 150 | std::array host_diff; 151 | { 152 | cl::sycl::queue& q = create_queue(); 153 | cl::sycl::range<2> rng(SIDE, SIDE); 154 | ml::matrix_t sycl_data(host_data.data(), rng); 155 | sycl_data.set_final_data(nullptr); 156 | 157 | ml::matrix_t sycl_tri_inv{rng}; 158 | ml::tri_inv(q, sycl_data, sycl_tri_inv); 159 | // ml::write_bmp_grayscale("tri_inv_" + std::to_string(SIDE), sycl_tri_inv, 160 | // true, true); 161 | 162 | ml::matrix_t multiplication{rng}; 163 | ml::mat_mul(q, sycl_data, sycl_tri_inv, multiplication); 164 | // ml::write_bmp_grayscale("tri_inv_multiplication_" + std::to_string(SIDE), 165 | // multiplication, true, true); 166 | 167 | ml::matrix_t identity{rng}; 168 | ml::eye(q, identity); 169 | ml::matrix_t diff{rng}; 170 | ml::sycl_copy(q, identity, diff); 171 | ml::mat_inplace_binary_op(q, diff, multiplication, std::minus()); 172 | // ml::write_bmp_grayscale("tri_inv_diff_" + std::to_string(SIDE), diff, 173 | // true, true); 174 | 175 | diff.set_final_data(host_diff.data()); 176 | clear_eigen_device(); 177 | } 178 | 179 | for (unsigned i = 0; i < SIZE; ++i) { 180 | assert_almost_eq(host_diff[i], T(0), 
T(1E-2)); 181 | } 182 | } 183 | 184 | template 185 | void test_all() { 186 | test_inv(); 187 | // test_inv_big(); 188 | test_tri_inv(); 189 | // test_tri_inv_big(); 190 | } 191 | 192 | int main(void) { 193 | try { 194 | test_all(); 195 | #ifdef SYCLML_TEST_DOUBLE 196 | test_all(); 197 | #endif 198 | } catch (cl::sycl::exception e) { 199 | std::cerr << e.what(); 200 | } 201 | 202 | return 0; 203 | } 204 | -------------------------------------------------------------------------------- /tests/src/math/test_mat_mul.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include 17 | #include 18 | 19 | #include "ml/math/mat_mul.hpp" 20 | #include "utils/utils.hpp" 21 | 22 | template 23 | void test_square() { 24 | std::array m1{1.0, 2.0, 3.0, 4.0}; 25 | std::array m2{-1.0, 1.0, 5.0, -2.0}; 26 | std::array m3; 27 | 28 | { 29 | cl::sycl::queue& q = create_queue(); 30 | ml::matrix_t m1_buffer(m1.data(), cl::sycl::range<2>(2, 2)); 31 | m1_buffer.set_final_data(nullptr); 32 | ml::matrix_t m2_buffer(m2.data(), cl::sycl::range<2>(2, 2)); 33 | m2_buffer.set_final_data(nullptr); 34 | ml::matrix_t out_buffer(cl::sycl::range<2>(2, 2)); 35 | ml::mat_mul(q, m1_buffer, m2_buffer, out_buffer); 36 | out_buffer.set_final_data(m3.data()); 37 | clear_eigen_device(); 38 | } 39 | 40 | /* 41 | std::cout << "m1:\n"; 42 | ml::print(m1, 2, 2); 43 | std::cout << "\nm2:\n"; 44 | ml::print(m2, 2, 2); 45 | std::cout << "\nm3:\n"; 46 | ml::print(m3, 2, 2); 47 | */ 48 | 49 | assert_vec_almost_eq(m3, {9.0, -3.0, 17.0, -5.0}); 50 | } 51 | 52 | template 53 | void test_general() { 54 | std::array m1{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; 55 | std::array m2{-1.0, 5.0, 2.0}; 56 | std::array m3; 57 | 58 | { 59 | cl::sycl::queue& q = create_queue(); 60 | ml::matrix_t m1_buffer(m1.data(), cl::sycl::range<2>(2, 3)); 61 | m1_buffer.set_final_data(nullptr); 62 | ml::matrix_t m2_buffer(m2.data(), cl::sycl::range<2>(3, 1)); 63 | m2_buffer.set_final_data(nullptr); 64 | ml::matrix_t out_buffer(cl::sycl::range<2>(2, 1)); 65 | ml::mat_mul(q, m1_buffer, m2_buffer, out_buffer); 66 | out_buffer.set_final_data(m3.data()); 67 | clear_eigen_device(); 68 | } 69 | 70 | /* 71 | std::cout << "m1:\n"; 72 | ml::print(m1, 2, 3); 73 | std::cout << "\nm2:\n"; 74 | ml::print(m2, 3, 1); 75 | std::cout << "\nm3:\n"; 76 | ml::print(m3, 2, 1); 77 | */ 78 | 79 | assert_vec_almost_eq(m3, {15.0, 33.0}); 80 | } 81 | 82 | template 83 | void test_all() { 84 | test_square(); 85 | test_general(); 86 | } 87 | 88 | int main() { 89 | try { 90 | test_all(); 91 | #ifdef SYCLML_TEST_DOUBLE 92 | test_all(); 
93 | #endif 94 | } catch (cl::sycl::exception e) { 95 | std::cerr << e.what(); 96 | } 97 | 98 | return 0; 99 | } 100 | -------------------------------------------------------------------------------- /tests/src/math/test_qr.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | 18 | #include "ml/math/cov.hpp" 19 | #include "ml/math/mat_mul.hpp" 20 | #include "ml/math/mat_ops.hpp" 21 | #include "ml/math/qr.hpp" 22 | #include "utils/utils.hpp" 23 | 24 | template 25 | void test_small_qr() { 26 | static constexpr unsigned NB_OBS = 5; 27 | static constexpr unsigned DATA_DIM = 3; 28 | std::array host_data{1.0, 4.0, 7.0, 2.0, 0.0, 29 | -8.0, 1.0, 2.0, 1.0, -3.0, 30 | -1.0, -1.0, 0.0, -9.0, 6.0}; 31 | 32 | std::array host_qr; 33 | { 34 | cl::sycl::queue& q = create_queue(); 35 | ml::matrix_t sycl_data(host_data.data(), 36 | cl::sycl::range<2>(NB_OBS, DATA_DIM)); 37 | qr(q, sycl_data); 38 | sycl_data.set_final_data(host_qr.data()); 39 | clear_eigen_device(); 40 | } 41 | 42 | /* 43 | std::cout << "host data:\n"; 44 | ml::print(host_data, NB_OBS, DATA_DIM); 45 | std::cout << "\nhost R:\n"; 46 | ml::print(host_qr, NB_OBS, DATA_DIM); 47 | */ 48 | 49 | // Multiple correct results are possible. Each row can be multiplied by -1. 
50 | // In the current implementation all values on the diagonal are positive. 51 | // Only test the upper triangle matrix as the rest can have any value. 52 | assert_almost_eq(host_qr[0], T(3.87298)); 53 | assert_almost_eq(host_qr[1], T(2.32379)); 54 | assert_almost_eq(host_qr[2], T(-1.29099)); 55 | assert_almost_eq(host_qr[4], T(9.82853)); 56 | assert_almost_eq(host_qr[5], T(-2.03489)); 57 | assert_almost_eq(host_qr[8], T(12.04959)); 58 | } 59 | 60 | template 61 | void test_qr_square() { 62 | static constexpr unsigned N = 2; 63 | static constexpr T DET_SIGN = -((N % 2) * 2) + 1; 64 | std::array host_data; 65 | 66 | // Generate a random matrix with determinant 1 67 | fill_random(host_data, T(0.0), T(1.0)); 68 | T det_data = compute_det(host_data); 69 | if (det_data < 0) { 70 | det_data *= -1; 71 | std::transform(begin(host_data), begin(host_data) + N, begin(host_data), 72 | [](T x) { return -x; }); 73 | } 74 | T factor = std::pow(det_data, -T(1.0) / N); 75 | std::transform(begin(host_data), end(host_data), begin(host_data), 76 | [factor](T x) { return factor * x; }); 77 | det_data = compute_det(host_data); 78 | assert_almost_eq(det_data, T(1)); 79 | 80 | std::array host_qr; 81 | T det_r; 82 | { 83 | cl::sycl::queue& q = create_queue(); 84 | ml::matrix_t sycl_data(host_data.data(), cl::sycl::range<2>(N, N)); 85 | ml::qr(q, sycl_data); 86 | det_r = DET_SIGN * reduce_diag(q, sycl_data, 0, T(1), std::multiplies()); 87 | sycl_data.set_final_data(host_qr.data()); 88 | clear_eigen_device(); 89 | } 90 | 91 | /* 92 | std::cout << "host data:\n"; 93 | ml::print(host_data, N, N); 94 | std::cout << "\nhost R:\n"; 95 | ml::print(host_qr, N, N); 96 | std::cout << "\ndeterminant: " << det_r << std::endl; 97 | */ 98 | 99 | assert_almost_eq(det_r, DET_SIGN * host_qr[0] * host_qr[3]); 100 | assert_almost_eq(det_r, det_data); 101 | } 102 | 103 | class MLNormalizeR; 104 | template 105 | void test_qr() { 106 | static constexpr unsigned NB_OBS = 103; 107 | static constexpr unsigned 
DATA_DIM = 64; 108 | std::array host_data; 109 | fill_random(host_data, T(-10), T(10)); 110 | 111 | std::array host_cov; 112 | std::array host_r2; 113 | { 114 | cl::sycl::queue& q = create_queue(); 115 | ml::matrix_t sycl_data(host_data.data(), 116 | cl::sycl::range<2>(NB_OBS, DATA_DIM)); 117 | sycl_data.set_final_data(nullptr); 118 | 119 | // Center data 120 | ml::vector_t sycl_data_avg((cl::sycl::range<1>(DATA_DIM))); 121 | ml::avg(q, sycl_data, sycl_data_avg); 122 | ml::center_data(q, sycl_data, sycl_data_avg); 123 | 124 | // Expected cov 125 | ml::matrix_t sycl_cov(cl::sycl::range<2>(DATA_DIM, DATA_DIM)); 126 | ml::cov(q, sycl_data, sycl_cov); 127 | 128 | // QR 129 | ml::qr(q, sycl_data); 130 | ml::matrix_t sycl_r(cl::sycl::range<2>(DATA_DIM, DATA_DIM)); 131 | q.submit([&sycl_data, &sycl_r](cl::sycl::handler& cgh) { 132 | auto old_r_acc = 133 | sycl_data.template get_access_2d(cgh); 134 | auto new_r_acc = 135 | sycl_r.template get_access_2d( 136 | cgh); 137 | cgh.parallel_for>( 138 | sycl_r.get_nd_range(), [=](cl::sycl::nd_item<2> item) { 139 | auto row = item.get_global_id(0); 140 | auto col = item.get_global_id(1); 141 | new_r_acc(row, col) = 142 | col >= row ? 
old_r_acc(row, col) / cl::sycl::sqrt(T(NB_OBS)) 143 | : 0; 144 | }); 145 | }); 146 | 147 | // Reconstructed cov 148 | ml::matrix_t sycl_r2(cl::sycl::range<2>(DATA_DIM, DATA_DIM)); 149 | ml::mat_mul(q, sycl_r, sycl_r, sycl_r2); 150 | 151 | sycl_cov.set_final_data(host_cov.data()); 152 | sycl_r2.set_final_data(host_r2.data()); 153 | clear_eigen_device(); 154 | } 155 | 156 | assert_vec_almost_eq(host_r2, host_cov, T(1E-3)); 157 | } 158 | 159 | template 160 | void test_all() { 161 | test_small_qr(); 162 | test_qr_square(); 163 | test_qr(); 164 | } 165 | 166 | int main(void) { 167 | try { 168 | test_all(); 169 | #ifdef SYCLML_TEST_DOUBLE 170 | test_all(); 171 | #endif 172 | } catch (cl::sycl::exception e) { 173 | std::cerr << e.what(); 174 | } 175 | 176 | return 0; 177 | } 178 | -------------------------------------------------------------------------------- /tests/src/math/test_svd.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include 17 | 18 | #include "ml/math/mat_mul.hpp" 19 | #include "ml/math/mat_ops.hpp" 20 | #include "ml/math/svd.hpp" 21 | #include "utils/utils.hpp" 22 | 23 | template 24 | void test_svd_general() { 25 | constexpr auto NB_OBS = 4LU; 26 | constexpr auto ACT_SIZE_OBS = NB_OBS; 27 | constexpr auto SIZE_OBS_POW2 = ACT_SIZE_OBS; 28 | std::array host_data{ 29 | 1.0, 2.0, 0.0, -3.0, 2.0, -5.0, 2.0, 1.0, 30 | 0.0, 2.0, -1.0, -1.0, -3.0, 1.0, -1.0, 3.0}; 31 | 32 | constexpr auto NB_VEC = ACT_SIZE_OBS; 33 | 34 | std::array host_V; 35 | std::array host_L; 36 | std::array host_U; 37 | std::array host_residual; 38 | std::array host_data_svd; 39 | std::array host_centered_data; 40 | { 41 | cl::sycl::queue& q = create_queue(); 42 | ml::matrix_t sycl_data(host_data.data(), 43 | cl::sycl::range<2>(NB_OBS, SIZE_OBS_POW2)); 44 | sycl_data.data_range = cl::sycl::range<2>(NB_OBS, ACT_SIZE_OBS); 45 | ml::vector_t sycl_data_avg(cl::sycl::range<1>(ACT_SIZE_OBS), 46 | ml::get_optimal_nd_range(SIZE_OBS_POW2)); 47 | 48 | ml::avg(q, sycl_data, sycl_data_avg); 49 | ml::center_data()>(q, sycl_data, sycl_data_avg); 50 | ml::matrix_t sycl_centered_data(sycl_data.data_range, 51 | sycl_data.kernel_range); 52 | ml::sycl_copy(q, sycl_data, sycl_centered_data); 53 | 54 | auto VLU = ml::svd(q, sycl_data); 55 | auto& sycl_U = VLU.U; 56 | auto& sycl_V = VLU.V; 57 | auto& vec_L = VLU.L; 58 | ml::assert_rng_eq({NB_OBS, NB_VEC}, sycl_U.data_range); 59 | ml::assert_eq(NB_VEC, vec_L.size()); 60 | ml::assert_rng_eq({NB_VEC, ACT_SIZE_OBS}, sycl_V.data_range); 61 | 62 | std::copy(std::begin(vec_L), std::end(vec_L), std::begin(host_L)); 63 | ml::vector_t sycl_L(host_L.data(), cl::sycl::range<1>(host_L.size())); 64 | sycl_L.set_final_data(nullptr); 65 | 66 | ml::matrix_t sycl_data_svd(sycl_data.data_range, sycl_data.kernel_range); 67 | ml::matrix_t sycl_copy_V(sycl_V.data_range, sycl_V.kernel_range); 68 | ml::sycl_copy(q, sycl_V, sycl_copy_V); 69 | ml::mat_vec_apply_op(q, sycl_copy_V, sycl_L, 70 | 
std::multiplies()); // diag(L) * V 71 | ml::mat_mul(q, sycl_U, sycl_copy_V, sycl_data_svd); 72 | ml::mat_inplace_binary_op(q, sycl_data_svd, sycl_data, 73 | std::plus()); // Add residual 74 | 75 | sycl_data.set_final_data(host_residual.data()); 76 | sycl_centered_data.set_final_data(host_centered_data.data()); 77 | sycl_data_svd.set_final_data(host_data_svd.data()); 78 | sycl_U.set_final_data(host_U.data()); 79 | sycl_V.set_final_data(host_V.data()); 80 | clear_eigen_device(); 81 | } 82 | 83 | /* 84 | std::cout << "host data:\n"; 85 | ml::print(host_data, NB_OBS, SIZE_OBS_POW2); 86 | std::cout << "\nU:\n"; 87 | ml::print(host_U, NB_VEC, SIZE_OBS_POW2); 88 | std::cout << "\nL:\n"; 89 | ml::print(host_L, 1, NB_VEC); 90 | std::cout << "\nV:\n"; 91 | ml::print(host_V, NB_OBS, NB_VEC); 92 | std::cout << "\nR:\n"; 93 | ml::print(host_residual, NB_OBS, SIZE_OBS_POW2); 94 | std::cout << "\ndata svd:\n"; 95 | ml::print(host_data_svd, NB_OBS, SIZE_OBS_POW2); 96 | */ 97 | 98 | assert_vec_almost_eq(host_centered_data, host_data_svd); 99 | for (unsigned i = 0; i < NB_OBS * SIZE_OBS_POW2; ++i) { 100 | assert_almost_eq(host_residual[i], T(0)); 101 | } 102 | } 103 | 104 | int main(void) { 105 | try { 106 | test_svd_general(); 107 | #ifdef SYCLML_TEST_DOUBLE 108 | test_svd_general(); 109 | #endif 110 | } catch (cl::sycl::exception e) { 111 | std::cerr << e.what(); 112 | } 113 | 114 | return 0; 115 | } 116 | -------------------------------------------------------------------------------- /tests/src/math/test_tr_op.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | #include 18 | 19 | #include "ml/math/mat_ops.hpp" 20 | #include "utils/utils.hpp" 21 | /** @brief Checks ml::mat_inplace_binary_op with std::multiplies on a 10 x 2 matrix m1 (indexed i * N + j) and an operand m2 stored at j * M + i in an N x M buffer. The host-side expectation multiplies column j of m1 by j + 1; m1 is compared with it after the SYCL scope has closed. NOTE(review): this listing has lost its angle-bracket text (template parameter lists, template arguments, system include names); it is left untouched here and must be restored from the repository before compiling. */ 22 | template 23 | void test_lin_tr_inplace_mat_op() { 24 | static constexpr size_t M = 10; 25 | static constexpr size_t N = 2; 26 | 27 | std::array m1; 28 | std::array expected_m1; 29 | std::array m2; 30 | 31 | srand(time(0)); 32 | static constexpr T MAX = 1E2; 33 | fill_random(m1, -MAX, MAX); 34 | 35 | // Expect first column unchanged, second multiplied by 2 36 | for (size_t i = 0; i < M; ++i) { 37 | for (size_t j = 0; j < N; ++j) { 38 | m2[j * M + i] = j + 1; 39 | expected_m1[i * N + j] = m1[i * N + j] * m2[j * M + i]; 40 | } 41 | } 42 | 43 | { 44 | cl::sycl::queue& q = create_queue(); 45 | ml::matrix_t m1_buffer(m1.data(), cl::sycl::range<2>(M, N)); 46 | ml::matrix_t m2_buffer(m2.data(), cl::sycl::range<2>(N, M)); // TR 47 | m2_buffer.set_final_data(nullptr); 48 | 49 | ml::mat_inplace_binary_op(q, m1_buffer, m2_buffer, 50 | std::multiplies()); 51 | clear_eigen_device(); 52 | } 53 | 54 | assert_vec_almost_eq(m1, expected_m1); 55 | } 56 | /** @brief Same check with M = 2 and N = 10: here the m1 buffer is the one tagged TR and the factor stored in m2 is i + 1. */ 57 | template 58 | void test_tr_lin_inplace_mat_op() { 59 | static constexpr size_t M = 2; 60 | static constexpr size_t N = 10; 61 | 62 | std::array m1; 63 | std::array expected_m1; 64 | std::array m2; 65 | 66 | srand(time(0)); 67 | static constexpr T MAX = 1E2; 68 | fill_random(m1, -MAX, MAX); 69 | 70 | // Expect first column unchanged, second multiplied by 2 71 | for (size_t i = 0; i < M; ++i) { 72 | for (size_t j = 0; j < N; ++j) { 73 | m2[j * M + i] = i + 1; 74 | 
expected_m1[i * N + j] = m1[i * N + j] * m2[j * M + i]; 75 | } 76 | } 77 | 78 | { 79 | cl::sycl::queue& q = create_queue(); 80 | ml::matrix_t m1_buffer(m1.data(), cl::sycl::range<2>(M, N)); // TR 81 | ml::matrix_t m2_buffer(m2.data(), cl::sycl::range<2>(N, M)); 82 | m2_buffer.set_final_data(nullptr); 83 | 84 | ml::mat_inplace_binary_op(q, m1_buffer, m2_buffer, 85 | std::multiplies()); 86 | clear_eigen_device(); 87 | } 88 | 89 | assert_vec_almost_eq(m1, expected_m1); 90 | } 91 | /** @brief Runs both in-place operator tests back to back. */ 92 | template 93 | void test_all() { 94 | test_lin_tr_inplace_mat_op(); 95 | test_tr_lin_inplace_mat_op(); 96 | } 97 | /** @brief Test entry point: runs test_all, and a second time when SYCLML_TEST_DOUBLE is defined. @return 0 on success, 1 when a cl::sycl::exception was caught, so the failure is not reported as a pass. */ 98 | int main() { 99 | try { 100 | test_all(); 101 | #ifdef SYCLML_TEST_DOUBLE 102 | test_all(); 103 | #endif 104 | } catch (const cl::sycl::exception& e) { /* catch by const reference: no copy, no slicing */ 105 | std::cerr << e.what(); return 1; 106 | } 107 | 108 | return 0; 109 | } 110 | -------------------------------------------------------------------------------- /tests/src/math/test_tri_solve.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include 17 | #include 18 | 19 | #include "ml/math/tri_solve.hpp" 20 | #include "utils/utils.hpp" 21 | /** @brief Calls ml::tri_solve twice on a 3 x 3 upper-triangular A and a 3 x 4 right-hand side B (Y from B, then X from Y) and compares both outputs with hard-coded values. The expected numbers are consistent with solving transpose(A) * Y = B followed by A * X = Y, e.g. Y(1,0) = (5 - 2 * 9) / 4 = -3.25. NOTE(review): this listing has lost its angle-bracket text (template parameter lists, template arguments, system include names); it is left untouched here and must be restored from the repository before compiling. */ 22 | template 23 | void test_tri_solve() { 24 | std::array host_A{1.0, 2.0, 3.0, 0.0, 4.0, 5.0, 0.0, 0.0, 6.0}; 25 | std::array host_B{9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 26 | 3.0, 2.0, 1.0, 0.0, 1.0, 0.0}; 27 | 28 | std::array host_Y; 29 | std::array host_X; 30 | 31 | { 32 | cl::sycl::queue& q = create_queue(); 33 | ml::matrix_t sycl_A(host_A.data(), cl::sycl::range<2>(3, 3)); 34 | sycl_A.set_final_data(nullptr); 35 | ml::matrix_t sycl_B(host_B.data(), cl::sycl::range<2>(3, 4)); 36 | sycl_B.set_final_data(nullptr); 37 | 38 | ml::matrix_t sycl_Y{cl::sycl::range<2>(3, 4)}; 39 | ml::matrix_t sycl_X{cl::sycl::range<2>(3, 4)}; 40 | 41 | ml::tri_solve(q, sycl_Y, sycl_A, sycl_B); 42 | ml::tri_solve(q, sycl_X, sycl_A, sycl_Y); 43 | 44 | sycl_Y.set_final_data(host_Y.data()); 45 | sycl_X.set_final_data(host_X.data()); 46 | clear_eigen_device(); 47 | } 48 | 49 | /* 50 | std::cout << "Y:\n"; 51 | ml::print(host_Y, 3, 4); 52 | std::cout << "\nX:\n"; 53 | ml::print(host_X, 3, 4); 54 | */ 55 | 56 | std::array expected_Y{9.0, 8.0, 7.0, 6.0, -3.25, -3.0, 57 | -2.75, -2.5, -1.625, -1.5, -1.04167, -0.91667}; 58 | std::array expected_X{10.76042, 9.62500, 8.46181, 7.32639, 59 | -0.47396, -0.43750, -0.47049, -0.43403, 60 | -0.27083, -0.25000, -0.17361, -0.15278}; 61 | 62 | assert_vec_almost_eq(host_Y, expected_Y); 63 | assert_vec_almost_eq(host_X, expected_X); 64 | } 65 | /** @brief Same system as test_tri_solve with B, Y and X held in 4 x 3 buffers; the input and expected values are the previous test's numbers in transposed order. */ 66 | template 67 | void test_tri_solve_tr() { 68 | std::array host_A{1.0, 2.0, 3.0, 0.0, 4.0, 5.0, 0.0, 0.0, 6.0}; 69 | std::array host_B{9.0, 5.0, 1.0, 8.0, 4.0, 0.0, 70 | 7.0, 3.0, 1.0, 6.0, 2.0, 0.0}; 71 | 72 | std::array host_Y; 73 | std::array host_X; 74 | 75 | { 76 | cl::sycl::queue& q = create_queue(); 77 | ml::matrix_t sycl_A(host_A.data(), cl::sycl::range<2>(3, 3)); 78 | sycl_A.set_final_data(nullptr); 79 | ml::matrix_t sycl_B(host_B.data(), cl::sycl::range<2>(4, 3)); 80 | 
sycl_B.set_final_data(nullptr); 81 | 82 | ml::matrix_t sycl_Y{cl::sycl::range<2>(4, 3)}; 83 | ml::matrix_t sycl_X{cl::sycl::range<2>(4, 3)}; 84 | 85 | ml::tri_solve(q, sycl_Y, sycl_A, sycl_B); 86 | ml::tri_solve(q, sycl_X, sycl_A, sycl_Y); 87 | 88 | sycl_Y.set_final_data(host_Y.data()); 89 | sycl_X.set_final_data(host_X.data()); 90 | clear_eigen_device(); 91 | } 92 | 93 | /* 94 | std::cout << "Y:\n"; 95 | ml::print(host_Y, 4, 3); 96 | std::cout << "\nX:\n"; 97 | ml::print(host_X, 4, 3); 98 | */ 99 | 100 | std::array expected_Y{9.0, -3.25, -1.625, 8.0, -3.0, -1.5, 101 | 7.0, -2.75, -1.04167, 6.0, -2.5, -0.91667}; 102 | std::array expected_X{10.76042, -0.47396, -0.27083, 9.62500, 103 | -0.43750, -0.25000, 8.46181, -0.47049, 104 | -0.17361, 7.32639, -0.43403, -0.15278}; 105 | 106 | assert_vec_almost_eq(host_Y, expected_Y); 107 | assert_vec_almost_eq(host_X, expected_X); 108 | } 109 | /** @brief Runs both triangular-solve tests back to back. */ 110 | template 111 | void test_all() { 112 | test_tri_solve(); 113 | test_tri_solve_tr(); 114 | } 115 | /** @brief Test entry point: runs test_all, and a second time when SYCLML_TEST_DOUBLE is defined. @return 0 on success, 1 when a cl::sycl::exception was caught, so the failure is not reported as a pass. */ 116 | int main(void) { 117 | try { 118 | test_all(); 119 | #ifdef SYCLML_TEST_DOUBLE 120 | test_all(); 121 | #endif 122 | } catch (const cl::sycl::exception& e) { /* catch by const reference: no copy, no slicing */ 123 | std::cerr << e.what(); return 1; 124 | } 125 | 126 | return 0; 127 | } 128 | -------------------------------------------------------------------------------- /tests/src/svm/test_arg_extremum_cond.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | 18 | #include "ml/classifiers/svm/svm.hpp" 19 | #include "utils/utils.hpp" 20 | 21 | template 22 | void test_argmin_cond() { 23 | constexpr auto NB_ELT = 128LU; 24 | constexpr auto EXPECTED_MIN_IDX = 13LU; 25 | constexpr auto TRUE_MIN_IDX = 15LU; 26 | std::array host_data; 27 | fill_random(host_data, 0, 100); 28 | host_data[EXPECTED_MIN_IDX] = -1; 29 | host_data[TRUE_MIN_IDX] = -2; 30 | 31 | unsigned long min_idx; 32 | { 33 | cl::sycl::queue& q = create_queue(); 34 | ml::vector_t sycl_data(host_data.data(), cl::sycl::range<1>(NB_ELT)); 35 | ml::vector_t sycl_cond((cl::sycl::range<1>(NB_ELT))); 36 | 37 | ml::sycl_memset(q, sycl_cond, T(true)); 38 | // Ignore this index so it should not be returned 39 | sycl_cond.write_from_host(TRUE_MIN_IDX, false); 40 | 41 | { 42 | ml::vector_t device_scalar(ml::range<1>(1)); 43 | auto eig_scalar = ml::sycl_to_eigen<1, 0>(device_scalar); 44 | bool found = 45 | ml::detail::argmin_cond(q, sycl_cond, sycl_data, eig_scalar, min_idx); 46 | assert(found); 47 | } 48 | 49 | sycl_data.set_final_data(nullptr); 50 | clear_eigen_device(); 51 | } 52 | 53 | assert_eq(min_idx, EXPECTED_MIN_IDX); 54 | } 55 | 56 | int main() { 57 | try { 58 | test_argmin_cond(); 59 | #ifdef SYCLML_TEST_DOUBLE 60 | test_argmin_cond(); 61 | #endif 62 | } catch (cl::sycl::exception e) { 63 | std::cerr << e.what(); 64 | } 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /tests/src/svm/test_svm_linear.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | 18 | #include "ml/classifiers/svm/svm.hpp" 19 | #include "utils/utils.hpp" 20 | /** @brief Trains an ml::svm (constructed with the single argument 10) on the four 2-D points of the OR truth table with labels 0, 1, 1, 1. It then checks that the first SMO output holds 3 alphas equal to -4, 2, 2 and that rho equals -1. NOTE(review): this listing has lost its angle-bracket text (template parameter lists, template arguments, system include names); it is left untouched here and must be restored from the repository before compiling. */ 21 | template 22 | void test_svm_linear() { 23 | /* 24 | * Solves the OR problem, kernel can be linear. 25 | * y 0 1 26 | * x 27 | * 0 0 1 28 | * 1 1 1 29 | */ 30 | std::array host_data{0, 0, 0, 1, 1, 0, 1, 1}; 31 | std::vector host_labels{0, 1, 1, 1}; 32 | std::vector host_alphas; 33 | DataT host_rho; 34 | 35 | { 36 | cl::sycl::queue& q = create_queue(); 37 | ml::matrix_t sycl_data(host_data.data(), cl::sycl::range<2>(4, 2)); 38 | 39 | ml::svm, LabelT> svm(10); 40 | svm.train_binary(q, sycl_data, host_labels); 41 | 42 | auto smo_out = svm.get_smo_outs().front(); 43 | assert_eq(smo_out.alphas.data_range[0], 3LU); 44 | host_alphas.resize(smo_out.alphas.get_kernel_size()); 45 | auto event = 46 | ml::sycl_copy_device_to_host(q, smo_out.alphas, host_alphas.data()); 47 | event.wait_and_throw(); 48 | host_rho = smo_out.rho; 49 | 50 | sycl_data.set_final_data(nullptr); 51 | clear_eigen_device(); 52 | } 53 | 54 | /* 55 | std::cout << "alphas:\n"; 56 | ml::print(host_alphas.data(), 1, 3); 57 | std::cout << "\nrho: " << host_rho << std::endl; 58 | */ 59 | 60 | std::array expected_alphas{-4, 2, 2}; 61 | assert_vec_almost_eq(host_alphas.data(), expected_alphas.data(), 62 | expected_alphas.size()); 63 | assert_almost_eq(host_rho, DataT(-1)); 64 | } 65 | /** @brief Test entry point: runs test_svm_linear, and a second time when SYCLML_TEST_DOUBLE is defined. @return 0 on success, 1 when a cl::sycl::exception was caught, so the failure is not reported as a pass. */ 66 | int main() { 67 | try { 68 | test_svm_linear(); 69 | #ifdef SYCLML_TEST_DOUBLE 70 | test_svm_linear(); 71 | #endif 72 | } catch (const cl::sycl::exception& e) { /* catch by const reference: no copy, no slicing */ 73 | std::cerr << 
e.what(); return 1; 74 | } 75 | 76 | return 0; 77 | } 78 | -------------------------------------------------------------------------------- /tests/src/svm/test_svm_poly.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | 18 | #include "ml/classifiers/svm/svm.hpp" 19 | #include "utils/utils.hpp" 20 | /** @brief Trains an ml::svm built as svm(1000, KernelType(1, 1, 2), 2, 1E-6) on the four 2-D points of the XOR truth table with labels 0, 1, 1, 0. It then checks the 4 resulting alphas and rho against hard-coded values with a tolerance of 1E-3. NOTE(review): angle-bracket text is missing from this listing as well. */ 21 | template 22 | void test_svm_poly() { 23 | /* 24 | * Solves the XOR problem, kernel has to be at least polynomial or more 25 | * complex. 
y 0 1 26 | * x 27 | * 0 0 1 28 | * 1 1 0 29 | */ 30 | std::array host_data{0, 0, 0, 1, 1, 0, 1, 1}; 31 | std::vector host_labels{0, 1, 1, 0}; 32 | std::vector host_alphas; 33 | DataT host_rho; 34 | 35 | { 36 | cl::sycl::queue& q = create_queue(); 37 | ml::matrix_t sycl_data(host_data.data(), cl::sycl::range<2>(4, 2)); 38 | 39 | using KernelType = ml::svm_polynomial_kernel; 40 | ml::svm svm(1000, KernelType(1, 1, 2), 2, 1E-6); 41 | svm.train_binary(q, sycl_data, host_labels); 42 | 43 | auto smo_out = svm.get_smo_outs().front(); 44 | assert_eq(smo_out.alphas.data_range[0], 4LU); 45 | host_alphas.resize(smo_out.alphas.get_kernel_size()); 46 | auto event = 47 | ml::sycl_copy_device_to_host(q, smo_out.alphas, host_alphas.data()); 48 | event.wait_and_throw(); 49 | host_rho = smo_out.rho; 50 | 51 | sycl_data.set_final_data(nullptr); 52 | clear_eigen_device(); 53 | } 54 | 55 | /* 56 | std::cout << "alphas:\n"; 57 | ml::print(host_alphas.data(), 1, 4); 58 | std::cout << "\nrho: " << host_rho << std::endl; 59 | */ 60 | 61 | std::array expected_alphas{-3.332425, 2.665940, 2.665940, 62 | -1.999455}; 63 | assert_vec_almost_eq(host_alphas.data(), expected_alphas.data(), 64 | expected_alphas.size(), DataT(1E-3)); 65 | assert_almost_eq(host_rho, DataT(-0.999728), DataT(1E-3)); 66 | } 67 | /** @brief Test entry point: runs test_svm_poly, and a second time when SYCLML_TEST_DOUBLE is defined. @return 0 on success, 1 when a cl::sycl::exception was caught, so the failure is not reported as a pass. */ 68 | int main() { 69 | try { 70 | test_svm_poly(); 71 | #ifdef SYCLML_TEST_DOUBLE 72 | test_svm_poly(); 73 | #endif 74 | } catch (const cl::sycl::exception& e) { /* catch by const reference: no copy, no slicing */ 75 | std::cerr << e.what(); return 1; 76 | } 77 | 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /tests/src/utils/assert_utils.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef TEST_SRC_UTILS_ASSERT_UTILS_HPP 17 | #define TEST_SRC_UTILS_ASSERT_UTILS_HPP 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #undef NDEBUG 24 | #include 25 | 26 | #define EPS 1E-5 27 | 28 | template 29 | void assert_eq(T actual, T expected) { 30 | if (actual != expected) { 31 | std::cerr << "Error: got " << actual << " expected " << expected 32 | << std::endl; 33 | assert(false); 34 | } 35 | } 36 | 37 | template 38 | void assert_almost_eq(T actual, T expected, const T eps = EPS) { 39 | if (std::fabs(actual - expected) > eps) { 40 | std::cerr << "Error: got " << actual << " expected " << expected 41 | << std::endl; 42 | assert(false); 43 | } 44 | } 45 | 46 | template 47 | void assert_vec_almost_eq(const T* actual, const T* expected, size_t size, 48 | const T eps = EPS) { 49 | for (size_t i = 0; i < size; ++i) { 50 | assert_almost_eq(actual[i], expected[i], eps); 51 | } 52 | } 53 | 54 | template 55 | void assert_vec_almost_eq(const std::array& actual, 56 | const std::array& expected, 57 | const T eps = EPS) { 58 | assert_vec_almost_eq(actual.data(), expected.data(), DIM, eps); 59 | } 60 | 61 | template 62 | void assert_vector_almost_eq_no_direction(const T* actual, const T* expected, 63 | const T eps = EPS) { 64 | T norm_pos = 0; 65 | T norm_neg = 0; 66 | for (unsigned i = 0; i < DIM; ++i) { 67 | T diff = actual[i] - expected[i]; 68 | T sum = actual[i] + expected[i]; 69 | norm_pos += diff * diff; 70 | norm_neg += sum * sum; 71 | } 72 | T norm = std::min(norm_neg, norm_pos); 73 | 
assert_almost_eq(norm, 0.0f, eps); 74 | } 75 | 76 | #endif // TEST_SRC_UTILS_ASSERT_UTILS_HPP 77 | -------------------------------------------------------------------------------- /tests/src/utils/sycl_utils.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef TEST_SRC_UTILS_SYCL_UTILS_HPP 17 | #define TEST_SRC_UTILS_SYCL_UTILS_HPP 18 | 19 | #include "ml/utils/common.hpp" 20 | 21 | class init_first_kernel; 22 | 23 | /** 24 | * @brief Used to avoid measuring OpenCL initialization overhead 25 | * @param q 26 | */ 27 | void launch_first_kernel(cl::sycl::queue& q) { 28 | q.submit([](cl::sycl::handler& cgh) { 29 | cgh.single_task([]() {}); 30 | }); 31 | } 32 | 33 | /** 34 | * @brief Initialize device_constants and return the queue. 35 | * @return the sycl queue 36 | */ 37 | cl::sycl::queue& create_queue() { 38 | ml::device_constants::instance = new ml::device_constants(); 39 | auto& q = ml::get_eigen_device().sycl_queue(); 40 | launch_first_kernel(q); 41 | return q; 42 | } 43 | 44 | /** 45 | * @brief Free the singleton device_constants. 
46 | * Synchronizes the Eigen device first, then deletes the object returned by ml::get_device_constants(). Declared inline because it is defined in a header. */ 47 | inline void clear_eigen_device() { 48 | ml::get_eigen_device().synchronize(); 49 | delete ml::get_device_constants(); 50 | } 51 | 52 | #endif // TEST_SRC_UTILS_SYCL_UTILS_HPP 53 | -------------------------------------------------------------------------------- /tests/src/utils/test_save_load.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include 17 | 18 | #include "ml/utils/buffer_t.hpp" 19 | #include "utils/utils.hpp" 20 | /** @brief Writes a 4-element host array with ml::save_array under the name "test_buf", reads it back with ml::load_array and checks that the loaded values match. NOTE(review): this listing has lost its angle-bracket text (template parameter lists, template arguments, system include names); it is left untouched here and must be restored from the repository before compiling. */ 21 | template 22 | void test_save_load_host() { 23 | constexpr size_t SIZE = 4; 24 | std::array buf{-1, 0, -1.5, 0.5}; 25 | std::array res; 26 | 27 | ml::save_array(buf.data(), SIZE, "test_buf"); 28 | ml::load_array(res.data(), SIZE, "test_buf"); 29 | 30 | /* 31 | std::cout << "Saved: "; 32 | ml::print(buf, 1, SIZE); 33 | std::cout << "Loaded: "; 34 | ml::print(res, 1, SIZE); 35 | */ 36 | 37 | assert_vec_almost_eq(res, buf); 38 | } 39 | /** @brief Same round trip through the queue-taking overloads: a 2 x 3 buffer built over buf is saved inside its own scope, a fresh 2 x 3 buffer is loaded from "test_buf", and its contents are directed to res through set_final_data before the comparison. */ 40 | template 41 | void test_save_load_device() { 42 | constexpr size_t SIZE = 6; 43 | std::array buf{-10, 0, -1.5, 3, 3, 1}; 44 | std::array res; 45 | 46 | { 47 | cl::sycl::queue& q = create_queue(); 48 | { 49 | ml::matrix_t sycl_buf(const_cast(buf.data()), 50 | cl::sycl::range<2>(2, 3)); 51 | ml::save_array(q, sycl_buf, "test_buf"); 52 | } 53 | ml::matrix_t sycl_res(cl::sycl::range<2>(2, 3)); 54 | ml::load_array(q, sycl_res, "test_buf"); 55 | 56 | sycl_res.set_final_data(res.data()); 57 | clear_eigen_device(); 58 | } 59 | 60 | /* 61 | std::cout << "Saved: "; 62 | ml::print(buf, 1, SIZE); 63 | std::cout << "Loaded: "; 64 | ml::print(res, 1, SIZE); 65 | */ 66 | 67 | assert_vec_almost_eq(res, buf); 68 | } 69 | /** @brief Runs the host and the device round-trip tests back to back. */ 70 | template 71 | void test_all() { 72 | test_save_load_host(); 73 | test_save_load_device(); 74 | } 75 | /** @brief Test entry point: runs test_all, and a second time when SYCLML_TEST_DOUBLE is defined. @return 0 on success, 1 when a cl::sycl::exception was caught, so the failure is not reported as a pass. */ 76 | int main() { 77 | try { 78 | test_all(); 79 | #ifdef SYCLML_TEST_DOUBLE 80 | test_all(); 81 | #endif 82 | } catch (const cl::sycl::exception& e) { /* catch by const reference: no copy, no slicing */ 83 | std::cerr << e.what(); return 1; 84 | } 85 | 86 | return 0; 87 | } 88 | -------------------------------------------------------------------------------- /tests/src/utils/utils.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Codeplay Software Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef TEST_SRC_UTILS_UTILS_HPP 17 | #define TEST_SRC_UTILS_UTILS_HPP 18 | 19 | #include 20 | 21 | #include "assert_utils.hpp" 22 | #include "sycl_utils.hpp" 23 | /** @brief Overwrites every element of a with (max - min) * (rand() / RAND_MAX) + min, i.e. a value scaled from the C rand() generator into the span between min and max. The sequence depends on whatever srand call the caller made beforehand. NOTE(review): the template parameter list and the target type of the static_cast are missing from this listing — restore them from the repository. */ 24 | template 25 | void fill_random(Array& a, T min, T max) { 26 | std::generate(begin(a), end(a), [=]() { 27 | return (max - min) * (static_cast(rand()) / RAND_MAX) + min; 28 | }); 29 | } 30 | /** @brief Returns d[0] * d[3] - d[2] * d[1], the determinant of a 2 x 2 matrix held in the first four entries of d. The element type and size of the std::array are not visible in this listing (presumably four elements of T — TODO confirm). */ 31 | template 32 | T compute_det(const std::array& d) { 33 | return d[0] * d[3] - d[2] * d[1]; 34 | } 35 | 36 | #endif // TEST_SRC_UTILS_UTILS_HPP 37 | --------------------------------------------------------------------------------